AmazonScrape.PageManager.ParseAndValidateProductHtml C# (CSharp) Method

ParseAndValidateProductHtml() private method

Parses and validates a single product's HTML, returning a Result that contains either error/status messages or the valid AmazonItem.
private ParseAndValidateProductHtml ( string html ) : Result
Parameter: html (string) - the product HTML to parse.
Returns: Result
        /// <summary>
        /// Parses a single product's html and validates each parsed field
        /// against the current search criteria, stopping at the first
        /// check that fails.
        /// </summary>
        /// <param name="html">Product html to parse</param>
        /// <returns>
        /// A Result whose Value is the constructed AmazonItem when every
        /// check passes. Otherwise Value is left unassigned and either
        /// StatusMessage (item rejected by a search criterion) or
        /// ErrorMessage (review histogram could not be obtained) is set.
        /// When no product name can be parsed, neither message is set.
        /// </returns>
        private Result<AmazonItem> ParseAndValidateProductHtml(string html)
        {
            Result<AmazonItem> result = new Result<AmazonItem>();

            // Parse each item's html and exit early if validation fails on any item.
            string name = Parser.GetProductName(html);
            if (string.IsNullOrEmpty(name))
            {
                // Do not report a "missing product name" status message here.
                // Sometimes Amazon injects blurbs or information
                // sections in lieu of results (book results, for example).
                // This should not trigger an error.
                return result;
            }

            if (!ItemValidator.ValidateItemName(_searchCriteria, name))
            {
                result.StatusMessage = name + " doesn't contain all search criteria.";
                return result;
            }

            // Scrape the review histogram to obtain the review distribution
            // and the review count (originally review count was being
            // obtained on the main page, but Amazon removes review
            // information from search results if they smell a bot).
            string reviewHistogramHtml = Parser.GetReviewHistogramHtml(html);
            if (string.IsNullOrEmpty(reviewHistogramHtml))
            {
                result.ErrorMessage = "Couldn't obtain review histogram data";

                // Stop here: the score distribution, review count and rating
                // are all parsed from the histogram html, so continuing would
                // hand a null/empty string to the parser and could overwrite
                // this error with a misleading status message.
                return result;
            }

            ScoreDistribution scoreDistribution =
                Parser.GetScoreDistribution(reviewHistogramHtml);
            if (!ItemValidator.ValidateReviewDistribution(_searchCriteria, scoreDistribution))
            {
                result.StatusMessage = name + " doesn't fall within your review distribution.";
                return result;
            }

            int reviewCount = Parser.GetReviewCount(reviewHistogramHtml);
            if (!ItemValidator.ValidateReviewCount(_searchCriteria, reviewCount))
            {
                string message = name + " ";

                if (reviewCount == 0)
                {
                    message += "doesn't have any reviews.";
                }
                else
                {
                    message += "only has " + reviewCount.ToString() + " reviews.";
                }

                result.StatusMessage = message;
                return result;
            }

            DoubleRange priceRange = Parser.GetPriceRange(html);
            if (!ItemValidator.ValidatePriceRange(_searchCriteria, priceRange))
            {
                result.StatusMessage = name + " doesn't fit in your price range.";
                return result;
            }

            // Grab the item's URL so the user can go directly to the product page
            Uri url = Parser.GetURL(html);

            // Note: Right now there's no UI capability of validating average rating
            double rating = Parser.GetRating(reviewHistogramHtml);

            // TODO: implement a "prime-only" checkbox in the UI
            // The strict check works from the product URL, the fuzzy check
            // from the search-result html already in hand.
            bool primeEligibility;
            if (_searchCriteria.StrictPrimeEligibility)
            {
                primeEligibility = Parser.GetStrictPrimeEligibility(url);
            }
            else
            {
                primeEligibility = Parser.GetFuzzyPrimeEligibility(html);
            }

            // Leave the image load for last since it takes longer and if the
            // item doesn't pass validation we don't waste time downloading
            BitmapImage image = Parser.GetImageThumbnail(html);

            // We have everything we need, build the AmazonItem to be returned
            result.Value = new AmazonItem(name,
                reviewCount,
                priceRange,
                scoreDistribution,
                url,
                rating,
                primeEligibility,
                image);

            return result;
        }