AmazonScrape.Parser.GetScoreDistribution C# (CSharp) Метод

GetScoreDistribution() публичный статический Метод

Returns a product's review distribution (percentage of reviews in each category)
public static GetScoreDistribution ( string reviewHistogramHtml ) : ScoreDistribution
reviewHistogramHtml string Review histogram html
Результат ScoreDistribution
        /// <summary>
        /// Returns a product's review distribution (percentage of reviews
        /// in each category).
        /// </summary>
        /// <param name="reviewHistogramHtml">Review histogram html</param>
        /// <returns>
        /// A ScoreDistribution built from five percentages. The first regex
        /// match is stored at index 4 and the fifth at index 0 (the intent
        /// being that index 0 is 1-star, index 1 is 2-star, etc.). A slot is
        /// set to -1 when its percentage is missing or cannot be parsed.
        /// Returns null when the html is null/empty or yields no matches.
        /// </returns>
        public static ScoreDistribution GetScoreDistribution(string reviewHistogramHtml)
        {
            const int categoryCount = 5;

            // Nothing to parse; treat the same as "no matches found".
            if (string.IsNullOrEmpty(reviewHistogramHtml))
            { return null; }

            // Find each instance of review percentage. This regex includes more than we need, but we
            // wind up only grabbing the first five results, which are the ones we care about.
            string reviewDistributionPattern = @"(?<=title="").*?(?=%)";

            List<string> matches = GetMultipleRegExMatches(reviewHistogramHtml,
                reviewDistributionPattern);

            // If we can't find any results, exit
            if (matches == null || matches.Count == 0)
            { return null; }

            double[] reviews = new double[categoryCount];

            for (int i = 0; i < categoryCount; i++)
            {
                // Feed them into the array backwards so that
                // one star reviews are in the zero index
                int slot = categoryCount - 1 - i;

                // The percentage is at the very end of each string, so walk
                // backwards over the trailing run of ASCII digits. A missing
                // match (fewer than five results) leaves this empty.
                string digits = string.Empty;
                if (i < matches.Count && matches[i] != null)
                {
                    string match = matches[i];
                    int start = match.Length;
                    while (start > 0 && match[start - 1] >= '0' && match[start - 1] <= '9')
                    {
                        start--;
                    }
                    digits = match.Substring(start);
                }

                // TryParse instead of Convert.ToDouble: an empty or invalid
                // string is an expected failure here, not an exception.
                double percentage;
                if (double.TryParse(digits,
                        System.Globalization.NumberStyles.None,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out percentage))
                {
                    reviews[slot] = percentage;
                }
                else
                {
                    string msg = "Unable to cast review score match {0}";
                    Debug.WriteLine(string.Format(msg, i));

                    // Flag the same slot the value would have occupied.
                    reviews[slot] = -1;
                }
            }

            return new ScoreDistribution(reviews);
        }

Usage Example

Пример #1
0
        /// <summary>
        /// Parses and validates a single product's html, returning a
        /// Result containing error/status messages or the valid AmazonItem.
        /// </summary>
        /// <param name="html">Product html to parse</param>
        /// <returns>
        /// A Result whose Value is the parsed AmazonItem when every validation
        /// step passes. Otherwise Value is left unset and StatusMessage or
        /// ErrorMessage describes why the item was skipped (no message is set
        /// when the html has no product name).
        /// </returns>
        private Result<AmazonItem> ParseAndValidateProductHtml(string html)
        {
            Result<AmazonItem> result = new Result<AmazonItem>();

            // Parse each item's html and exit early if validation fails on any item.
            string name = Parser.GetProductName(html);

            if (string.IsNullOrEmpty(name))
            {
                // Do not report a "missing product name" status message here.
                // Sometimes Amazon injects blurbs or information
                // sections in lieu of results (book results, for example).
                // This should not trigger an error.
                return result;
            }

            if (!ItemValidator.ValidateItemName(_searchCriteria, name))
            {
                result.StatusMessage = name + " doesn't contain all search criteria.";
                return result;
            }

            // Scrape the review histogram to obtain the review distribution
            // and the review count (originally review count was being
            // obtained on the main page, but Amazon removes review
            // information from search results if they smell a bot).
            string reviewHistogramHtml = Parser.GetReviewHistogramHtml(html);

            if (string.IsNullOrEmpty(reviewHistogramHtml))
            {
                string msg = "Couldn't obtain review histogram data";
                result.ErrorMessage = msg;

                // Stop here: the distribution, review count and rating are all
                // parsed from the histogram html, so there is nothing to parse.
                return result;
            }

            ScoreDistribution scoreDistribution =
                Parser.GetScoreDistribution(reviewHistogramHtml);

            if (!ItemValidator.ValidateReviewDistribution(_searchCriteria, scoreDistribution))
            {
                result.StatusMessage = name + " doesn't fall within your review distribution.";
                return result;
            }

            int reviewCount = Parser.GetReviewCount(reviewHistogramHtml);

            if (!ItemValidator.ValidateReviewCount(_searchCriteria, reviewCount))
            {
                string message = name + " ";

                if (reviewCount == 0)
                {
                    message += "doesn't have any reviews.";
                }
                else
                {
                    message += "only has " + reviewCount.ToString() + " reviews.";
                }
                result.StatusMessage = message;
                return result;
            }

            DoubleRange priceRange = Parser.GetPriceRange(html);

            if (!ItemValidator.ValidatePriceRange(_searchCriteria, priceRange))
            {
                result.StatusMessage = name + " doesn't fit in your price range.";
                return result;
            }

            // Grab the item's URL so the user can go directly to the product page
            Uri url = Parser.GetURL(html);

            // Note: Right now there's no UI capability of validating average rating
            double rating = Parser.GetRating(reviewHistogramHtml);

            // TODO: implement a "prime-only" checkbox in the UI
            bool primeEligibility;

            if (_searchCriteria.StrictPrimeEligibility)
            {
                primeEligibility = Parser.GetStrictPrimeEligibility(url);
            }
            else
            {
                primeEligibility = Parser.GetFuzzyPrimeEligibility(html);
            }

            // Leave the image load for last since it takes longer and if the
            // item doesn't pass validation we don't waste time downloading
            BitmapImage image = Parser.GetImageThumbnail(html);

            // We have everything we need, build the AmazonItem to be returned
            result.Value = new AmazonItem(name,
                                          reviewCount,
                                          priceRange,
                                          scoreDistribution,
                                          url,
                                          rating,
                                          primeEligibility,
                                          image);

            return result;
        }