AmazonScrape.Parser.GetPriceRange C# (CSharp) 메소드

GetPriceRange() 공개 정적인 메소드

Parses a DoubleRange object representing the "high" and "low" prices from the item's html.
public static GetPriceRange ( string itemHtml ) : DoubleRange
itemHtml string
리턴 DoubleRange
        /// <summary>
        /// Parses a DoubleRange representing the "low" and "high" dollar
        /// prices found in an item's html.
        /// </summary>
        /// <param name="itemHtml">Html of a single item</param>
        /// <returns>
        /// A DoubleRange built from the parsed price text. When only one
        /// price is parsed, High is set equal to Low. A default
        /// DoubleRange is returned when no price text is matched.
        /// </returns>
        public static DoubleRange GetPriceRange(string itemHtml)
        {
            // Dollar sign, digits grouped by commas, a literal decimal point
            // and two digits of change (change is required).
            // The decimal point must be escaped; a bare '.' matches any
            // character and would accept text such as "$123456".
            string dollarCurrencyFormat = @"\$(\d{1,3}(,\d{3})*)\.(\d{2})";

            // A hyphen with at least one whitespace character on each side
            string spacesAndHyphen = @"\s+-\s+";

            // Grab the end of the preceding tag, the dollar amount, and
            // optionally a hyphen and a high range amount before the
            // beginning bracket of the next tag
            string pricePattern = ">" + dollarCurrencyFormat + "(" + spacesAndHyphen + dollarCurrencyFormat + ")?" + "<";

            string match = GetSingleRegExMatch(itemHtml, pricePattern);

            // Check before trimming so that a null result (if the helper
            // can return one) is treated as "no price" rather than throwing.
            if (string.IsNullOrEmpty(match))
            { return new DoubleRange(); }

            // Need to remove the tag beginning and end:
            match = match.Trim(new char[] { '<', '>' });

            if (match.Length == 0)
            { return new DoubleRange(); }

            // Ask for at most two values: the low price and the optional high price
            // NOTE(review): assumes the second argument caps the number of
            // parsed values — confirm against ParseDoubleValues.
            List<Double> prices = ParseDoubleValues(match, 2);
            DoubleRange priceRange = new DoubleRange();
            if (prices.Count > 0)
            {
                priceRange.Low = prices[0];
            }

            if (prices.Count > 1)
            {
                priceRange.High = prices[1];
            }

            // A single price is treated as a range whose ends are equal
            if (!priceRange.HasHigh)
            {
                priceRange.High = priceRange.Low;
            }

            return priceRange;
        }

Usage Example

예제 #1
0
        /// <summary>
        /// Parses one product's html and checks each parsed field against
        /// the current search criteria. Validation stops at the first
        /// failing check.
        /// </summary>
        /// <param name="html">Product html to parse</param>
        /// <returns>
        /// A Result whose Value is the constructed AmazonItem when every
        /// check passes; otherwise a Result carrying a status or error
        /// message (or nothing at all when no product name is found).
        /// </returns>
        private Result<AmazonItem> ParseAndValidateProductHtml(string html)
        {
            Result<AmazonItem> outcome = new Result<AmazonItem>();

            string productName = Parser.GetProductName(html);

            // No "missing product name" message is reported on purpose:
            // Amazon sometimes injects blurbs or information sections in
            // place of results (book results, for example), and those
            // should not be treated as errors.
            if (string.IsNullOrEmpty(productName))
            {
                return outcome;
            }

            if (!ItemValidator.ValidateItemName(_searchCriteria, productName))
            {
                outcome.StatusMessage = productName + " doesn't contain all search criteria.";
                return outcome;
            }

            // The review distribution and review count come from the
            // review histogram rather than the search results page, because
            // Amazon strips review information from search results when it
            // suspects a bot.
            string histogramHtml = Parser.GetReviewHistogramHtml(html);

            if (string.IsNullOrEmpty(histogramHtml))
            {
                // NOTE(review): the error is recorded but processing
                // continues with the empty histogram — confirm whether an
                // early return is intended here.
                outcome.ErrorMessage = "Couldn't obtain review histogram data";
            }

            ScoreDistribution distribution = Parser.GetScoreDistribution(histogramHtml);

            if (!ItemValidator.ValidateReviewDistribution(_searchCriteria, distribution))
            {
                outcome.StatusMessage = productName + " doesn't fall within your review distribution.";
                return outcome;
            }

            int totalReviews = Parser.GetReviewCount(histogramHtml);

            if (!ItemValidator.ValidateReviewCount(_searchCriteria, totalReviews))
            {
                string reason = (totalReviews == 0)
                    ? "doesn't have any reviews."
                    : "only has " + totalReviews.ToString() + " reviews.";

                outcome.StatusMessage = productName + " " + reason;
                return outcome;
            }

            DoubleRange prices = Parser.GetPriceRange(html);

            if (!ItemValidator.ValidatePriceRange(_searchCriteria, prices))
            {
                outcome.StatusMessage = productName + " doesn't fit in your price range.";
                return outcome;
            }

            // The item's URL lets the user go directly to the product page
            Uri productUrl = Parser.GetURL(html);

            // Note: the UI currently has no way to validate average rating
            double averageRating = Parser.GetRating(histogramHtml);

            // TODO: implement a "prime-only" checkbox in the UI
            bool isPrimeEligible = _searchCriteria.StrictPrimeEligibility
                ? Parser.GetStrictPrimeEligibility(productUrl)
                : Parser.GetFuzzyPrimeEligibility(html);

            // The thumbnail is fetched last: it takes longer, and there is
            // no point downloading it for an item that fails validation.
            BitmapImage thumbnail = Parser.GetImageThumbnail(html);

            // Everything needed is available; build the AmazonItem to return
            outcome.Value = new AmazonItem(
                productName,
                totalReviews,
                prices,
                distribution,
                productUrl,
                averageRating,
                isPrimeEligible,
                thumbnail);

            return outcome;
        }