AmazonScrape.Parser.GetPageResultCount C# (CSharp) Method

GetPageResultCount() public static method

Given the html of an Amazon search page result, returns the number of product results.
public static GetPageResultCount ( string pageHtml ) : int
pageHtml string html of entire search page
return int
        /// <summary>
        /// Given the html of an Amazon search page result, returns the
        /// number of product results shown on that page.
        /// </summary>
        /// <param name="pageHtml">html of entire search page</param>
        /// <returns>
        /// The number of results on the page, or 0 when the result-count
        /// section is missing, contains no numeric values, or holds a value
        /// that cannot be represented as an int.
        /// </returns>
        public static int GetPageResultCount(string pageHtml)
        {
            // Three possible formats for figuring out the
            // number of results on the page:
            // -------------------------------------------------
            // Case 1: "Showing X Results" (one page)
            // Case 2: "Showing X - Y of Z Results" ( >1 page)
            // Case 3: "Your search "<search term here>" did
            //          not match any products."

            // Grab the section after the resultCount id attribute
            // until the next id attribute
            string resultCountPattern = @"(?<=id=""resultCount"").*?(?= id=)";
            string match = GetSingleRegExMatch(pageHtml, resultCountPattern);

            // No result-count section found (Case 3). Also guards against the
            // helper handing back null instead of an empty string.
            if (string.IsNullOrEmpty(match))
            {
                return 0;
            }

            // Parse out the numeric values,
            // limiting to two maximum (as in Case 2 above)
            List<Double> resultRange = ParseDoubleValues(match, 2);

            if (resultRange == null)
            {
                return 0;
            }

            try
            {
                switch (resultRange.Count)
                {
                    case 1:
                        // Case 1: the single number is the page's result count.
                        return Convert.ToInt32(resultRange[0]);

                    case 2:
                        // ParseDoubleValues thinks the hyphen in the results
                        // denotes a negative number.
                        // e.g. "17-32 of 65,130" will return 17, -32
                        // Get the absolute values before subtracting.
                        // The range is inclusive, hence the "- 1" on the start.
                        return Convert.ToInt32(
                            Math.Abs(resultRange[1]) -
                            (Math.Abs(resultRange[0]) - 1));

                    default:
                        // No numeric values were parsed; treat as no results.
                        return 0;
                }
            }
            catch (OverflowException)
            {
                // Convert.ToInt32(double) throws only when the value is
                // outside the int range (or NaN). Keep the best-effort
                // contract and report no results rather than crashing.
                return 0;
            }
        }

Usage Example

Example #1
0
        /// <summary>
        /// Processes one page of search results: fetches the page html,
        /// splits it into per-product segments, then parses and validates
        /// every segment, reporting each outcome as progress.
        /// </summary>
        /// <param name="sender">Not read by this method.</param>
        /// <param name="e">
        /// Work arguments; <c>Result</c> is set to this instance so it can be
        /// inspected when the work completes.
        /// </param>
        public void Work(object sender, DoWorkEventArgs e)
        {
            _status = Status.Working;

            // Give the thread a readable name, but only if it has none yet.
            Thread currentThread = Thread.CurrentThread;
            if (currentThread.Name == null)
            {
                currentThread.Name = "Page " + _pageNumber.ToString() + " worker";
            }

            // Expose this worker through the event args so its status
            // can be checked on completion.
            e.Result = this;

            // Start with an empty list of per-product html segments.
            _productHtmlSegments = new List<string>();

            // Fetch the html for the whole page.
            string html = _pageLoadMethod(_pageNumber, _searchCriteria.SearchText);

            // How many results does this page claim to contain?
            _pageResultCount = Parser.GetPageResultCount(html);

            // Nothing on the page: record that and stop early.
            if (_pageResultCount == 0)
            {
                _status = Status.NoResults;
                return;
            }

            // Split the page html into one segment per product
            // so each can be parsed on its own.
            _productHtmlSegments =
                Parser.GetPageResultItemHtml(html, _pageResultCount);

            // Note: this list is created but not populated within this method.
            List<Result<AmazonItem>> parsedResults = new List<Result<AmazonItem>>();

            foreach (string segment in _productHtmlSegments)
            {
                // The progress percentage is deliberately a dummy zero:
                // the SearchManager compares the total results returned
                // against the results requested and reports the real
                // percentage to the UI itself.
                ReportProgress(0, ParseAndValidateProductHtml(segment));
            }

            // RunWorkerComplete fires once this method returns; the
            // SearchManager treats that as the signal that it may
            // spawn another thread if necessary.
            _status = Status.Finished;
        }