/// <summary>
/// Requests the page at <paramref name="url"/> from the configured web server and
/// returns its status code, resolved URI and parsed HTML content.
/// </summary>
/// <param name="url">
/// Page address relative to <c>webServer</c>; the two are joined with exactly one
/// slash between them regardless of trailing/leading slashes on either side.
/// </param>
/// <returns>
/// A <see cref="PageData"/> whose <c>AbsoluteUri</c>/<c>AbsolutePath</c> are taken from the
/// request and then overwritten with the response URI once a response arrives (they can
/// differ because auto-redirect is enabled). <c>Content</c> is only assigned when a
/// response stream was read. Any exception raised while sending the request or reading
/// the response is logged and not rethrown; in that case <c>StatusCode</c> is set only
/// if the server actually returned an HTTP error response.
/// </returns>
public PageData FetchPage(string url)
{
    if (indexPrivatePages && authorizationCookies == null && authMode != AuthMode.Windows)
    {
        TryAuthenticate(); // Forms authentication.
    }

    var fullUrl = string.Concat(webServer.TrimEnd('/'), "/", url.TrimStart('/'));
    var httpWebRequest = (HttpWebRequest)WebRequest.Create(fullUrl);

    if (indexPrivatePages && authMode == AuthMode.Windows)
    {
        var userName = cmsConfiguration.Search.GetValue(LuceneSearchConstants.ConfigurationKeys.LuceneAuthorizationWindows_UserName);
        var password = cmsConfiguration.Search.GetValue(LuceneSearchConstants.ConfigurationKeys.LuceneAuthorizationWindows_Password);
        httpWebRequest.Credentials = new NetworkCredential(userName, password);
    }

    httpWebRequest.AllowAutoRedirect = true;
    httpWebRequest.Timeout = (int)fetchTimeout.TotalMilliseconds;
    httpWebRequest.CookieContainer = new CookieContainer();

    if (authorizationCookies != null)
    {
        // Copy each cookie instead of adding the shared instance to this request's container.
        foreach (Cookie authCookie in authorizationCookies)
        {
            var cookie = new Cookie(authCookie.Name, authCookie.Value, authCookie.Path, authCookie.Domain);
            httpWebRequest.CookieContainer.Add(cookie);
        }
    }

    HttpWebResponse httpWebResponse = null;
    var response = new PageData();

    // Start with the request URI so the result identifies the page even if the fetch fails.
    response.AbsoluteUri = httpWebRequest.RequestUri.AbsoluteUri;
    response.AbsolutePath = httpWebRequest.RequestUri.AbsolutePath;

    try
    {
        httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();
        response.StatusCode = httpWebResponse.StatusCode;
        response.AbsolutePath = httpWebResponse.ResponseUri.AbsolutePath;
        response.AbsoluteUri = httpWebResponse.ResponseUri.AbsoluteUri;

        using (Stream responseStream = httpWebResponse.GetResponseStream())
        {
            if (responseStream != null)
            {
                using (var streamReader = new StreamReader(responseStream, Encoding.UTF8))
                {
                    response.Content = new HtmlDocument();
                    response.Content.LoadHtml(streamReader.ReadToEnd());
                }
            }
        }
    }
    catch (Exception ex)
    {
        Log.ErrorFormat("Lucene web crawler: Failed to fetch page by url {0}.", ex, url);

        var webException = ex as WebException;
        if (webException != null)
        {
            // WebException.Response is null when no response was received at all
            // (timeout, DNS failure, connection refused), so it must not be dereferenced
            // blindly. When it is present it has to be closed here: httpWebResponse is
            // still null in that case and the finally block below will not release it.
            using (WebResponse errorResponse = webException.Response)
            {
                var httpErrorResponse = errorResponse as HttpWebResponse;
                if (httpErrorResponse != null)
                {
                    response.StatusCode = httpErrorResponse.StatusCode;
                }
            }
        }
    }
    finally
    {
        if (httpWebResponse != null)
        {
            httpWebResponse.Close();
        }
    }

    return response;
}