/// <summary>
/// Fetches <paramref name="url"/> via URL.FetchURL and builds a one-line description of the
/// page's HTML title followed by the domain it was fetched from.
/// </summary>
/// <param name="url">The address to fetch.</param>
/// <returns>
/// "Title: &lt;text&gt; (at &lt;domain&gt;)" when a title element is found,
/// "No title found (at &lt;domain&gt;)" when it is not, or <c>null</c> when the fetch throws a
/// WebException or UriFormatException, or when the fetched result has no domain.
/// </returns>
string FollowStandard(string url)
{
    // Pre-assign the fields that are read below so they are definitely assigned
    // even though the fetch sits inside a try block.
    URL.WebPage webPage;
    webPage.Domain = null;
    webPage.Page = null;

    try
    {
        webPage = URL.FetchURL(url);
    }
    catch (System.Net.WebException ex)
    {
        // Nothing returned when attempting to fetch the url; log it and give up.
        Logger.Write(ex.ToString() + "\r\nURL: " + url, Settings.Instance.ErrorFile);
        return null;
    }
    catch (System.UriFormatException ex)
    {
        // Invalid url detected, don't really care though.
        Logger.Write(ex.ToString() + "\r\nURL: " + url, Settings.Instance.ErrorFile);
        return null;
    }

    if (webPage.Domain == null)
    {
        return null;
    }

    string title = "No title found";

    // A result with a domain but no page body simply has no title to report;
    // Regex.Match would throw on a null input.
    if (webPage.Page != null)
    {
        // Hunt for the title tags on the page, and grab the text between them.
        Match match = Regex.Match(webPage.Page, @"<\s*title\s*>(.*?)</title\s*>", RegexOptions.Singleline | RegexOptions.IgnoreCase);
        if (match.Success)
        {
            // Trim excess whitespace
            title = "Title: " + match.Groups[1].Value.Trim();
            // Collapse every whitespace run (newlines included) to a single space
            title = Regex.Replace(title, @"\s+", " ");
            // Replace html character entities with their normal text counterparts
            title = HttpUtility.HtmlDecode(title);
            // Strip text-direction control characters. This has to target the decoded
            // characters: by this point any &#x202a; / &#x202c; style entities have
            // already been turned into the real code points by HtmlDecode.
            title = Regex.Replace(title, @"[\u200E\u200F\u202A-\u202E]", string.Empty);
        }
    }

    return title + " (at " + webPage.Domain + ")";
}