/// <summary>
/// Fetches the page at <paramref name="url"/>, extracts its text and image blocks and appends them
/// to <paramref name="target"/> as <c>ReadableArticleImage</c> / <c>ReadableArticleParagraph</c> items,
/// publishing <c>LoadingMessage</c> progress notifications along the way.
/// </summary>
/// <param name="httpService">Service used to download the page (via <c>UnAuthedGet</c>).</param>
/// <param name="url">
/// Address of the page. It is passed to <c>new Uri(url)</c> for extraction, so a value that cannot be
/// parsed as an absolute URI makes the method throw after the download.
/// </param>
/// <param name="target">
/// List that receives the extracted items. Its current <c>Count</c> is taken into account for the
/// item cap, so items already present count towards the limit.
/// </param>
/// <returns>
/// A tuple whose first item is the result of <c>MultiPageUtils.FindNextPageLink</c> for this page and
/// whose second item is the title reported by the article extractor.
/// </returns>
private static async Task<Tuple<string, string>> LoadOneImpl(ISimpleHttpService httpService, string url, IList<Object> target)
{
    // Once the target holds more than this many items no further content is appended.
    const int MaxTargetItems = 200;
    // Accumulated text is emitted as one paragraph item as soon as it grows beyond this length.
    const int ParagraphFlushLength = 1000;

    // Show only the authority part in progress messages when the URL is well formed;
    // otherwise fall back to the raw string.
    string domain = url;
    if (Uri.IsWellFormedUriString(url, UriKind.Absolute))
    {
        domain = new Uri(url).Authority;
    }

    Messenger.Default.Send<LoadingMessage>(new LoadingMessage { Loading = true, Percentage = 0, Message = "loading from " + domain });
    try
    {
        var page = await httpService.UnAuthedGet(url);
        Messenger.Default.Send<LoadingMessage>(new LoadingMessage { Loading = true, Percentage = 50, Message = "processing page from " + domain });

        string title;
        var pageBlocks = ArticleExtractor.INSTANCE.GetTextAndImageBlocks(page, new Uri(url), out title);
        var lineSeparators = new char[] { '\r', '\n' };

        foreach (var tpl in pageBlocks)
        {
            // Stop processing blocks entirely once the cap is exceeded. Checking only inside the
            // paragraph loop would keep appending the images of all remaining blocks without text.
            if (target.Count > MaxTargetItems)
            {
                break;
            }

            // Item2 carries the block's image URL (if any); the image precedes the block's text.
            if (!string.IsNullOrEmpty(tpl.Item2))
            {
                target.Add(new ReadableArticleImage { Url = tpl.Item2 });
            }

            // A block without text contributes nothing further (also guards against a null Item1).
            if (string.IsNullOrEmpty(tpl.Item1))
            {
                continue;
            }

            StringBuilder articleContentsBuilder = new StringBuilder();
            foreach (var pp in tpl.Item1.Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries))
            {
                if (target.Count > MaxTargetItems)
                {
                    break;
                }

                articleContentsBuilder.AppendLine(pp);
                if (articleContentsBuilder.Length > ParagraphFlushLength)
                {
                    target.Add(new ReadableArticleParagraph { Text = articleContentsBuilder.ToString() });
                    articleContentsBuilder.Clear();
                }
            }

            // Flush whatever is left of the block; the trailing blank lines separate it from the next block.
            if (articleContentsBuilder.Length > 0)
            {
                target.Add(new ReadableArticleParagraph { Text = articleContentsBuilder.ToString() + "\n\n" });
            }
        }

        var nextPageUrl = MultiPageUtils.FindNextPageLink(SgmlDomBuilder.GetBody(SgmlDomBuilder.BuildDocument(page)), url);
        return Tuple.Create(nextPageUrl, title);
    }
    finally
    {
        // Always clear the loading state, even when the download or the extraction throws,
        // so listeners are not left showing a progress indicator forever.
        Messenger.Default.Send<LoadingMessage>(new LoadingMessage { Loading = false });
    }
}