/// <summary>
/// Removes category links, the DEFAULTSORT (or, on "sl", the LifeTime match) and any
/// {{uncategorized}} templates from the article text and returns the removed items as one string.
/// </summary>
/// <param name="articleText">The article wiki text; updated in place with the extracted items removed.</param>
/// <param name="articleTitle">The article title, used in the debug message and passed to CatKeyer.</param>
/// <returns>
/// The uncategorized templates, then the DEFAULTSORT, then the categories.
/// An empty string (with <paramref name="articleText"/> unchanged) when the page redirects to a category,
/// has more than one DEFAULTSORT, or when removing categories altered commented/nowiki text.
/// </returns>
public string RemoveCats(ref string articleText, string articleTitle)
{
    // don't pull category from redirects to a category e.g. page Hello is #REDIRECT[[Category:Hello]]
    string redirectTarget = Tools.RedirectTarget(articleText);
    if (redirectTarget.Length > 0 && WikiRegexes.Category.IsMatch(@"[[" + redirectTarget + @"]]"))
    {
        return "";
    }

    List<string> categoryList = new List<string>();
    string originalArticleText = articleText;
    string articleTextNoComments = Tools.ReplaceWithSpaces(articleText, WikiRegexes.Comments.Matches(articleText));

    // don't operate on pages with (incorrectly) multiple defaultsorts
    // ignore commented out DEFAULTSORT – https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_12#Moving_DEFAULTSORT_in_HTML_comments
    MatchCollection mc = WikiRegexes.Defaultsort.Matches(articleTextNoComments);
    if (mc.Count > 1)
    {
        Tools.WriteDebug("RemoveCats", "Page " + articleTitle + " has multiple DEFAULTSORTs");
        return "";
    }

    string defaultSort = "";
    bool defaultSortRemoved = false;

    // allow comments between categories, and keep them in the same place, only grab any comment after the last category if on same line
    // whitespace: remove all whitespace after, but leave a blank newline before a heading (rare case where category not in last section)
    // performance: apply regex on portion of article containing category links rather than whole text
    Match cq = WikiRegexes.CategoryQuick.Match(articleTextNoComments);
    if (cq.Success)
    {
        // The index comes from the comment-blanked text but is applied to the real text;
        // presumably ReplaceWithSpaces keeps the length unchanged so offsets line up — TODO confirm.
        int cutoff = Math.Max(0, cq.Index - 500);
        string tail = articleText.Substring(cutoff);

        string cut = WikiRegexes.RemoveCatsAllCats.Replace(tail, m =>
        {
            if (!CatsForDeletion.IsMatch(m.Value))
            {
                categoryList.Add(m.Value.Trim());
            }

            // if category not at start of line, leave newline, otherwise text on next line moved up
            bool precededByText = m.Index > 2 && tail.Substring(m.Index - 2, 2).Trim().Length > 0;
            return precededByText ? "\r\n" : "";
        });

        // if category tidying has changed comments/nowikis return with no changes – we've pulled a cat from a comment
        if (!Tools.UnformattedTextNotChanged(tail, cut))
        {
            articleText = originalArticleText;
            return "";
        }

        if (AddCatKey)
        {
            categoryList = CatKeyer(categoryList, articleTitle);
        }

        // now refresh defaultsort to pick up any comment on same line after it
        if (mc.Count > 0)
        {
            // Regex.ToString() yields only the pattern, so the original's options are passed on explicitly
            // to keep this match consistent with the first one. Compiled is masked out so the static
            // regex cache is used instead of recompiling on every call. The space is written as [ ]
            // so it stays literal whatever options the base regex carries.
            mc = Regex.Matches(
                articleText,
                WikiRegexes.Defaultsort.ToString() + @"(?:[ ]*<!--[^<>]*-->)?",
                WikiRegexes.Defaultsort.Options & ~RegexOptions.Compiled);
        }

        // remove defaultsort now if we can, faster to remove from cut than whole articleText
        if (mc.Count > 0 && cut.Contains(mc[0].Value))
        {
            cut = cut.Replace(mc[0].Value, "");
            defaultSortRemoved = true;
        }

        articleText = articleText.Substring(0, cutoff) + cut;

        // Move the first CatCommentRegex match to the front of the category list.
        if (CatCommentRegex.IsMatch(cut))
        {
            articleText = CatCommentRegex.Replace(articleText, m =>
            {
                categoryList.Insert(0, m.Value);
                return "";
            }, 1);
        }
    }

    // On "sl" a LifeTime match takes precedence over DEFAULTSORT; evaluate the regex only once.
    // NOTE(review): if a DEFAULTSORT was already removed from the tail above and LifeTime also matches,
    // the DEFAULTSORT is not returned and the LifeTime text is not removed below — confirm this is intended.
    Match lifeTime = Variables.LangCode.Equals("sl") ? LifeTime.Match(articleText) : Match.Empty;
    if (lifeTime.Success)
    {
        defaultSort = lifeTime.Value;
    }
    else if (mc.Count > 0)
    {
        defaultSort = mc[0].Value;
    }

    if (!string.IsNullOrEmpty(defaultSort))
    {
        // if defaultsort wasn't in the cut area before the categories, remove now
        if (!defaultSortRemoved)
        {
            articleText = articleText.Replace(defaultSort, "");
        }

        // Culture-independent, allocation-free check for the DEFAULTSORT keyword.
        if (defaultSort.IndexOf("DEFAULTSORT", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            defaultSort = TalkPageFixes.FormatDefaultSort(defaultSort);
        }

        defaultSort += "\r\n";
    }

    // Extract any {{uncategorized}} template, but not uncat stub templates
    string uncat = "";
    if (TemplateExists(Parsers.GetAllTemplates(originalArticleText), WikiRegexes.Uncat) && WikiRegexes.Uncat.IsMatch(articleTextNoComments))
    {
        articleText = WikiRegexes.Uncat.Replace(articleText, uncatm =>
        {
            // leave matches that look like a (possibly commented) stub template in place
            if (WikiRegexes.PossiblyCommentedStub.IsMatch(uncatm.Value))
            {
                return uncatm.Value;
            }

            // remove exact duplicates
            if (!uncat.Contains(uncatm.Value))
            {
                uncat += uncatm.Value + "\r\n";
            }

            return "";
        });
    }

    return uncat + defaultSort + ListToString(categoryList);
}