/// <summary>
/// Rebuilds the regexes and pattern strings that depend on the current wiki's settings:
/// namespace names, month names, magic words, stub pattern, URLs, project and language code,
/// all of which are read from Variables.
/// </summary>
public static void MakeLangSpecificRegexes()
{
    // One compiled regex per namespace, keyed by namespace id
    NamespacesCaseInsensitive = new Dictionary<int, Regex>();
    foreach (var p in Variables.NamespacesCaseInsensitive)
    {
        NamespacesCaseInsensitive.Add(p.Key, new Regex(p.Value));
    }

    // Namespace name patterns that get spliced into the regexes below
    string category = Variables.NamespacesCaseInsensitive[Namespace.Category];
    string image = Variables.NamespacesCaseInsensitive[Namespace.File];
    string template = Variables.NamespacesCaseInsensitive[Namespace.Template];
    string userns = Variables.NamespacesCaseInsensitive[Namespace.User];
    string usertalkns = Variables.NamespacesCaseInsensitive[Namespace.UserTalk];

    // "{{", optional whitespace, then an optional (colon-prefixed) template namespace
    TemplateStart = @"\{\{\s*(:?" + template + ")?";
    TemplateNameRegex = Tools.TemplateNameRegex();

    Category = new Regex(@"\[\[[\s_]*" + category +
                         @"[ _]*([^[\]|\r\n]*?)[ _]*(?:\|([^\|\]]*))?[ _]*\]\]");

    // allow comments between categories, and keep them in the same place, only grab any comment after the last category if on same line
    // whitespace: remove all whitespace after, but leave a blank newline before a heading (rare case where category not in last section)
    RemoveCatsAllCats = new Regex(@"<!-- [^<>]*?\[\[\s*" + category + @".*?(\]\]|\|.*?\]\]).*?-->|\[\[" + category
                                  + @".*?(\]\]|\|.*?\]\])(\s*⌊⌊⌊⌊\d{1,4}⌋⌋⌋⌋| *<!--.*?-->|\s*<!--.*?-->(?=\r\n\[\[\s*" + category
                                  + @")|\s*(?=\r\n==)|\s*)?", RegexOptions.Singleline);

    CategoryQuick = new Regex(@"\[\[[\s_]*" + category);

    // Match file name by using allowed character list, [October 2012 any Unicode word characters] then a file extension (these are mandatory on mediawiki), then optional closing ]]
    // this allows typo fixing and find&replace to operate on image descriptions
    // or, alternatively, an image filename has to have a pipe or ]] after it if using the [[Image: start, so just set first one to
    // @"[^\[\]\|\{\}]+\.[a-zA-Z]{3,4}\b(?:\s*(?:\]\]|\|))
    // handles images within <gallery> and matches all of {{gallery}} too
    // Supported file extensions taken from https://commons.wikimedia.org/wiki/Commons:File_types
    string ImagesString = @"(?:\[\[\s*)?" + image +
                          @"[ \%\!""$&'’\(\)\*,\-.\/0-9:;=\?@\w\\\^_`~\x80-\xFF\+]+\.[a-zA-Z]{3,4}\b(?:\s*(?:\]\]|\|))?";
    const string ImageInTemplateString = @"|{{\s*[Gg]allery\s*(?:\|(?>[^\{\}]+|\{(?<DEPTH>)|\}(?<-DEPTH>))*(?(DEPTH)(?!)))?}}|(?<=\|\s*(?:[a-zA-Z\d][a-zA-Z\d_ ]*\s*=)?)[^\|{}=\r\n\[\]]+?\.(?i:djvu|gif|jpe?g|og[agv]|pdf|png|svg|tiff?|mid|xcf)(?=\s*(?:<!--[^>]*?-->\s*|⌊⌊⌊⌊M?\d+⌋⌋⌋⌋\s*)?(?:\||}}))";

    Images = new Regex(ImagesString + ImageInTemplateString);
    // Same pattern, but with the lookbehind turned into an ordinary non-capturing group
    ImagesCountOnly = new Regex(ImagesString + ImageInTemplateString.Replace(@"(?<=", @"(?:"));
    ImagesNotTemplates = new Regex(ImagesString);

    FileNamespaceLink = new Regex(@"\[\[\s*" + image +
                                  @"((?>[^\[\]]+|\[\[(?<DEPTH>)|\]\](?<-DEPTH>))*(?(DEPTH)(?!)))\]\]");

    Stub = new Regex(@"{{" + Variables.Stub + @"\s*(?:\|[^{}]+)?}}");
    UserSignature = new Regex(@"\[\[\s*(?:" + userns + @"|" + usertalkns + @")");
    TemplateCall = new Regex(TemplateStart + @"\s*([^\]\|]*)\s*(.*)}}", RegexOptions.Singleline);
    LooseCategory = new Regex(@"\[\[[\s_]*" + category + @"[\s_]*([^\|]*?)(\|.*?)?\]\]");
    LooseImage = new Regex(@"\[\[\s*?(" + image + @")\s*([^\|\]]+)(.*?)\]\]");

    // Month names, as a capturing and as a non-capturing alternation
    Months = "(" + string.Join("|", Variables.MonthNames) + ")";
    MonthsNoGroup = "(?:" + string.Join("|", Variables.MonthNames) + ")";

    Dates = new Regex("^(0?[1-9]|[12][0-9]|3[01]) " + Months + "$");
    Dates2 = new Regex("^" + Months + " (0?[1-9]|[12][0-9]|3[01])$");

    // \u00A0 is a non-breaking space; it is spelled as a regex escape so that it cannot be
    // mistaken for, or silently normalised to, an ordinary space
    InternationalDates = new Regex(@"\b([1-9]|[12][0-9]|3[01])(?: +|\u00A0)" + Months + @" +([12]\d{3})\b");
    AmericanDates = new Regex(Months + @"(?: +|\u00A0)([1-9]|[12][0-9]|3[01]),? +([12]\d{3})\b");
    DayMonth = new Regex(@"\b([1-9]|[12][0-9]|3[01])(?: +|\u00A0)" + Months + @"\b");
    MonthDay = new Regex(Months + @"(?: +|\u00A0)([1-9]|[12][0-9]|3[01])\b");

    DayMonthRangeSpan = new Regex(@"\b((?:[1-9]|[12][0-9]|3[01])(?:–|–|{{ndash}}|\/)(?:[1-9]|[12][0-9]|3[01])) " + Months + @"\b");
    MonthDayRangeSpan = new Regex(Months + @" ((?:[1-9]|[12][0-9]|3[01])(?:–|–|{{ndash}}|\/)(?:[1-9]|[12][0-9]|3[01]))\b");

    // Redirect keyword(s): the wiki's magic words with '#' stripped, else the English default
    List<string> magic;
    string RedirectString = Variables.MagicWords.TryGetValue("redirect", out magic)
        ? string.Join("|", magic.ToArray()).Replace("#", "")
        : "REDIRECT";

    // Deliberately tolerant: accepts extra opening/closing brackets and a stray ':', '|' or '='
    // after the keyword, so that FixSyntaxRedirects can fix such redirects
    Redirect = new Regex(@"#(?:" + RedirectString + @")\s*[:|=]?\s*\[?\[?\[\[\s*:?\s*([^\|\[\]]*?)\s*(\|.*?)?\]\]\]?\]?", RegexOptions.IgnoreCase);

    // Set index article template names
    string SiaTemplate = "([Ss]urnames?|SIA|[Ss]ia|[Ss]et index article|[Ss]et ?index|[Ss]hip ?index|[Mm]ountain ?index|[Rr]oad ?index|[Ss]port ?index|[Gg]iven name|[Mm]olForm ?Index|[Mm]olecular formula index|[Cc]hemistry index|[Ee]nzyme index|[Mm]edia set index|[Ll]ake ?index|[Aa]nimal common name|[Ff]ungus common name|[Pp]lant common name)";
    SIAs = new Regex(TemplateStart + SiaTemplate + @"\s*(?:\|[^{}]*?)?}}");

    // DEFAULTSORT keyword(s): the wiki's magic words with ':' stripped, else built-in defaults
    string defaultsortName;
    if (Variables.MagicWords.TryGetValue("defaultsort", out magic))
    {
        defaultsortName = "(?i:" + string.Join("|", magic.ToArray()).Replace(":", "") + ")";
    }
    else
    {
        defaultsortName = Variables.LangCode.Equals("en")
            ? "(?:(?i:defaultsort(key|CATEGORYSORT)?))"
            : "(?i:defaultsort)";
    }

    if (Variables.LangCode.Equals("sv"))
    {
        // sv-wiki: allow comment on same line as DEFAULTSORT
        Defaultsort = new Regex(TemplateStart + defaultsortName + @"\s*[:\|]\s*(?<key>(?>[^\{\}\r\n]+|\{(?<DEPTH>)|\}(?<-DEPTH>))*(?(DEPTH)(?!))|[^\}\r\n]*?)(?<end>\s*}}(?: *<!--[^<>]+-->)?|\r|\n)",
                                RegexOptions.ExplicitCapture);
    }
    else
    {
        Defaultsort = new Regex(TemplateStart + defaultsortName + @"\s*[:\|]\s*(?<key>(?>[^\{\}\r\n]+|\{(?<DEPTH>)|\}(?<-DEPTH>))*(?(DEPTH)(?!))|[^\}\r\n]*?)(?<end>\s*}}|\r|\n)",
                                RegexOptions.ExplicitCapture);
    }

    Persondata = Variables.LangCode.Equals("de")
        ? Tools.NestedTemplateRegex("personendaten")
        : Tools.NestedTemplateRegex("persondata");

    // Page URL prefix: the part URL and URLLong share (http or https), followed by either the
    // rest of URLLong + "index.php?title=" / "index.php/", or the rest of URL + "/wiki/"
    int pos = Tools.FirstDifference(Variables.URL, Variables.URLLong);
    string urlPrefix = Regex.Escape(Variables.URLLong.Substring(0, pos)).Replace(@"https://", @"https?://");
    urlPrefix += "(?:" + Regex.Escape(Variables.URLLong.Substring(pos)) + @"index\.php(?:\?title=|/)|"
                 + Regex.Escape(Variables.URL.Substring(pos)) + "/wiki/" + ")";
    ExtractTitle = new Regex("^" + urlPrefix + "([^?&]*)$");

    EmptyLink = new Regex(@"\[\[\s*(?:(:?" + category + "|" + image + @")\s*:?\s*(\|.*?)?|[|\s]*)\]\]");
    EmptyTemplate = new Regex(@"{{(" + template + @")?[|\s]*}}");

    // set orphan, wikify, uncat, disambiguation, inuse templates, date parameter & Link FA/GA/GL strings
    // The two locals start from the English lists and are overridden per language below.
    // NOTE(review): many cases assign only some of Orphan, DeadEnd, Wikify, InUse, LinkFGAs and
    // DateYearMonthParameter; the others presumably keep whatever they held before this call
    // (e.g. "hy" and the stub regex after the switch read DateYearMonthParameter without every
    // case setting it) - confirm that is intended.
    string uncattemplate = UncatTemplatesEN;
    string DisambigString = DisambigTemplatesEN;

    switch (Variables.LangCode)
    {
        case "ar":
            Orphan = Tools.NestedTemplateRegex(@"يتيمة");
            uncattemplate = @"(غير مصنفة|غير مصنف|[Uu]ncategori[sz]ed|[Uu]ncategori[sz]ed ?stub|بذرة غير مصنفة)";
            DateYearMonthParameter = @"تاريخ={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}";
            DeadEnd = new Regex(@"(?:{{\s*(?:[Dd]ead ?end|[Ii]nternal ?links|نهاية مسدودة)(?:\|(?:[^{}]+|" + DateYearMonthParameter + @"))?}})");
            Wikify = Tools.NestedTemplateRegex(@"وصلات قليلة");
            InUse = Tools.NestedTemplateRegex(new[] {"إنشاء", "تحرر", "Underconstruction", "تحت الإنشاء", "تحت الأنشاء", "يحرر", "إنشاء مقالة", "انشاء مقالة", "Inuse", "تحرير كثيف", "يحرر المقالة", "تحت التحرير", "قيد الاستخدام" });
            DisambigString = "([Dd]isambig|توضيح|صفحة توضيح|أسمياء)";
            break;

        case "arz":
            Orphan = Tools.NestedTemplateRegex(@"يتيمه");
            uncattemplate = @"(مش متصنفه|[Uu]ncategori[sz]ed|[Uu]ncategori[sz]ed ?stub|تقاوى مش متصنفه)";
            DateYearMonthParameter = @"تاريخ={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}";
            DeadEnd = new Regex(@"(?:{{\s*(?:[Dd]ead ?end|نهايه مسدوده)(?:\|(?:[^{}]+|" + DateYearMonthParameter + @"))?}})");
            Wikify = Tools.NestedTemplateRegex(@"ويكى");
            DisambigString = "([Dd]isambig|صفحة توضيح|توضيح)";
            break;

        case "ca":
            InUse = Tools.NestedTemplateRegex(new[] {"Modificant", "Editant-se", "Editant" });
            DisambigString = "([Dd]esambiguació|[Dd]esambigua|[Dd]isambig)";
            break;

        case "de":
            DisambigString = "([Bb]egriffsklärung)";
            break;

        case "el":
            Orphan = Tools.NestedTemplateRegex(@"Ορφανό");
            uncattemplate = "([Αα]κατηγοριοποίητο)";
            DateYearMonthParameter = @"ημερομηνία={{subst:CURRENTYEAR}} {{subst:CURRENTMONTH}}";
            DeadEnd = new Regex(@"(?:{{\s*(?:[Dd]ead ?end)(?:\|(?:[^{}]+|" + DateYearMonthParameter + @"))?}})");
            Wikify = new Regex(@"(?:{{\s*(?:Underlinked)(?:\s*\|\s*(?:" + DateYearMonthParameter + @"|.*?))?}})", RegexOptions.IgnoreCase);
            InUse = Tools.NestedTemplateRegex(new[] {"Inuse", "Σε χρήση" });
            DisambigString = "([Αα]ποσαφήνιση|[Αα]ποσαφ|[Dd]isambig)";
            break;

        case "eo":
            InUse = Tools.NestedTemplateRegex(new[] {"Redaktas", "Redaktata", "Uzata" });
            break;

        case "es":
            InUse = Tools.NestedTemplateRegex(new[] {"En uso", "Enuso" });
            DisambigString = "([Dd]esambiguación|[Dd]esambig|[Dd]es|[Dd]esambiguacion|[Dd]isambig)";
            break;

        case "fr":
            InUse = Tools.NestedTemplateRegex(new[] {"En cours" });
            break;

        case "hu":
            InUse = Tools.NestedTemplateRegex(new[] {"Építés alatt", "Fejlesztés"});
            break;

        case "hy":
            Orphan = Tools.NestedTemplateRegex(@"Որբ");
            uncattemplate = "(Կատեգորիա չկա|Կչ|[Uu]ncategorized)";
            DeadEnd = new Regex(@"(?:{{\s*(?:[Dd]ead ?end|[Uu]nderlinked|Փակ)(?:\|(?:[^{}]+|" + DateYearMonthParameter + @"))?}}|\s*Փակ\s*=\s*(?:{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}|[^{}\|]+))");
            Wikify = new Regex(@"{{\s*Վիքիֆիկացում(?:\s*\|\s*(" + DateYearMonthParameter + @"|.*?))?}}", RegexOptions.IgnoreCase);
            InUse = Tools.NestedTemplateRegex(new[] {"Խմբագրում եմ"});
            break;

        case "it":
            InUse = Tools.NestedTemplateRegex(new[] {"WIP", "Wip" });
            break;

        case "pl":
            DisambigString = "([Dd]isambig)";
            break;

        case "pt":
            InUse = Tools.NestedTemplateRegex(new[] {"Em edição", "Emuso", "Emedição"});
            break;

        case "ro":
            InUse = Tools.NestedTemplateRegex(new[] {"S-dezvoltare"});
            break;

        case "ru":
            uncattemplate = "([Нн]ет категорий|[Uu]ncategorized|[Uu]ncategorized stub|[Nn]ocat)";
            Orphan = Tools.NestedTemplateRegex(new[] {@"изолированная статья", @"Сирота", @"Orphan"});
            DateYearMonthParameter = @"date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}";
            // First letter class is Cyrillic Т/т, matching the rest of the template name
            DeadEnd = new Regex(@"(?:{{\s*(?:[Тт]упиковая статья|[Dd]ead ?end)(?:\|(?:[^{}]+|" + DateYearMonthParameter + @"))?}})");
            Wikify = new Regex(@"({{\s*(?:Wikify|Викифицировать|Тупиковая статья|Underlinked)(?:\s*\|\s*(" + DateYearMonthParameter + @"|.*?))?}})", RegexOptions.IgnoreCase);
            InUse = Tools.NestedTemplateRegex(new[] {"Редактирую", "Перерабатываю", "Inuse-by", "Пишу", "Inuse", "Правлю", "Перевожу", "In-use", "Processing", "Process", "Статья редактируется", "Викифицирую", "Under construction" });
            DisambigString = @"([Аа]ТДы|[Вв]оенные\ части|[Вв]оинские\ формирования|[Вв]оинские\ части|[Гг]оры|[Жж]ДС|[Жж]дс|[Мм]ногозначность|[Нн]Пы|[Нн]еоднозначность|[Нн]еоднозначность2|[Нн]пы|[Оо]дноименные\ фильмы|[Оо]дноимённые\ НП|[Оо]дноимённые\ воинские\ части|[Оо]дноимённые\ горные\ объекты|[Оо]дноимённые\ горы|[Оо]дноимённые\ железнодорожные\ станции|[Оо]дноимённые\ координаты|[Оо]дноимённые\ корабли|[Оо]дноимённые\ монастыри|[Оо]дноимённые\ муниципальные\ образования|[Оо]дноимённые\ муниципальные\ образования|[Оо]дноимённые\ населённые\ пункты|[Оо]дноимённые\ объекты\ АТД|[Оо]дноимённые\ озёра|[Оо]дноимённые\ острова|[Оо]дноимённые\ памятники|[Оо]дноимённые\ площади|[Оо]дноимённые\ реки|[Оо]дноимённые\ станции|[Оо]дноимённые\ станции\ метро|[Оо]дноимённые\ улицы|[Оо]дноимённые\ фильмы|[Оо]дноимённые\ храмы|[Оо]днофамильцы-тёзки|[Оо]зёра|[Оо]строва|[Рр]еки|[Сс]писок\ однофамильцев|[Сс]писок\ однофамильцев-тёзок|[Сс]писок\ полных\ тёзок|[Сс]писок\ тёзок|[Сс]писок\ тёзок-однофамильцев|[Сс]танции|[Тт]ёзки-однофамильцы|[Cc]hurchdis|[Cc]oorddis|[Dd]isambig|[Dd]isambiguation|[Mm]etrodis|[Mm]ilitarydis|[Mm]ondis|[Mm]onumdis|[Mm]ountaindis|[Mm]oviedis|[Pp]lacedis|[Rr]iverdis|[Rr]oaddis|[Ss]hipdis|[Ss]tationdis|[Ss]urname)";
            break;

        case "sq":
            uncattemplate = "([Pp]a kategori|[Uu]ncategorized)";
            Orphan = Tools.NestedTemplateRegex(new[] {@"Faqe e palidhur", "Orphan"});
            DateYearMonthParameter = @"date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}";
            break;

        case "sv":
            Orphan = Tools.NestedTemplateRegex(@"Föräldralös");
            uncattemplate = "([Oo]kategoriserad|[Uu]ncategori[sz]ed|[Uu]ncategori[sz]ed ?stub)";
            DateYearMonthParameter = @"datum={{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}";
            DeadEnd = new Regex(@"(?:{{\s*(?:[Dd]ead ?end)(?:\|(?:[^{}]+|" + DateYearMonthParameter + @"))?}})");
            Wikify = new Regex(@"{{\s*Ickewiki(?:\s*\|\s*(" + DateYearMonthParameter + @"|.*?))?}}", RegexOptions.IgnoreCase);
            InUse = Tools.NestedTemplateRegex(new[] {"Pågår", "Information kommer", "Pågående uppdateringar", "Ständiga uppdateringar", "PÅGÅR", "Påbörjad", "Bearbetning pågår"});
            // No empty alternative in this list: one would let the regex match a template with no name
            DisambigString = "(4LA|[Bb]etydelselista|[Dd]ab|[Dd]isambig|[Dd]isambiguation|[Ee]fternamn|[Ff]örgrening|[Ff]örgreningssida|[Ff]lertydig|[Ff]örnamn|[Gg]affel|[Gg]ren|[Gg]rensida|[Hh]ndis|[Nn]amnförgrening|[Nn]amngrensida|[Oo]rtnamn|[Rr]obotskapad förgrening|[Tt]rebokstavsförkortning|[Tt]rebokstavsförgrening)";
            break;

        case "zh":
            DateYearMonthParameter = @"time={{subst:#time:c}}";
            Orphan = Tools.NestedTemplateRegex(new[] {@"Orphan"});
            InUse = Tools.NestedTemplateRegex(new[] {"Inuse", "UnderConstruction", "工事中", "Inedit", "Editing", "使用中", "2小时内重大修改 " });
            break;

        default:
            // Every other language code: uncattemplate and DisambigString keep the English lists
            DateYearMonthParameter = @"date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}";
            Orphan = Tools.NestedTemplateRegex(new[] {@"Orphan"});
            DeadEnd = new Regex(@"(?:{{\s*(?:[Dd]ead ?end|[Ii]nternal ?links|[Nn]uevointernallinks|[Dd]ep)(?:\|(?:[^{}]+|" + DateYearMonthParameter + @"))?}}|({{\s*(?:[Aa]rticle|[Mm]ultiple)\s*issues\b[^{}]*?(?:{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}})?[^{}]*?)*\|\s*dead ?end\s*=\s*(?:{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}|[^{}\|]+))");
            Wikify = new Regex(@"(?:{{\s*(?:Wikify|Underlinked)(?:\s*\|\s*(?:" + DateYearMonthParameter + @"|.*?))?}}|({{\s*(?:Article|Multiple)\s*issues\b[^{}]*?)\|\s*(?:wikify|underlinked)\s*=\s*(?:{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}|[^{}\|]+))", RegexOptions.IgnoreCase);
            InUse = Tools.NestedTemplateRegex(new[] {"Inuse", "In use", "GOCEinuse", "goceinuse", "in creation", "increation" });
            LinkFGAs = Tools.NestedTemplateRegex(new[] {"link FA", "link GA"});
            break;
    }

    Disambigs = new Regex(TemplateStart + DisambigString + @"\s*(?:\|[^{}]*?)?}}(?: *<!--.*?-->(?=\r\n|$))?", RegexOptions.Multiline);
    DisambigsGeneral = Tools.NestedTemplateRegex(new[] {"Disamb", "Disambig", "Disambiguation", "Dab"});
    DisambigsCleanup = Tools.NestedTemplateRegex(new[] {"Disambig-cleanup", "Disambig cleanup", "Disambiguation cleanup"});

    Uncat = new Regex(@"{{\s*" + uncattemplate + @"((\s*\|[^{}]+)?\s*|\s*\|((?>[^\{\}]+|\{\{(?<DEPTH>)|\}\}(?<-DEPTH>))*(?(DEPTH)(?!))))\}\}");

    // A stub template, either live or wrapped in an HTML comment
    PossiblyCommentedStub =
        new Regex(
            @"(<!-- ?\{\{" + Variables.Stub + @"\b\}\}.*?-->|\{\{" + Variables.Stub + @"\s*(?:\|(?:[^{}]+|" + DateYearMonthParameter + @"))?}})");

    if (Variables.LangCode.Equals("fr"))
    {
        ReferenceList = Tools.NestedTemplateRegex(new[] { "références", "references", "reflist" });
    }
    else
    {
        ReferenceList = Tools.NestedTemplateRegex(new[] { "reflist", "references-small", "references-2column"});
    }

    // Simple English Wikipedia also uses "Related pages" and "Other websites" as section headings
    if (Variables.Project == ProjectEnum.wikipedia && Variables.LangCode.Equals("simple"))
    {
        SeeAlso = new Regex(@"(==+)\s*(related +pages|see +also)\s*\1", RegexOptions.IgnoreCase);
        ExternalLinksHeader = new Regex(@"== *(Other +websites|External +links?) *==", RegexOptions.IgnoreCase | RegexOptions.RightToLeft);
    }
    else
    {
        ExternalLinksHeader = new Regex(@"== *External +links? *==", RegexOptions.IgnoreCase | RegexOptions.RightToLeft);
        SeeAlso = new Regex(@"(==+)\s*see +also\s*\1", RegexOptions.IgnoreCase);
    }
}