WikiFunctions.Parse.Parsers.UnclosedTags C# (CSharp) Method

UnclosedTags() public static method

Searches for any unclosed <math>, <source>, <ref>, <code>, <nowiki>, <small>, <pre>, <center>, <sup>, <sub> or <gallery> tags, and for unclosed comments
public static UnclosedTags ( string articleText ) : Dictionary<int, int>
articleText string The article text
return Dictionary<int, int>
        /// <summary>
        /// Searches for any unclosed math, source, ref, code, nowiki, small, pre, center, sup, sub or gallery tags and comments.
        /// A cheap count of opening versus closing tag names is done first; the full (slower) clear-down and
        /// search only runs when that count, or a leftover '&lt;', suggests something is unbalanced.
        /// </summary>
        /// <param name="articleText">The article text</param>
        /// <returns>
        /// Dictionary with one entry per match of MathSourceCodeNowikiPreTag left after the clear-down:
        /// key is the match index, value is the match length. Empty when nothing is found or the text is null/empty.
        /// </returns>
        public static Dictionary<int, int> UnclosedTags(string articleText)
        {
            Dictionary<int, int> back = new Dictionary<int, int>();

            // Nothing to scan; also avoids passing null into the regex engine
            if (string.IsNullOrEmpty(articleText))
                return back;

            // Performance: get all tags, filter to the ones we're checking, compare the count of matched tags of same name
            // Then do full tag search if unmatched tags found

            // get all tags in format <tag...> in article
            // Invariant lowercasing: tag names are compared ordinally against a fixed list, so the
            // current culture (e.g. Turkish dotless i) must not influence the result
            MatchCollection anyTagMatchCollection = AnyTag.Matches(articleText);
            List<string> tagNames = (from Match m in anyTagMatchCollection
                select m.Groups[1].Value.Trim().ToLowerInvariant()).ToList();

            // discard self-closing tags in <tag/> format, discard wiki comments
            tagNames = tagNames.FindAll(s => !s.EndsWith("/", System.StringComparison.Ordinal)
                && !s.StartsWith("!--", System.StringComparison.Ordinal));

            // remove any text after first space, so we're left with tag name only
            tagNames = tagNames.Select(s =>
            {
                int firstSpace = s.IndexOf(' ');
                return firstSpace >= 0 ? s.Substring(0, firstSpace).Trim() : s;
            }).ToList();

            // filter to only the tags we're checking
            tagNames = tagNames.FindAll(s => MathSourceCodeNowikiPreTagList.Contains(s.TrimStart('/')));

            // Count the tag names in use, determine if unmatched tags by comparing count of opening and closing tags
            bool unmatched = false;
            Dictionary<string, int> tagCounts = tagNames.GroupBy(x => x).ToDictionary(x => x.Key, y => y.Count());
            foreach (KeyValuePair<string, int> kvp in tagCounts)
            {
                int matchedCount;

                // "ref" pairs with "/ref" and vice versa
                string otherTag = kvp.Key.StartsWith("/", System.StringComparison.Ordinal)
                    ? kvp.Key.TrimStart('/')
                    : "/" + kvp.Key;

                if (tagCounts.TryGetValue(otherTag, out matchedCount) && matchedCount == kvp.Value)
                    continue;

                unmatched = true;
                break;
            }

            // check for any unmatched tags or unclosed part tag
            if (!unmatched)
            {
                // now check for unclosed part tag: blank out every complete tag and look at what '<' is left
                string noTags = Tools.ReplaceWithSpaces(articleText, anyTagMatchCollection);
                int tagOpen = noTags.IndexOf('<');

                // Return early when no '<' remains, or when the first remaining '<' (not at index 0)
                // still has a '>' somewhere after it.
                // NOTE(review): a '<' at index 0 always falls through to the full search - confirm this is intended
                if (tagOpen == -1 || (tagOpen > 0 && noTags.IndexOf('>', tagOpen) != -1))
                    return back;
            }

            // if here then have some unmatched tags, so do full clear down and search
            // performance of Refs/SourceCode is better if IgnoreCase avoided, so lowercase the text up front
            // (invariant, so the result does not depend on the machine's culture)
            articleText = articleText.ToLowerInvariant();
            articleText = Tools.ReplaceWithSpaces(articleText, WikiRegexes.UnformattedText);
            articleText = Tools.ReplaceWithSpaces(articleText, WikiRegexes.GalleryTag, 2);
            articleText = Tools.ReplaceWithSpaces(articleText, new Regex(WikiRegexes.Refs.ToString(), RegexOptions.Singleline));

            // some (badly done) List of pages can have hundreds of unclosed small or center tags, causes regex backtracking when using <DEPTH>
            // so workaround solution: if > 10 unclosed tags, only remove tags without other tags embedded in them
            // Workaround constraint: we might incorrectly report some valid tags with < or > in them as unclosed
            int openingCount = tagNames.Count(s => !s.StartsWith("/", System.StringComparison.Ordinal));
            int closingCount = tagNames.Count - openingCount;

            if (openingCount > closingCount + 10)
            {
                // repeat until no simple pair is left, so nested pairs are peeled from the inside out
                while (SimpleTagPair.IsMatch(articleText))
                    articleText = Tools.ReplaceWithSpaces(articleText, SimpleTagPair);
            }
            else
            {
                articleText = Tools.ReplaceWithSpaces(articleText, new Regex(WikiRegexes.SourceCode.ToString(), RegexOptions.Singleline));
                articleText = Tools.ReplaceWithSpaces(articleText, CenterTag, 2);
                articleText = Tools.ReplaceWithSpaces(articleText, WikiRegexes.Small);
                articleText = Tools.ReplaceWithSpaces(articleText, SupTag, 2);
                articleText = Tools.ReplaceWithSpaces(articleText, SubTag, 2);
            }

            // whatever still matches after the clear-down is reported by position and length
            foreach (Match m in MathSourceCodeNowikiPreTag.Matches(articleText))
            {
                back.Add(m.Index, m.Length);
            }
            return back;
        }
Parsers