ContentFiltering.Office.Word.Cleaners.CorrectTagsClosingCleaner.Clean C# (CSharp) Method

Clean() public method

Corrects the unclosed img and br tags generated by Word by making them self-closing (e.g. <br> becomes <br />).
public Clean ( string htmlSource ) : string
htmlSource string The html source to be corrected.
Returns string
        /// <summary>
        /// Makes every occurrence of the configured tag self-closing by inserting
        /// <c>" /"</c> before its closing <c>'&gt;'</c> when the tag does not already
        /// end in <c>"/&gt;"</c> (for example <c>&lt;br&gt;</c> becomes <c>&lt;br /&gt;</c>).
        /// </summary>
        /// <remarks>
        /// The tag name is read from <c>tagName</c>, which is declared outside this method.
        /// Matching is ordinal and case-sensitive. The first <c>'&gt;'</c> found after the
        /// tag name is treated as the end of the tag, so a <c>'&gt;'</c> inside an attribute
        /// value is not handled specially.
        /// </remarks>
        /// <param name="htmlSource">The html source to be corrected.</param>
        /// <returns>The html source with the matching tags self-closed.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="htmlSource"/> is <c>null</c>.
        /// </exception>
        public string Clean(string htmlSource)
        {
            if (htmlSource == null)
            {
                throw new System.ArgumentNullException("htmlSource");
            }

            // Initial capacity hint only: room for 500 two-character corrections.
            // The builder grows automatically if more tags need fixing.
            const int slack = 1000;
            const string correctionString = " /";
            string searchedString = "<" + tagName;

            StringBuilder sb = new StringBuilder(htmlSource.Length + slack);

            // Everything before this index has already been copied into sb.
            int copiedUpTo = 0;
            int searchFrom = 0;

            while (searchFrom < htmlSource.Length)
            {
                // Ordinal: markup must be matched character by character, never with
                // culture-specific rules (which may ignore or fold some characters).
                int startIndex = htmlSource.IndexOf(searchedString, searchFrom, System.StringComparison.Ordinal);
                if (startIndex < 0)
                {
                    break;
                }

                int nameEnd = startIndex + searchedString.Length;

                // Only accept a whole tag name: "<br" must not match "<brx ...>".
                if (nameEnd < htmlSource.Length)
                {
                    char next = htmlSource[nameEnd];
                    if (next != '>' && next != '/' && !char.IsWhiteSpace(next))
                    {
                        searchFrom = nameEnd;
                        continue;
                    }
                }

                int endIndex = htmlSource.IndexOf('>', nameEnd);
                if (endIndex < 0)
                {
                    // Unterminated tag: nothing left that could be corrected.
                    break;
                }

                // The tag is missing the '/' before the '>' character.
                if (htmlSource[endIndex - 1] != '/')
                {
                    sb.Append(htmlSource, copiedUpTo, endIndex - copiedUpTo);
                    sb.Append(correctionString);
                    copiedUpTo = endIndex;
                }

                searchFrom = endIndex + 1;
            }

            // Copy the remainder (or the whole input when nothing was corrected).
            sb.Append(htmlSource, copiedUpTo, htmlSource.Length - copiedUpTo);
            return sb.ToString();
        }

Usage Example

        /// <summary>
        /// Runs the tag-closing cleaner for "img" and "br" over the two sample documents,
        /// then checks that each result equals its expected value and can be loaded
        /// as XML.
        /// </summary>
        public void TestCleaner()
        {
            IHTMLCleaner tagClosingCleaner1 = new CorrectTagsClosingCleaner("img");
            initialHTML1 = tagClosingCleaner1.Clean(initialHTML1);

            IHTMLCleaner tagClosingCleaner2 = new CorrectTagsClosingCleaner("br");
            initialHTML2 = tagClosingCleaner2.Clean(initialHTML2);

            // Expected value first, actual second, so failure messages read correctly.
            Assert.AreEqual(expectedHTML1, initialHTML1);
            Assert.AreEqual(expectedHTML2, initialHTML2);

            bool canLoadXML = false;
            string loadError = null;
            try
            {
                new XmlDocument().LoadXml(initialHTML1);
                new XmlDocument().LoadXml(initialHTML2);
                canLoadXML = true;
            }
            catch (XmlException e)
            {
                // Keep the parser's message so a failure says what was malformed.
                loadError = e.Message;
            }

            Assert.IsTrue(canLoadXML, "Cleaned HTML is not well-formed XML: " + loadError);
        }
CorrectTagsClosingCleaner