ContentFiltering.Office.Word.Cleaners.TidyHTMLCleaner.Clean C# (CSharp) Method

Clean() public method

Uses Tidy.Net to clean an HTML source.
public Clean ( string htmlSource ) : string
htmlSource string The original html source.
return string
        /// <summary>
        /// Uses Tidy.Net to clean an HTML source, requesting XHTML/XML output.
        /// </summary>
        /// <param name="htmlSource">The original HTML source.</param>
        /// <returns>
        /// The output produced by Tidy, decoded as UTF-8. If Tidy throws a
        /// <see cref="FormatException"/> while parsing, the exception is logged and
        /// the unmodified <paramref name="htmlSource"/> is returned instead.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="htmlSource"/> is null (raised by the UTF-8 encoding step).
        /// </exception>
        public string Clean(string htmlSource)
        {
            Tidy tidy = new Tidy();

            // Options required for XHTML conversion.
            tidy.Options.DocType = DocType.Strict;
            tidy.Options.DropFontTags = true;
            tidy.Options.LogicalEmphasis = true;
            tidy.Options.Xhtml = true;
            tidy.Options.XmlOut = true;
            tidy.Options.MakeClean = true;
            tidy.Options.TidyMark = false;
            tidy.Options.DropEmptyParas = true;
            tidy.Options.IndentContent = true;
            tidy.Options.SmartIndent = true;
            // Word-specific cleanup is driven by the isWordHtml member of the enclosing class.
            tidy.Options.Word2000 = isWordHtml;
            tidy.Options.EncloseBlockText = true;

            tidy.Options.XmlTags = true;
            tidy.Options.FixComments = true;

            TidyMessageCollection tmc = new TidyMessageCollection();

            // Tidy works on streams, so the source is handed over as UTF-8 bytes.
            // A MemoryStream created over a byte array starts at position 0,
            // so no explicit write/rewind is needed.
            byte[] byteArray = Encoding.UTF8.GetBytes(htmlSource);

            // Both streams are disposed deterministically, including on the
            // early-return path taken when parsing fails.
            using (MemoryStream input = new MemoryStream(byteArray))
            using (MemoryStream output = new MemoryStream())
            {
                try
                {
                    tidy.Parse(input, output, tmc);
                }
                catch (FormatException ex)
                {
                    // Best effort: if Tidy cannot parse the content, log the
                    // problem and fall back to the original, uncleaned source.
                    Log.Exception(ex);
                    return htmlSource;
                }

                // ToArray copies only the bytes actually written by Tidy.
                return Encoding.UTF8.GetString(output.ToArray());
            }
        }