public Clean ( string htmlSource ) : string | ||
htmlSource | string | The HTML source to be corrected. |
return | string |
/// <summary>
/// Self-closes every occurrence of the configured tag that is missing the '/' before
/// its closing '>' (for example <c>&lt;br&gt;</c> becomes <c>&lt;br /&gt;</c>).
/// Tags that already end in "/>" are left untouched.
/// </summary>
/// <param name="htmlSource">The HTML source to be corrected.</param>
/// <returns>A copy of <paramref name="htmlSource"/> with " /" inserted before the '>' of each unclosed tag.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="htmlSource"/> is null.</exception>
public string Clean(string htmlSource)
{
    if (htmlSource == null)
    {
        throw new System.ArgumentNullException("htmlSource");
    }

    // Extra capacity so the builder does not have to grow for the first few hundred
    // corrections; it is only a hint, the builder still grows on demand beyond that.
    const int slack = 1000;
    const string correctionString = " /";
    string searchedString = "<" + tagName;

    StringBuilder sb = new StringBuilder(htmlSource.Length + slack);
    int copiedUpTo = 0;  // index of the first source character not yet appended to sb
    int searchFrom = 0;

    while (searchFrom < htmlSource.Length)
    {
        // Ordinal search: markup must be matched character by character, not by culture rules.
        int startIndex = htmlSource.IndexOf(searchedString, searchFrom, System.StringComparison.Ordinal);
        if (startIndex < 0)
        {
            break;
        }

        int afterName = startIndex + searchedString.Length;
        if (afterName < htmlSource.Length && !IsTagNameTerminator(htmlSource[afterName]))
        {
            // A longer tag name that merely starts with ours (e.g. "<colgroup" when
            // looking for "<col"); it is not the tag being corrected.
            searchFrom = afterName;
            continue;
        }

        int endIndex = htmlSource.IndexOf('>', afterName);
        if (endIndex < 0)
        {
            // Unterminated tag: nothing more can be corrected.
            break;
        }

        // The tag is missing the '/' before the '>' character.
        if (htmlSource[endIndex - 1] != '/')
        {
            sb.Append(htmlSource, copiedUpTo, endIndex - copiedUpTo);
            sb.Append(correctionString);
            copiedUpTo = endIndex;
        }

        searchFrom = endIndex + 1;
    }

    // Copy whatever follows the last correction (the whole input if none was made).
    sb.Append(htmlSource, copiedUpTo, htmlSource.Length - copiedUpTo);
    return sb.ToString();
}

/// <summary>
/// Tells whether <paramref name="c"/> can directly follow a complete tag name:
/// whitespace, '/' or '>'.
/// </summary>
private static bool IsTagNameTerminator(char c)
{
    return c == '>' || c == '/' || char.IsWhiteSpace(c);
}
/// <summary>
/// Runs a <c>CorrectTagsClosingCleaner</c> for "img" and for "br" over the two HTML
/// fixtures, then checks that each result equals its expected markup and can be
/// loaded by <see cref="XmlDocument"/>.
/// </summary>
public void TestCleaner()
{
    IHTMLCleaner tagClosingCleaner1 = new CorrectTagsClosingCleaner("img");
    initialHTML1 = tagClosingCleaner1.Clean(initialHTML1);

    IHTMLCleaner tagClosingCleaner2 = new CorrectTagsClosingCleaner("br");
    initialHTML2 = tagClosingCleaner2.Clean(initialHTML2);

    // Expected value goes first so failure messages label the two values correctly.
    Assert.AreEqual(expectedHTML1, initialHTML1);
    Assert.AreEqual(expectedHTML2, initialHTML2);

    try
    {
        new XmlDocument().LoadXml(initialHTML1);
        new XmlDocument().LoadXml(initialHTML2);
    }
    catch (XmlException ex)
    {
        // Surface the parser's complaint instead of reducing it to a bare boolean.
        Assert.Fail("The cleaned HTML could not be loaded as XML: " + ex.Message);
    }
}