/// <summary>
/// Produces the text content of an HTML string.
/// </summary>
/// <remarks>
/// The markup is loaded into an <c>HtmlDocument</c>, every entry in
/// <c>Parsers</c> is invoked on the document's root node, and the root node's
/// <c>InnerText</c> is then returned with line-break runs collapsed.
/// </remarks>
/// <param name="html">The HTML markup to load.</param>
/// <returns>
/// The root node's inner text, where every run of three or more CR/LF
/// characters has been replaced by exactly two CRLF sequences.
/// </returns>
public string StripHtml(string html)
{
    var document = new HtmlDocument();
    document.LoadHtml(html);

    // Each parser gets the root node before any text is read from it.
    foreach (var parser in Parsers)
    {
        parser(document.DocumentNode);
    }

    var text = document.DocumentNode.InnerText;

    // Collapse long runs of line-break characters down to a single blank line.
    return Regex.Replace(text, "[\r\n]{3,}", "\r\n\r\n");
}
/// <summary>
/// Checks that <c>StripHtml</c> turns a small HTML page (style block, heading,
/// div, trailing text and an image tag) into just its three text fragments,
/// each separated by a single CRLF.
/// </summary>
public void StripHtml_DoesRemoveAllHtml()
{
    // Arrange
    const string markup = "<html><head><style>.css { padding: 10px; }</style></head><body><h1>Heading</h1><div class=\"css\">This is a test.</div>With an image.<img src=\"image.jpg\" alt=\"image\" /></body></html>";
    const string expected = "Heading\r\nThis is a test.\r\nWith an image.";
    var sut = new HtmlAgilityPackParser(Uri);

    // Act
    var actual = sut.StripHtml(markup);

    // Assert
    Assert.That(actual, Is.EqualTo(expected));
}