/// <summary>
/// Walks <paramref name="value"/> once and appends its HTML-decoded form to
/// <paramref name="output"/>. Numeric character references (decimal or hex) and
/// named entities known to <c>HtmlEntities.Lookup</c> are replaced by the
/// characters they denote; everything else is copied through unchanged.
/// </summary>
/// <param name="value">Text to decode. Dereferenced without a null check.</param>
/// <param name="output">Builder that receives the decoded text.</param>
private static void HtmlDecode(string value, StringBuilder output)
{
    Debug.Assert(output != null);

    int length = value.Length;
    for (int pos = 0; pos < length; pos++)
    {
        char current = value[pos];

        // Only an ampersand can open an entity; any other character is literal.
        if (current != '&')
        {
            output.Append(current);
            continue;
        }

        // Find whichever entity-ending character comes next (expected to be ';' or '&').
        // Hitting another '&' before a ';' means this one does not start an entity,
        // while the later '&' still might (VSWhidbey 275184). The search begins at
        // pos + 1, so a hit is never at index 0.
        int terminator = value.IndexOfAny(s_htmlEntityEndingChars, pos + 1);
        if (terminator <= 0 || value[terminator] != ';')
        {
            output.Append(current);
            continue;
        }

        int bodyStart = pos + 1;
        int bodyLength = terminator - bodyStart;

        if (bodyLength > 1 && value[bodyStart] == '#')
        {
            // Numeric character reference, written in decimal or hex, e.g.
            //   &#229;  --> decimal
            //   &#xE5;  --> same char in hex
            // See http://www.w3.org/TR/REC-html40/charset.html#entities
            char radixMarker = value[bodyStart + 1];
            bool isHex = radixMarker == 'x' || radixMarker == 'X';

            // NOTE(review): NumberStyles.Integer appears to tolerate surrounding
            // whitespace and a leading sign, unlike the hex path - confirm intended.
            uint codePoint;
            bool parsed = isHex
                ? uint.TryParse(value.Substring(bodyStart + 2, bodyLength - 2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out codePoint)
                : uint.TryParse(value.Substring(bodyStart + 1, bodyLength - 1), NumberStyles.Integer, CultureInfo.InvariantCulture, out codePoint);

            // The decoded value must lie in U+0000 .. U+10FFFF and must not be a surrogate.
            bool usable = parsed
                && ((codePoint < HIGH_SURROGATE_START) || (LOW_SURROGATE_END < codePoint && codePoint <= UNICODE_PLANE16_END));

            if (!usable)
            {
                // Not a valid reference: emit the '&' as-is and let the loop
                // re-examine the characters that follow it one by one.
                output.Append(current);
                continue;
            }

            if (codePoint <= UNICODE_PLANE00_END)
            {
                // Fits in one UTF-16 code unit.
                output.Append((char)codePoint);
            }
            else
            {
                // Beyond plane 0: needs a surrogate pair.
                char lead, trail;
                ConvertSmpToUtf16(codePoint, out lead, out trail);
                output.Append(lead);
                output.Append(trail);
            }

            pos = terminator; // everything up to the semicolon is consumed
            continue;
        }

        // Named entity: the text between '&' and ';' is the lookup key.
        string name = value.Substring(bodyStart, bodyLength);
        pos = terminator; // everything up to the semicolon is consumed

        char mapped = HtmlEntities.Lookup(name);
        if (mapped == (char)0)
        {
            // Unknown name: reproduce the original "&name;" text verbatim.
            output.Append('&');
            output.Append(name);
            output.Append(';');
        }
        else
        {
            output.Append(mapped);
        }
    }
}