// Inspects the first bytes of the parsing state's byte buffer and maps a recognized
// four-byte prefix (byte order mark or an encoded '<' character) to an Encoding.
//
// Returns null when fewer than two bytes are buffered or when the prefix does not
// match any of the signatures below; the caller is then expected to pick a default.
// Reports an unknown-encoding error through Throw when the prefix is 4C 6F A7 94
// (reported as "ebcdic").
private Encoding DetectEncoding()
{
    Debug.Assert(_ps.bytes != null);
    Debug.Assert(_ps.bytePos == 0);

    // At least two bytes are needed to form the leading pair.
    if (_ps.bytesUsed < 2)
    {
        return null;
    }

    // Bytes 0-1 combined into a single big-endian 16-bit value.
    int leadingPair = (_ps.bytes[0] << 8) | _ps.bytes[1];

    // Bytes 2-3 combined the same way; treated as zero when they are not buffered yet.
    // NOTE(review): with only 2-3 bytes buffered this zero is indistinguishable from a
    // real 00 00 pair, so the "== 0x0000" checks below select the UCS-4 variants.
    int trailingPair = 0;
    if (_ps.bytesUsed >= 4)
    {
        trailingPair = (_ps.bytes[2] << 8) | _ps.bytes[3];
    }

    switch (leadingPair)
    {
        case 0x0000:
            // Leading zero pair: only UCS-4 byte orders 1234 and 2143 start this way.
            switch (trailingPair)
            {
                case 0xFEFF:
                case 0x003C:
                    return Ucs4Encoding.UCS4_Bigendian;
                case 0xFFFE:
                case 0x3C00:
                    return Ucs4Encoding.UCS4_2143;
            }
            break;

        case 0xFEFF:
        case 0x003C:
            // FE FF or 00 3C followed by a zero pair is UCS-4 in 3412 order;
            // anything else is handled as big-endian UTF-16.
            if (trailingPair == 0x0000)
            {
                return Ucs4Encoding.UCS4_3412;
            }
            return Encoding.BigEndianUnicode;

        case 0xFFFE:
        case 0x3C00:
            // FF FE or 3C 00 followed by a zero pair is little-endian UCS-4;
            // anything else is handled as little-endian UTF-16.
            if (trailingPair == 0x0000)
            {
                return Ucs4Encoding.UCS4_Littleendian;
            }
            return Encoding.Unicode;

        case 0x4C6F:
            // 4C 6F A7 94 is rejected as an unsupported encoding.
            if (trailingPair == 0xA794)
            {
                Throw(SR.Xml_UnknownEncoding, "ebcdic");
            }
            break;

        case 0xEFBB:
            // EF BB BF: only the high byte of the trailing pair takes part in the match.
            if ((trailingPair >> 8) == 0xBF)
            {
                return new UTF8Encoding(true, true);
            }
            break;
    }

    // Nothing recognized. The default stays ASCII (via SafeAsciiDecoder) until the xml
    // declaration has been read. Choosing UTF8 at this point would make decoding throw
    // (which is slow) on characters that are not valid UTF8 but may be perfectly valid
    // in the encoding named by the xml declaration.
    return null;
}