/// <summary>
/// Unescapes the percent-encoded bytes starting at <paramref name="reader"/> and, when they
/// form exactly one well-formed UTF-8 sequence (one to four bytes), emits the raw bytes to
/// <paramref name="writer"/>.
/// </summary>
/// <param name="reader">
/// The iterator pointing at the first % char. It is handed to UnescapePercentEncoding for the
/// leading byte; for each continuation byte it is only moved forward once that byte has passed
/// validation.
/// </param>
/// <param name="writer">The place the decoded bytes are written to. Nothing is written on failure.</param>
/// <param name="end">The end of the sequence.</param>
/// <returns>
/// <c>true</c> when the decoded bytes were written to <paramref name="writer"/>; <c>false</c> when
/// unescaping failed or the bytes are not a valid UTF-8 sequence (bad lead byte, bad continuation
/// byte, truncated input, UTF-16 surrogate range, value above U+10FFFF, or overlong encoding).
/// </returns>
private static bool DecodeCore(ref MemoryPoolIterator2 reader, ref MemoryPoolIterator2 writer, MemoryPoolIterator2 end)
{
    // Prefix (code point >> 6) range of the UTF-16 surrogates 0xD800-0xDFFF.
    const int surrogatePrefixMin = 0x360;
    const int surrogatePrefixMax = 0x37F;

    // Smallest prefix (code point >> 12) that lies above the Unicode upper bound 0x10FFFF.
    const int beyondUnicodePrefix = 0x110;

    // NOTE(review): on a false return the caller presumably copies the raw bytes scanned so far
    // to the writer instead - confirm against the call site.
    var leadByte = UnescapePercentEncoding(ref reader, end);
    if (leadByte == -1)
    {
        return false;
    }

    if (leadByte <= 0x7F)
    {
        // Single-byte (ASCII) character: nothing further to validate.
        writer.Put((byte)leadByte);
        return true;
    }

    // Classify the lead byte: total length of the sequence, the payload bits it carries,
    // and the smallest code point that legitimately needs a sequence of that length.
    int sequenceLength;
    int codePoint;
    int smallestLegalValue;
    if ((leadByte & 0xE0) == 0xC0)
    {
        // 110x xxxx: two-byte sequence.
        sequenceLength = 2;
        codePoint = leadByte & 0x1F;
        smallestLegalValue = 0x80;
    }
    else if ((leadByte & 0xF0) == 0xE0)
    {
        // 1110 xxxx: three-byte sequence.
        sequenceLength = 3;
        codePoint = leadByte & 0x0F;
        smallestLegalValue = 0x800;
    }
    else if ((leadByte & 0xF8) == 0xF0)
    {
        // 1111 0xxx: four-byte sequence.
        sequenceLength = 4;
        codePoint = leadByte & 0x07;
        smallestLegalValue = 0x10000;
    }
    else
    {
        // Not a valid lead byte (a stray continuation byte, or 1111 1xxx).
        return false;
    }

    int secondByte = 0, thirdByte = 0, fourthByte = 0;

    // position is the 1-based index, within the sequence, of the continuation byte being read.
    for (var position = 2; position <= sequenceLength; position++)
    {
        // Presumably true when reader has reached end, i.e. the sequence is truncated.
        if (CompareIterators(ref reader, ref end))
        {
            return false;
        }

        // Work on a copy so that reader only advances past bytes that were accepted.
        var lookahead = reader;
        var continuation = UnescapePercentEncoding(ref lookahead, end);
        if (continuation == -1 || (continuation & 0xC0) != 0x80)
        {
            // Either unescaping failed or the byte is not of the form 10xx xxxx.
            return false;
        }

        codePoint = (codePoint << 6) | (continuation & 0x3F);
        var bytesStillExpected = sequenceLength - position;

        if (bytesStillExpected == 1 && codePoint >= surrogatePrefixMin && codePoint <= surrogatePrefixMax)
        {
            // Would land in the UTF-16 surrogate range 0xD800-0xDFFF, which UTF-8 forbids.
            return false;
        }

        if (bytesStillExpected == 2 && codePoint >= beyondUnicodePrefix)
        {
            // Would exceed the upper Unicode bound 0x10FFFF.
            return false;
        }

        // The byte is accepted: commit the reader position and remember the raw byte.
        reader = lookahead;
        switch (position)
        {
            case 2:
                secondByte = continuation;
                break;
            case 3:
                thirdByte = continuation;
                break;
            case 4:
                fourthByte = continuation;
                break;
        }
    }

    if (codePoint < smallestLegalValue)
    {
        // Overlong encoding (e.g. two bytes used for a value that fits in one).
        return false;
    }

    // Every byte has been verified; emit them in order. A multi-byte sequence always has
    // at least a lead byte and one continuation byte.
    writer.Put((byte)leadByte);
    writer.Put((byte)secondByte);
    if (sequenceLength > 2)
    {
        writer.Put((byte)thirdByte);
    }

    if (sequenceLength > 3)
    {
        writer.Put((byte)fourthByte);
    }

    return true;
}