/// <summary>
/// Works out how many of the first <paramref name="length"/> bytes of a UTF-8
/// chunk can be taken without cutting a multi-byte sequence in half.
/// </summary>
/// <param name="buffer">Buffer holding the UTF-8 encoded bytes.</param>
/// <param name="offset">Index in <paramref name="buffer"/> of the first byte of the chunk.</param>
/// <param name="length">Number of bytes in the candidate chunk.</param>
/// <returns>
/// <paramref name="length"/> when the chunk already ends on a character boundary,
/// when the trailing bytes are not well-formed UTF-8 (no sensible break exists), or
/// when the only possible break would be at position 0. Otherwise the index, relative
/// to <paramref name="offset"/>, of the lead byte of the incomplete trailing sequence,
/// so that the sequence is left whole for the next chunk.
/// </returns>
private int BreakText(byte[] buffer, int offset, int length)
{
    // An empty chunk, or one ending in an ASCII byte (0xxxxxxx), cannot split a sequence.
    if (length <= 0 || (buffer[offset + length - 1] & 0x80) == 0)
        return length;

    // Step back over trailing continuation bytes (binary 10xxxxxx) to reach the
    // byte that should be the lead byte (binary 11xxxxxx). A well-formed sequence
    // has at most three continuation bytes, so never look further back than that;
    // scanning past other bytes could latch onto an unrelated, earlier lead byte.
    int lead = length - 1;
    int continuationCount = 0;
    while (lead > 0 && continuationCount < 3 && (buffer[offset + lead] & 0xC0) == 0x80)
    {
        lead--;
        continuationCount++;
    }

    // The trailing sequence starts at the very beginning of the chunk. Breaking in
    // front of it would yield an empty chunk, so hand back the whole chunk instead.
    if (lead == 0)
        return length;

    // Derive the total sequence length (lead byte included) from the lead byte.
    int leadByte = buffer[offset + lead];
    int sequenceLength;
    if ((leadByte & 0xE0) == 0xC0)
        sequenceLength = 2; // 110xxxxx
    else if ((leadByte & 0xF0) == 0xE0)
        sequenceLength = 3; // 1110xxxx
    else if ((leadByte & 0xF8) == 0xF0)
        sequenceLength = 4; // 11110xxx
    else
        return length; // Not a usable lead byte: invalid UTF-8 - can't break

    // Fewer bytes available than the lead byte announces: the sequence is cut off,
    // so break just before its lead byte.
    int available = length - lead;
    if (available < sequenceLength)
        return lead;

    // Either the sequence fits exactly, or it is followed by stray continuation
    // bytes (invalid UTF-8 - can't break). Both cases keep the full length.
    return length;
}