/* PunycodeDecode() converts Punycode to Unicode. */
/* */
/* NOTE: the description below appears to be carried over from a */
/* C-style punycode_decode routine and describes that interface, */
/* not the signature of the method that follows. In this method */
/* the input is the string "ascii", the decoded text is returned */
/* as a string, there are no input_length, output_length or */
/* case_flags arguments, and failures are reported by throwing */
/* ArgumentException rather than by returning a punycode_status. */
/* */
/* Original description: The input is */
/* represented as an array of ASCII code points, and the output */
/* will be represented as an array of Unicode code points. The */
/* input_length is the number of code points in the input. The */
/* output_length is an in/out argument: the caller passes in */
/* the maximum number of code points that it can receive, and */
/* on successful return it will contain the actual number of */
/* code points output. The case_flags array needs room for at */
/* least output_length values, or it can be a null pointer if the */
/* case information is not needed. A nonzero flag suggests that */
/* the corresponding Unicode character be forced to uppercase */
/* by the caller (if possible), while zero suggests that it be */
/* forced to lowercase (if possible). ASCII code points are */
/* output already in the proper case, but their flags will be set */
/* appropriately so that applying the flags would be harmless. */
/* The return value can be any of the punycode_status values */
/* defined above; if not punycode_success, then output_length, */
/* output, and case_flags might contain garbage. On success, the */
/* decoder will never need to write an output_length greater than */
/* input_length, because of how the encoding is defined. */
/// <summary>
/// Decodes a dot-separated name whose labels may be Punycode (ACE) encoded.
/// A label that starts with the ACE prefix (compared case-insensitively) is decoded;
/// any other label is copied to the output unchanged.
/// </summary>
/// <param name="ascii">The name to decode. Must not be empty.</param>
/// <returns>The decoded name.</returns>
/// <exception cref="ArgumentException">
/// The input is empty or longer than the name limit; a label is empty (other than the
/// empty label after a single trailing dot) or longer than the label limit; an ACE label
/// is not valid Punycode; an ACE label fails the right-to-left consistency check; or the
/// decoded name is longer than the name limit.
/// </exception>
private static string PunycodeDecode(string ascii)
{
    // 0 length strings aren't allowed
    if (ascii.Length == 0)
    {
        throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));
    }

    // Throw if we're too long. One more character is allowed when the name ends with a dot.
    int maxInputLength = c_defaultNameLimit - (IsDot(ascii[ascii.Length - 1]) ? 0 : 1);
    if (ascii.Length > maxInputLength)
    {
        throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, maxInputLength), nameof(ascii));
    }

    // Decoded name is accumulated here
    StringBuilder output = new StringBuilder(ascii.Length);

    // Dot searching:
    //   iNextDot            - index in ascii of the dot ending the current label (or ascii.Length)
    //   iAfterLastDot       - index in ascii where the current label starts
    //   iOutputAfterLastDot - index in output where the current label starts
    int iNextDot = 0;
    int iAfterLastDot = 0;
    int iOutputAfterLastDot = 0;
    while (iNextDot < ascii.Length)
    {
        // Find end of this segment
        iNextDot = ascii.IndexOf('.', iAfterLastDot);
        if (iNextDot < 0)
        {
            iNextDot = ascii.Length;
        }

        // Only allowed to have empty . section at end (www.microsoft.com.)
        if (iNextDot == iAfterLastDot)
        {
            // Only allowed to have empty sections as trailing .
            if (iNextDot != ascii.Length)
            {
                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));
            }

            // Last dot, stop
            break;
        }

        // In either case it can't be bigger than segment size
        if (iNextDot - iAfterLastDot > c_labelLimit)
        {
            throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));
        }

        // See if this section's ASCII or ACE. The comparison is done in place so that no
        // temporary substring is allocated; the ACE prefix is expected not to contain a dot,
        // so a comparison that runs past the end of a short label cannot match.
        if (ascii.Length < c_strAcePrefix.Length + iAfterLastDot ||
            string.Compare(ascii, iAfterLastDot, c_strAcePrefix, 0, c_strAcePrefix.Length, StringComparison.OrdinalIgnoreCase) != 0)
        {
            // Its ASCII, copy it
            output.Append(ascii, iAfterLastDot, iNextDot - iAfterLastDot);
        }
        else
        {
            // Not ASCII, bump up iAfterLastDot to be after ACE Prefix
            iAfterLastDot += c_strAcePrefix.Length;

            // Find the last delimiter in this label; everything before it is basic code points
            int iTemp = ascii.LastIndexOf(c_delimiter, iNextDot - 1);

            // Trailing - not allowed (this also rejects a label that is only the ACE prefix)
            if (iTemp == iNextDot - 1)
            {
                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
            }

            int numBasicCodePoints;
            if (iTemp <= iAfterLastDot)
            {
                // No delimiter after the prefix (or nothing in front of it): no basic code points
                numBasicCodePoints = 0;
            }
            else
            {
                numBasicCodePoints = iTemp - iAfterLastDot;

                // Copy all the basic code points, making sure they're all in the allowed range,
                // and losing the casing for all of them.
                for (int copyAscii = iAfterLastDot; copyAscii < iAfterLastDot + numBasicCodePoints; copyAscii++)
                {
                    char basic = ascii[copyAscii];

                    // Make sure we don't allow unicode in the ascii part
                    if (basic > 0x7f)
                    {
                        throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                    }

                    // When appending make sure they get lower cased
                    output.Append((char)(basic >= 'A' && basic <= 'Z' ? basic - 'A' + 'a' : basic));
                }
            }

            // Get ready for main loop. Start at beginning if we didn't have any
            // basic code points, otherwise start after the -.
            // asciiIndex will be next character to read from ascii
            int asciiIndex = iAfterLastDot + (numBasicCodePoints > 0 ? numBasicCodePoints + 1 : 0);

            // initialize our state
            int n = c_initialN;
            int bias = c_initialBias;
            int i = 0;
            int w, k;

            // no Supplementary characters yet
            int numSurrogatePairs = 0;

            // Main loop, read rest of ascii
            while (asciiIndex < iNextDot)
            {
                /* Decode a generalized variable-length integer into delta, */
                /* which gets added to i. The overflow checking is easier */
                /* if we increase i as we go, then subtract off its starting */
                /* value at the end to obtain delta. */
                int oldi = i;

                for (w = 1, k = c_punycodeBase; ; k += c_punycodeBase)
                {
                    // Check to make sure we aren't overrunning our ascii string
                    if (asciiIndex >= iNextDot)
                    {
                        throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                    }

                    // decode the digit from the next char
                    int digit = DecodeDigit(ascii[asciiIndex++]);

                    Debug.Assert(w > 0, "[IdnMapping.punycode_decode]Expected w > 0");

                    // Reject before i + digit * w could exceed c_maxint
                    if (digit > (c_maxint - i) / w)
                    {
                        throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                    }

                    i += digit * w;

                    // Threshold for this digit position, clamped to [c_tmin, c_tmax]
                    int t = k <= bias ? c_tmin : k >= bias + c_tmax ? c_tmax : k - bias;
                    if (digit < t)
                    {
                        // A digit below the threshold terminates this variable-length integer
                        break;
                    }

                    Debug.Assert(c_punycodeBase != t, "[IdnMapping.punycode_decode]Expected t != c_punycodeBase (36)");

                    // Reject before w * (c_punycodeBase - t) could exceed c_maxint
                    if (w > c_maxint / (c_punycodeBase - t))
                    {
                        throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                    }

                    w *= (c_punycodeBase - t);
                }

                // Code points already in this label (a surrogate pair counts once), plus one
                // for the code point about to be inserted. output is not modified until the
                // Insert below, so this value is stable for the next few statements.
                int numPoints = (output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1;

                bias = Adapt(i - oldi, numPoints, oldi == 0);

                /* i was supposed to wrap around from output.Length to 0, */
                /* incrementing n each time, so we'll fix that now: */
                Debug.Assert(numPoints > 0,
                    "[IdnMapping.punycode_decode]Expected to have added > 0 characters this segment");
                if (i / numPoints > c_maxint - n)
                {
                    throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                }

                n += i / numPoints;
                i %= numPoints;

                // Make sure n is legal: a Unicode scalar value that is not a surrogate code point
                if ((n < 0 || n > 0x10ffff) || (n >= 0xD800 && n <= 0xDFFF))
                {
                    throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                }

                // insert n at position i of the output: Really tricky if we have surrogates,
                // because i counts code points while output is indexed in UTF-16 chars.
                int iUseInsertLocation;
                string strTemp = char.ConvertFromUtf32(n);

                // If we have supplementary characters
                if (numSurrogatePairs > 0)
                {
                    // Hard way: walk i code points forward from the start of this label
                    int iCount;
                    for (iCount = i, iUseInsertLocation = iOutputAfterLastDot; iCount > 0; iCount--, iUseInsertLocation++)
                    {
                        if (iUseInsertLocation >= output.Length)
                        {
                            throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                        }

                        // If its a surrogate, we have to go one more
                        if (char.IsSurrogate(output[iUseInsertLocation]))
                        {
                            iUseInsertLocation++;
                        }
                    }
                }
                else
                {
                    // No Supplementary chars yet, just add i
                    iUseInsertLocation = iOutputAfterLastDot + i;
                }

                // Insert it
                output.Insert(iUseInsertLocation, strTemp);

                // If it was a surrogate increment our counter
                if (IsSupplementary(n))
                {
                    numSurrogatePairs++;
                }

                // Index gets updated
                i++;
            }

            // Do BIDI testing. output is not modified while testing, so take a single
            // snapshot instead of materializing a new string for every character examined.
            string decoded = output.ToString();

            // Check for RTL. If right-to-left, then 1st & last chars must be RTL
            BidiCategory eBidi = CharUnicodeInfo.GetBidiCategory(decoded, iOutputAfterLastDot);
            bool bRightToLeft = eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic;

            // Check the rest of them to make sure RTL/LTR is consistent
            for (int iTest = iOutputAfterLastDot; iTest < decoded.Length; iTest++)
            {
                // This might happen if we run into a pair
                if (char.IsLowSurrogate(decoded, iTest))
                {
                    continue;
                }

                // Check to see if its LTR
                eBidi = CharUnicodeInfo.GetBidiCategory(decoded, iTest);
                if ((bRightToLeft && eBidi == BidiCategory.LeftToRight) ||
                    (!bRightToLeft && (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic)))
                {
                    throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(ascii));
                }
            }

            // Its also a requirement that the last one be RTL if 1st is RTL.
            // eBidi now holds the category of the last character examined by the loop above.
            if (bRightToLeft && eBidi != BidiCategory.RightToLeft && eBidi != BidiCategory.RightToLeftArabic)
            {
                // Oops, last wasn't RTL, last should be RTL if first is RTL
                throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(ascii));
            }
        }

        // See if this label was too long.
        // NOTE(review): this measures the input label (minus the ACE prefix when present), so
        // it cannot trigger after the earlier label-size check has passed. If the intent was
        // to limit the decoded label length, this needs to be revisited - confirm.
        if (iNextDot - iAfterLastDot > c_labelLimit)
        {
            throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));
        }

        // Done with this segment, add dot if necessary
        if (iNextDot != ascii.Length)
        {
            output.Append('.');
        }

        iAfterLastDot = iNextDot + 1;
        iOutputAfterLastDot = output.Length;
    }

    // Throw if we're too long. One more character is allowed when the name ends with a dot.
    int maxOutputLength = c_defaultNameLimit - (IsDot(output[output.Length - 1]) ? 0 : 1);
    if (output.Length > maxOutputLength)
    {
        throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, maxOutputLength), nameof(ascii));
    }

    // Return our output string
    return output.ToString();
}