System.Globalization.CharUnicodeInfo.GetBidiCategory C# (CSharp) Method

CharUnicodeInfo Class Documentation Usage Examples Of System.Globalization.CharUnicodeInfo::GetBidiCategory Show file Open project: gbarnett/shared-source-cli-2.0

GetBidiCategory() static private method

static private GetBidiCategory ( String s, int index ) : BidiCategory
s	String
index	int
return	BidiCategory

        internal static BidiCategory GetBidiCategory(String s, int index) {
            if (s==null)
                throw new ArgumentNullException("s");
            if (((uint)index)>=((uint)s.Length)) {
                throw new ArgumentOutOfRangeException("index");
            }
            return ((BidiCategory)InternalGetCategoryValue(InternalConvertToUtf32(s, index), BIDI_CATEGORY_OFFSET));
        }

Usage Example

Example #1

Show file

        /* PunycodeDecode() converts Punycode to Unicode.  The input is  */
        /* represented as an array of ASCII code points, and the output   */
        /* will be represented as an array of Unicode code points.  The   */
        /* input_length is the number of code points in the input.  The   */
        /* output_length is an in/out argument: the caller passes in      */
        /* the maximum number of code points that it can receive, and     */
        /* on successful return it will contain the actual number of      */
        /* code points output.  The case_flags array needs room for at    */
        /* least output_length values, or it can be a null pointer if the */
        /* case information is not needed.  A nonzero flag suggests that  */
        /* the corresponding Unicode character be forced to uppercase     */
        /* by the caller (if possible), while zero suggests that it be    */
        /* forced to lowercase (if possible).  ASCII code points are      */
        /* output already in the proper case, but their flags will be set */
        /* appropriately so that applying the flags would be harmless.    */
        /* The return value can be any of the punycode_status values      */
        /* defined above; if not punycode_success, then output_length,    */
        /* output, and case_flags might contain garbage.  On success, the */
        /* decoder will never need to write an output_length greater than */
        /* input_length, because of how the encoding is defined.          */

        private static string PunycodeDecode(string ascii)
        {
            // 0 length strings aren't allowed
            if (ascii.Length == 0)
            {
                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));
            }

            // Throw if we're too long
            if (ascii.Length > c_defaultNameLimit - (IsDot(ascii[ascii.Length - 1]) ? 0 : 1))
            {
                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize,
                                                      c_defaultNameLimit - (IsDot(ascii[ascii.Length - 1]) ? 0 : 1)), nameof(ascii));
            }

            // output stringbuilder
            StringBuilder output = new StringBuilder(ascii.Length);

            // Dot searching
            int iNextDot            = 0;
            int iAfterLastDot       = 0;
            int iOutputAfterLastDot = 0;

            while (iNextDot < ascii.Length)
            {
                // Find end of this segment
                iNextDot = ascii.IndexOf('.', iAfterLastDot);
                if (iNextDot < 0 || iNextDot > ascii.Length)
                {
                    iNextDot = ascii.Length;
                }

                // Only allowed to have empty . section at end (www.microsoft.com.)
                if (iNextDot == iAfterLastDot)
                {
                    // Only allowed to have empty sections as trailing .
                    if (iNextDot != ascii.Length)
                    {
                        throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));
                    }

                    // Last dot, stop
                    break;
                }

                // In either case it can't be bigger than segment size
                if (iNextDot - iAfterLastDot > c_labelLimit)
                {
                    throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));
                }

                // See if this section's ASCII or ACE
                if (ascii.Length < c_strAcePrefix.Length + iAfterLastDot ||
                    !ascii.Substring(iAfterLastDot, c_strAcePrefix.Length).Equals(c_strAcePrefix, StringComparison.OrdinalIgnoreCase))
                {
                    // Its ASCII, copy it
                    output.Append(ascii.Substring(iAfterLastDot, iNextDot - iAfterLastDot));
                }
                else
                {
                    // Not ASCII, bump up iAfterLastDot to be after ACE Prefix
                    iAfterLastDot += c_strAcePrefix.Length;

                    // Get number of basic code points (where delimiter is)
                    // numBasicCodePoints < 0 if there're no basic code points
                    int iTemp = ascii.LastIndexOf(c_delimiter, iNextDot - 1);

                    // Trailing - not allowed
                    if (iTemp == iNextDot - 1)
                    {
                        throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                    }

                    int numBasicCodePoints;
                    if (iTemp <= iAfterLastDot)
                    {
                        numBasicCodePoints = 0;
                    }
                    else
                    {
                        numBasicCodePoints = iTemp - iAfterLastDot;

                        // Copy all the basic code points, making sure they're all in the allowed range,
                        // and losing the casing for all of them.
                        for (int copyAscii = iAfterLastDot; copyAscii < iAfterLastDot + numBasicCodePoints; copyAscii++)
                        {
                            // Make sure we don't allow unicode in the ascii part
                            if (ascii[copyAscii] > 0x7f)
                            {
                                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                            }

                            // When appending make sure they get lower cased
                            output.Append((char)(ascii[copyAscii] >= 'A' && ascii[copyAscii] <= 'Z' ? ascii[copyAscii] - 'A' + 'a' : ascii[copyAscii]));
                        }
                    }

                    // Get ready for main loop.  Start at beginning if we didn't have any
                    // basic code points, otherwise start after the -.
                    // asciiIndex will be next character to read from ascii
                    int asciiIndex = iAfterLastDot + (numBasicCodePoints > 0 ? numBasicCodePoints + 1 : 0);

                    // initialize our state
                    int n    = c_initialN;
                    int bias = c_initialBias;
                    int i    = 0;

                    int w, k;

                    // no Supplementary characters yet
                    int numSurrogatePairs = 0;

                    // Main loop, read rest of ascii
                    while (asciiIndex < iNextDot)
                    {
                        /* Decode a generalized variable-length integer into delta,  */
                        /* which gets added to i.  The overflow checking is easier   */
                        /* if we increase i as we go, then subtract off its starting */
                        /* value at the end to obtain delta.                         */
                        int oldi = i;

                        for (w = 1, k = c_punycodeBase;  ; k += c_punycodeBase)
                        {
                            // Check to make sure we aren't overrunning our ascii string
                            if (asciiIndex >= iNextDot)
                            {
                                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                            }

                            // decode the digit from the next char
                            int digit = DecodeDigit(ascii[asciiIndex++]);

                            Debug.Assert(w > 0, "[IdnMapping.punycode_decode]Expected w > 0");
                            if (digit > (c_maxint - i) / w)
                            {
                                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                            }

                            i += (int)(digit * w);
                            int t = k <= bias ? c_tmin : k >= bias + c_tmax ? c_tmax : k - bias;
                            if (digit < t)
                            {
                                break;
                            }
                            Debug.Assert(c_punycodeBase != t, "[IdnMapping.punycode_decode]Expected t != c_punycodeBase (36)");
                            if (w > c_maxint / (c_punycodeBase - t))
                            {
                                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                            }
                            w *= (c_punycodeBase - t);
                        }

                        bias = Adapt(i - oldi, (output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1, oldi == 0);

                        /* i was supposed to wrap around from output.Length to 0,   */
                        /* incrementing n each time, so we'll fix that now: */
                        Debug.Assert((output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1 > 0,
                                     "[IdnMapping.punycode_decode]Expected to have added > 0 characters this segment");
                        if (i / ((output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1) > c_maxint - n)
                        {
                            throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                        }
                        n += (int)(i / (output.Length - iOutputAfterLastDot - numSurrogatePairs + 1));
                        i %= (output.Length - iOutputAfterLastDot - numSurrogatePairs + 1);

                        // Make sure n is legal
                        if ((n < 0 || n > 0x10ffff) || (n >= 0xD800 && n <= 0xDFFF))
                        {
                            throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                        }

                        // insert n at position i of the output:  Really tricky if we have surrogates
                        int    iUseInsertLocation;
                        String strTemp = Char.ConvertFromUtf32(n);

                        // If we have supplimentary characters
                        if (numSurrogatePairs > 0)
                        {
                            // Hard way, we have supplimentary characters
                            int iCount;
                            for (iCount = i, iUseInsertLocation = iOutputAfterLastDot; iCount > 0; iCount--, iUseInsertLocation++)
                            {
                                // If its a surrogate, we have to go one more
                                if (iUseInsertLocation >= output.Length)
                                {
                                    throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                                }
                                if (Char.IsSurrogate(output[iUseInsertLocation]))
                                {
                                    iUseInsertLocation++;
                                }
                            }
                        }
                        else
                        {
                            // No Supplementary chars yet, just add i
                            iUseInsertLocation = iOutputAfterLastDot + i;
                        }

                        // Insert it
                        output.Insert(iUseInsertLocation, strTemp);

                        // If it was a surrogate increment our counter
                        if (IsSupplementary(n))
                        {
                            numSurrogatePairs++;
                        }

                        // Index gets updated
                        i++;
                    }

                    // Do BIDI testing
                    bool bRightToLeft = false;

                    // Check for RTL.  If right-to-left, then 1st & last chars must be RTL
                    BidiCategory eBidi = CharUnicodeInfo.GetBidiCategory(output.ToString(), iOutputAfterLastDot);
                    if (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic)
                    {
                        // It has to be right to left.
                        bRightToLeft = true;
                    }

                    // Check the rest of them to make sure RTL/LTR is consistent
                    for (int iTest = iOutputAfterLastDot; iTest < output.Length; iTest++)
                    {
                        // This might happen if we run into a pair
                        if (Char.IsLowSurrogate(output.ToString(), iTest))
                        {
                            continue;
                        }

                        // Check to see if its LTR
                        eBidi = CharUnicodeInfo.GetBidiCategory(output.ToString(), iTest);
                        if ((bRightToLeft && eBidi == BidiCategory.LeftToRight) ||
                            (!bRightToLeft && (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic)))
                        {
                            throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(ascii));
                        }
                    }

                    // Its also a requirement that the last one be RTL if 1st is RTL
                    if (bRightToLeft && eBidi != BidiCategory.RightToLeft && eBidi != BidiCategory.RightToLeftArabic)
                    {
                        // Oops, last wasn't RTL, last should be RTL if first is RTL
                        throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(ascii));
                    }
                }

                // See if this label was too long
                if (iNextDot - iAfterLastDot > c_labelLimit)
                {
                    throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));
                }

                // Done with this segment, add dot if necessary
                if (iNextDot != ascii.Length)
                {
                    output.Append('.');
                }

                iAfterLastDot       = iNextDot + 1;
                iOutputAfterLastDot = output.Length;
            }

            // Throw if we're too long
            if (output.Length > c_defaultNameLimit - (IsDot(output[output.Length - 1]) ? 0 : 1))
            {
                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, c_defaultNameLimit - (IsDot(output[output.Length - 1]) ? 0 : 1)), nameof(ascii));
            }

            // Return our output string
            return(output.ToString());
        }

All Usage Examples Of System.Globalization.CharUnicodeInfo::GetBidiCategory

CharUnicodeInfo

GetBidiCategory

GetDecimalDigitValue

GetDigitValue

GetNumericValue

GetUnicodeCategory

InternalConvertToUtf32

InternalGetCategoryValue

InternalGetDecimalDigitValue

InternalGetDigitValue

InternalGetDigitValues

InternalGetNumericValue

InternalGetUnicodeCategory

IsCombiningCategory

IsWhiteSpace

nativeInitTable