System.Uri.ParseRemaining C# (CSharp) Method

Uri Class Documentation Show file Open project: dotnet/corefx
ParseRemaining() private method

private ParseRemaining ( ) : void
return	void
        /// <summary>
        /// Finishes parsing the parts of the URI that follow the authority.
        /// Checks the scheme, user info, path, query and fragment for canonical form,
        /// records the Path/Query/Fragment/End offsets in <c>_info.Offset</c>, and merges the
        /// resulting flags (always including <c>Flags.AllUriInfoSet</c>) into <c>_flags</c>.
        /// </summary>
        /// <remarks>
        /// When IRI parsing is on and the string contains Unicode that has not been normalized yet,
        /// <c>_string</c> is rebuilt piece by piece (path, then query, then fragment) from
        /// <c>_originalUnicodeString</c> through <c>EscapeUnescapeIri</c>.
        /// If <c>UserDrivenParsing</c> is set, all component checks are skipped and only the final
        /// flags are published.
        /// </remarks>
        /// <exception cref="UriFormatException">
        /// Raised from the handlers that catch <see cref="ArgumentException"/> while the
        /// rebuilt string is being appended to.
        /// </exception>
        private unsafe void ParseRemaining()
        {
            // ensure we parsed up to the path
            EnsureUriInfo();

            // Flags are accumulated locally and merged into _flags once, under the lock, at Done.
            Flags cF = Flags.Zero;

            if (UserDrivenParsing)
                goto Done;

            // Do we have to continue building Iri'zed string from original string
            bool buildIriStringFromPath = _iriParsing && ((_flags & Flags.HasUnicode) != 0) && ((_flags & Flags.RestUnicodeNormalized) == 0);

            ushort origIdx;     // stores index to switched original string
            ushort idx = _info.Offset.Scheme;
            ushort length = (ushort)_string.Length;
            Check result = Check.None;
            UriSyntaxFlags syntaxFlags = _syntax.Flags;    // perf

            // _info.Offset values may be parsed twice but we lock only on _flags update.

            fixed (char* str = _string)
            {
                // Cut trailing spaces in _string
                if (length > idx && UriHelper.IsLWS(str[length - 1]))
                {
                    --length;
                    while (length != idx && UriHelper.IsLWS(str[--length]))
                        ;
                    ++length;
                }

                if (IsImplicitFile)
                {
                    cF |= Flags.SchemeNotCanonical;
                }
                else
                {
                    // Compare the scheme as written against the syntax's scheme name.
                    // The loop deliberately runs to the end: 'i' must equal the scheme length
                    // afterwards because it is used to locate the "//" below.
                    ushort i = 0;
                    ushort syntaxLength = (ushort)_syntax.SchemeName.Length;
                    for (; i < syntaxLength; ++i)
                    {
                        if (_syntax.SchemeName[i] != str[idx + i])
                            cF |= Flags.SchemeNotCanonical;
                    }
                    // For an authority Uri only // after the scheme would be canonical
                    // (for compatibility with: http:\\host)
                    if (((_flags & Flags.AuthorityFound) != 0) && (idx + i + 3 >= length || str[idx + i + 1] != '/' ||
                        str[idx + i + 2] != '/'))
                    {
                        cF |= Flags.SchemeNotCanonical;
                    }
                }


                //Check the form of the user info
                if ((_flags & Flags.HasUserInfo) != 0)
                {
                    idx = _info.Offset.User;
                    result = CheckCanonical(str, ref idx, _info.Offset.Host, '@');
                    if ((result & Check.DisplayCanonical) == 0)
                    {
                        cF |= Flags.UserNotCanonical;
                    }
                    // Escaped-canonical only if EscapedCanonical is set AND no backslash was seen.
                    if ((result & (Check.EscapedCanonical | Check.BackslashInPath)) != Check.EscapedCanonical)
                    {
                        cF |= Flags.E_UserNotCanonical;
                    }
                    // IRI-canonical: of the masked bits, exactly DisplayCanonical and FoundNonAscii are set.
                    if (_iriParsing && ((result & (Check.DisplayCanonical | Check.EscapedCanonical | Check.BackslashInPath
                                                    | Check.FoundNonAscii | Check.NotIriCanonical))
                                                    == (Check.DisplayCanonical | Check.FoundNonAscii)))
                    {
                        cF |= Flags.UserIriCanonical;
                    }
                }
            }
            //
            // Delay canonical Host checking to avoid creation of a host string
            // Will do that on demand.
            //


            //
            //We have already checked on the port in EnsureUriInfo() that calls CreateUriInfo
            //

            //
            // Parsing the Path if any
            //

            // For iri parsing if we found unicode the idx has offset into the original string..
            // so restart parsing from there and make _info.Offset.Path as _string.Length

            idx = _info.Offset.Path;
            origIdx = _info.Offset.Path;

            //Some uris do not have a query
            //    When '?' is passed as delimiter, then it's special case
            //    so both '?' and '#' will work as delimiters
            if (buildIriStringFromPath)
            {
                // Dos paths have no host.  Other schemes cleared/set _string with host information in PrivateParseMinimal.
                if (IsDosPath)
                {
                    if (IsImplicitFile)
                    {
                        _string = string.Empty;
                    }
                    else
                    {
                        _string = _syntax.SchemeName + SchemeDelimiter;
                    }
                }

                // If host is absent, uri is abnormal and relative as in RFC 3986 section 5.4.2
                if (_info.Offset.Host == _info.Offset.Path)
                {
                    _string = _syntax.SchemeName + ":";
                }

                // The path now starts at the end of whatever prefix _string currently holds.
                _info.Offset.Path = (ushort)_string.Length;
                idx = _info.Offset.Path;

                // 'offset' remembers where the path starts in the original string;
                // FindEndOfComponent advances origIdx to where it ends.
                ushort offset = origIdx;
                if (IsImplicitFile || ((syntaxFlags & (UriSyntaxFlags.MayHaveQuery | UriSyntaxFlags.MayHaveFragment)) == 0))
                {
                    FindEndOfComponent(_originalUnicodeString, ref origIdx, (ushort)_originalUnicodeString.Length, c_DummyChar);
                }
                else
                {
                    FindEndOfComponent(_originalUnicodeString, ref origIdx, (ushort)_originalUnicodeString.Length,
                        (_syntax.InFact(UriSyntaxFlags.MayHaveQuery) ? '?' : _syntax.InFact(UriSyntaxFlags.MayHaveFragment) ? '#' : c_EOL));
                }

                // Correctly escape unescape
                string escapedPath = EscapeUnescapeIri(_originalUnicodeString, offset, origIdx, UriComponents.Path);

                // Append the escaped path to the rebuilt string.
                // NOTE(review): plain string concatenation is not expected to raise ArgumentException;
                // this handler looks like a leftover from an earlier normalization call - confirm before removing.
                try
                {
                    _string += escapedPath;
                }
                catch (ArgumentException)
                {
                    UriFormatException e = GetException(ParsingError.BadFormat);
                    throw e;
                }

                length = (ushort)_string.Length;
            }

            fixed (char* str = _string)
            {
                if (IsImplicitFile || ((syntaxFlags & (UriSyntaxFlags.MayHaveQuery | UriSyntaxFlags.MayHaveFragment)) == 0))
                {
                    result = CheckCanonical(str, ref idx, length, c_DummyChar);
                }
                else
                {
                    result = CheckCanonical(str, ref idx, length, (((syntaxFlags & UriSyntaxFlags.MayHaveQuery) != 0)
                        ? '?' : _syntax.InFact(UriSyntaxFlags.MayHaveFragment) ? '#' : c_EOL));
                }

                // ATTN:
                // This may render problems for unknown schemes, but in general for an authority based Uri
                // (that has slashes) a path should start with "/"
                // This becomes more interesting knowing how a file uri is used in "file://c:/path"
                // It will be converted to file:///c:/path
                //
                // However, even more interesting is that vsmacros://c:\path will not add the third slash in the _canonical_ case
                //
                // We use special syntax flag to check if the path is rooted, i.e. has a first slash
                //
                if (((_flags & Flags.AuthorityFound) != 0) && ((syntaxFlags & UriSyntaxFlags.PathIsRooted) != 0)
                    && (_info.Offset.Path == length || (str[_info.Offset.Path] != '/' && str[_info.Offset.Path] != '\\')))
                {
                    cF |= Flags.FirstSlashAbsent;
                }
            }
            // Check the need for compression or backslashes conversion
            // we included IsDosPath since it may come with other than FILE uri, for ex. scheme://C:\path
            // (This is very unfortunate that the original design has included that feature)
            bool nonCanonical = false;
            if (IsDosPath || (((_flags & Flags.AuthorityFound) != 0) &&
                (((syntaxFlags & (UriSyntaxFlags.CompressPath | UriSyntaxFlags.ConvertPathSlashes)) != 0) ||
                _syntax.InFact(UriSyntaxFlags.UnEscapeDotsAndSlashes))))
            {
                if (((result & Check.DotSlashEscaped) != 0) && _syntax.InFact(UriSyntaxFlags.UnEscapeDotsAndSlashes))
                {
                    cF |= (Flags.E_PathNotCanonical | Flags.PathNotCanonical);
                    nonCanonical = true;
                }

                if (((syntaxFlags & (UriSyntaxFlags.ConvertPathSlashes)) != 0) && (result & Check.BackslashInPath) != 0)
                {
                    cF |= (Flags.E_PathNotCanonical | Flags.PathNotCanonical);
                    nonCanonical = true;
                }

                if (((syntaxFlags & (UriSyntaxFlags.CompressPath)) != 0) && ((cF & Flags.E_PathNotCanonical) != 0 ||
                    (result & Check.DotSlashAttn) != 0))
                {
                    cF |= Flags.ShouldBeCompressed;
                }

                if ((result & Check.BackslashInPath) != 0)
                    cF |= Flags.BackslashInPath;
            }
            else if ((result & Check.BackslashInPath) != 0)
            {
                // for a "generic" path '\' should be escaped
                cF |= Flags.E_PathNotCanonical;
                nonCanonical = true;
            }

            if ((result & Check.DisplayCanonical) == 0)
            {
                // For implicit file the user string is usually in perfect display format,
                // Hence, ignoring complaints from CheckCanonical()
                // V1 compat. In fact we should simply ignore dontEscape parameter for Implicit file.
                // Currently we don't.
                if (((_flags & Flags.ImplicitFile) == 0) || ((_flags & Flags.UserEscaped) != 0) ||
                    (result & Check.ReservedFound) != 0)
                {
                    //means it's found as escaped or has unescaped Reserved Characters
                    cF |= Flags.PathNotCanonical;
                    nonCanonical = true;
                }
            }

            if (((_flags & Flags.ImplicitFile) != 0) && (result & (Check.ReservedFound | Check.EscapedCanonical)) != 0)
            {
                // need to escape reserved chars or re-escape '%' if an "escaped sequence" was found
                result &= ~Check.EscapedCanonical;
            }

            if ((result & Check.EscapedCanonical) == 0)
            {
                //means it's found as not completely escaped
                cF |= Flags.E_PathNotCanonical;
            }

            // Short-circuit '&&' throughout: the operands are plain booleans with no side effects,
            // so this evaluates to the same value as the former '&' but reads unambiguously.
            if (_iriParsing && !nonCanonical && ((result & (Check.DisplayCanonical | Check.EscapedCanonical
                            | Check.FoundNonAscii | Check.NotIriCanonical))
                            == (Check.DisplayCanonical | Check.FoundNonAscii)))
            {
                cF |= Flags.PathIriCanonical;
            }

            //
            //Now we've got to parse the Query if any. Note that Query requires the presence of '?'
            //
            if (buildIriStringFromPath)
            {
                ushort offset = origIdx;

                if (origIdx < _originalUnicodeString.Length && _originalUnicodeString[origIdx] == '?')
                {
                    ++origIdx; // This is to exclude first '?' character from checking
                    FindEndOfComponent(_originalUnicodeString, ref origIdx, (ushort)_originalUnicodeString.Length, ((syntaxFlags & (UriSyntaxFlags.MayHaveFragment)) != 0) ? '#' : c_EOL);

                    // Correctly escape unescape
                    string escapedPath = EscapeUnescapeIri(_originalUnicodeString, offset, origIdx, UriComponents.Query);

                    // Append the escaped query (including its leading '?') to the rebuilt string.
                    try
                    {
                        _string += escapedPath;
                    }
                    catch (ArgumentException)
                    {
                        UriFormatException e = GetException(ParsingError.BadFormat);
                        throw e;
                    }

                    length = (ushort)_string.Length;
                }
            }

            // idx is where the path check stopped; that is the query offset whether or not a '?' follows.
            _info.Offset.Query = idx;

            fixed (char* str = _string)
            {
                if (idx < length && str[idx] == '?')
                {
                    ++idx; // This is to exclude first '?' character from checking
                    result = CheckCanonical(str, ref idx, length, ((syntaxFlags & (UriSyntaxFlags.MayHaveFragment)) != 0)
                        ? '#' : c_EOL);
                    if ((result & Check.DisplayCanonical) == 0)
                    {
                        cF |= Flags.QueryNotCanonical;
                    }

                    if ((result & (Check.EscapedCanonical | Check.BackslashInPath)) != Check.EscapedCanonical)
                    {
                        cF |= Flags.E_QueryNotCanonical;
                    }

                    if (_iriParsing && ((result & (Check.DisplayCanonical | Check.EscapedCanonical | Check.BackslashInPath
                                | Check.FoundNonAscii | Check.NotIriCanonical))
                                == (Check.DisplayCanonical | Check.FoundNonAscii)))
                    {
                        cF |= Flags.QueryIriCanonical;
                    }
                }
            }
            //
            //Now we've got to parse the Fragment if any. Note that Fragment requires the presence of '#'
            //
            if (buildIriStringFromPath)
            {
                ushort offset = origIdx;

                if (origIdx < _originalUnicodeString.Length && _originalUnicodeString[origIdx] == '#')
                {
                    ++origIdx; // This is to exclude first '#' character from checking
                    FindEndOfComponent(_originalUnicodeString, ref origIdx, (ushort)_originalUnicodeString.Length, c_EOL);

                    // Correctly escape unescape
                    string escapedPath = EscapeUnescapeIri(_originalUnicodeString, offset, origIdx, UriComponents.Fragment);

                    // Append the escaped fragment (including its leading '#') to the rebuilt string.
                    try
                    {
                        _string += escapedPath;
                    }
                    catch (ArgumentException)
                    {
                        UriFormatException e = GetException(ParsingError.BadFormat);
                        throw e;
                    }

                    length = (ushort)_string.Length;
                }
            }

            // idx is where the query check stopped (or the path check, if there was no query).
            _info.Offset.Fragment = idx;

            fixed (char* str = _string)
            {
                if (idx < length && str[idx] == '#')
                {
                    ++idx; // This is to exclude first '#' character from checking
                    //We are not using c_DummyChar since we want to allow '?' and '#' as unescaped
                    result = CheckCanonical(str, ref idx, length, c_EOL);
                    if ((result & Check.DisplayCanonical) == 0)
                    {
                        cF |= Flags.FragmentNotCanonical;
                    }

                    if ((result & (Check.EscapedCanonical | Check.BackslashInPath)) != Check.EscapedCanonical)
                    {
                        cF |= Flags.E_FragmentNotCanonical;
                    }

                    if (_iriParsing && ((result & (Check.DisplayCanonical | Check.EscapedCanonical | Check.BackslashInPath
                                | Check.FoundNonAscii | Check.NotIriCanonical))
                                == (Check.DisplayCanonical | Check.FoundNonAscii)))
                    {
                        cF |= Flags.FragmentIriCanonical;
                    }
                }
            }
            _info.Offset.End = idx;
        Done:

            // Publish every flag in a single locked update. RestUnicodeNormalized used to be OR-ed
            // into _flags after the lock was released; that unlocked read-modify-write could race
            // with a locked update on another thread and drop bits.
            cF |= Flags.AllUriInfoSet | Flags.RestUnicodeNormalized;
            lock (_info)
            {
                _flags |= cF;
            }
        }
Uri
AllowIdnStatic
CalculateCaseInsensitiveHashCode
Canonicalize
CheckAuthorityHelper
CheckAuthorityHelperHandleAnyHostIri
CheckAuthorityHelperHandleDnsIri
CheckCanonical
CheckForColonInFirstPathSegment
CheckForEscapedUnreserved
CheckForUnicode
CheckHostName