/// <summary>
/// Finishes parsing of the URI string after <c>EnsureUriInfo()</c> has parsed up to the path.
/// Checks the canonical form of the scheme, user info, path, query and fragment, records the
/// Path/Query/Fragment/End offsets in <c>_info.Offset</c>, and publishes the accumulated
/// result into <c>_flags</c>.
/// </summary>
/// <remarks>
/// When IRI parsing is enabled and the string contains Unicode that has not been normalized yet,
/// the path, query and fragment are re-read from <c>_originalUnicodeString</c>, passed through
/// <c>EscapeUnescapeIri</c>, and appended to <c>_string</c> before being checked.
/// All flag bits, including <c>Flags.RestUnicodeNormalized</c>, are written to <c>_flags</c>
/// in a single update performed under <c>lock (_info)</c>.
/// </remarks>
/// <exception cref="UriFormatException">
/// Thrown (with <c>ParsingError.BadFormat</c>) if appending a rebuilt component to
/// <c>_string</c> raises an <see cref="ArgumentException"/>.
/// </exception>
private unsafe void ParseRemaining()
{
    // ensure we parsed up to the path
    EnsureUriInfo();

    // Flags are accumulated locally and published to _flags once, at the Done label.
    Flags cF = Flags.Zero;

    if (UserDrivenParsing)
        goto Done;

    // Do we have to continue building Iri'zed string from original string
    bool buildIriStringFromPath = _iriParsing && ((_flags & Flags.HasUnicode) != 0) && ((_flags & Flags.RestUnicodeNormalized) == 0);

    ushort origIdx; // index into _originalUnicodeString (only advanced when buildIriStringFromPath is set)
    ushort idx = _info.Offset.Scheme;
    ushort length = (ushort)_string.Length;
    Check result = Check.None;
    UriSyntaxFlags syntaxFlags = _syntax.Flags; // perf

    // _info.Offset values may be parsed twice but we lock only on the _flags update.

    fixed (char* str = _string)
    {
        // Exclude trailing whitespace from the scanned length (the string itself is not modified here)
        if (length > idx && UriHelper.IsLWS(str[length - 1]))
        {
            --length;
            while (length != idx && UriHelper.IsLWS(str[--length]))
                ;
            ++length;
        }

        if (IsImplicitFile)
        {
            cF |= Flags.SchemeNotCanonical;
        }
        else
        {
            // Compare the scheme as written against the syntax's scheme name, character by character
            ushort i = 0;
            ushort syntaxLength = (ushort)_syntax.SchemeName.Length;
            for (; i < syntaxLength; ++i)
            {
                if (_syntax.SchemeName[i] != str[idx + i])
                {
                    cF |= Flags.SchemeNotCanonical;
                }
            }

            // For an authority Uri only // after the scheme would be canonical
            // (for compatibility with: http:\\host)
            if (((_flags & Flags.AuthorityFound) != 0) && (idx + i + 3 >= length || str[idx + i + 1] != '/' ||
                str[idx + i + 2] != '/'))
            {
                cF |= Flags.SchemeNotCanonical;
            }
        }

        // Check the form of the user info
        if ((_flags & Flags.HasUserInfo) != 0)
        {
            idx = _info.Offset.User;
            result = CheckCanonical(str, ref idx, _info.Offset.Host, '@');
            if ((result & Check.DisplayCanonical) == 0)
            {
                cF |= Flags.UserNotCanonical;
            }
            if ((result & (Check.EscapedCanonical | Check.BackslashInPath)) != Check.EscapedCanonical)
            {
                cF |= Flags.E_UserNotCanonical;
            }
            if (_iriParsing && ((result & (Check.DisplayCanonical | Check.EscapedCanonical | Check.BackslashInPath
                                            | Check.FoundNonAscii | Check.NotIriCanonical))
                                            == (Check.DisplayCanonical | Check.FoundNonAscii)))
            {
                cF |= Flags.UserIriCanonical;
            }
        }
    }

    //
    // Delay canonical Host checking to avoid creation of a host string
    // Will do that on demand.
    //

    //
    // We have already checked on the port in EnsureUriInfo() that calls CreateUriInfo
    //

    //
    // Parsing the Path if any
    //

    // For iri parsing, if we found unicode, the path offset refers to the original string,
    // so restart parsing from there and make _info.Offset.Path equal to _string.Length
    idx = _info.Offset.Path;
    origIdx = _info.Offset.Path;

    // Some uris do not have a query
    //    When '?' is passed as delimiter, then it's special case
    //    so both '?' and '#' will work as delimiters
    if (buildIriStringFromPath)
    {
        // Dos paths have no host. Other schemes cleared/set _string with host information in PrivateParseMinimal.
        if (IsDosPath)
        {
            if (IsImplicitFile)
            {
                _string = string.Empty;
            }
            else
            {
                _string = _syntax.SchemeName + SchemeDelimiter;
            }
        }

        // If host is absent, uri is abnormal and relative as in RFC 3986 section 5.4.2
        if (_info.Offset.Host == _info.Offset.Path)
        {
            _string = _syntax.SchemeName + ":";
        }

        // The path now starts at the end of what has been rebuilt so far
        _info.Offset.Path = (ushort)_string.Length;
        idx = _info.Offset.Path;

        // Locate the end of the path in the original string; origIdx is advanced to it
        ushort offset = origIdx;
        if (IsImplicitFile || ((syntaxFlags & (UriSyntaxFlags.MayHaveQuery | UriSyntaxFlags.MayHaveFragment)) == 0))
        {
            FindEndOfComponent(_originalUnicodeString, ref origIdx, (ushort)_originalUnicodeString.Length, c_DummyChar);
        }
        else
        {
            FindEndOfComponent(_originalUnicodeString, ref origIdx, (ushort)_originalUnicodeString.Length,
                (_syntax.InFact(UriSyntaxFlags.MayHaveQuery) ? '?' : _syntax.InFact(UriSyntaxFlags.MayHaveFragment) ? '#' : c_EOL));
        }

        // Correctly escape unescape
        string escapedPath = EscapeUnescapeIri(_originalUnicodeString, offset, origIdx, UriComponents.Path);

        // Append the processed path to the rebuilt string
        try
        {
            _string += escapedPath;
        }
        catch (ArgumentException)
        {
            UriFormatException e = GetException(ParsingError.BadFormat);
            throw e;
        }

        length = (ushort)_string.Length;
    }

    fixed (char* str = _string)
    {
        if (IsImplicitFile || ((syntaxFlags & (UriSyntaxFlags.MayHaveQuery | UriSyntaxFlags.MayHaveFragment)) == 0))
        {
            result = CheckCanonical(str, ref idx, length, c_DummyChar);
        }
        else
        {
            result = CheckCanonical(str, ref idx, length, (((syntaxFlags & UriSyntaxFlags.MayHaveQuery) != 0)
                ? '?' : _syntax.InFact(UriSyntaxFlags.MayHaveFragment) ? '#' : c_EOL));
        }

        // ATTN:
        // This may render problems for unknown schemes, but in general for an authority based Uri
        // (that has slashes) a path should start with "/"
        // This becomes more interesting knowing how a file uri is used in "file://c:/path"
        // It will be converted to file:///c:/path
        //
        // However, even more interesting is that vsmacros://c:\path will not add the third slash in the canonical case
        //
        // We use special syntax flag to check if the path is rooted, i.e. has a first slash
        //
        if (((_flags & Flags.AuthorityFound) != 0) && ((syntaxFlags & UriSyntaxFlags.PathIsRooted) != 0)
            && (_info.Offset.Path == length || (str[_info.Offset.Path] != '/' && str[_info.Offset.Path] != '\\')))
        {
            cF |= Flags.FirstSlashAbsent;
        }
    }

    // Check the need for compression or backslashes conversion
    // we included IsDosPath since it may come with other than FILE uri, for ex. scheme://C:\path
    // (This is very unfortunate that the original design has included that feature)
    bool nonCanonical = false;
    if (IsDosPath || (((_flags & Flags.AuthorityFound) != 0) &&
        (((syntaxFlags & (UriSyntaxFlags.CompressPath | UriSyntaxFlags.ConvertPathSlashes)) != 0) ||
        _syntax.InFact(UriSyntaxFlags.UnEscapeDotsAndSlashes))))
    {
        if (((result & Check.DotSlashEscaped) != 0) && _syntax.InFact(UriSyntaxFlags.UnEscapeDotsAndSlashes))
        {
            cF |= (Flags.E_PathNotCanonical | Flags.PathNotCanonical);
            nonCanonical = true;
        }

        if (((syntaxFlags & (UriSyntaxFlags.ConvertPathSlashes)) != 0) && (result & Check.BackslashInPath) != 0)
        {
            cF |= (Flags.E_PathNotCanonical | Flags.PathNotCanonical);
            nonCanonical = true;
        }

        // Depends on E_PathNotCanonical possibly having been set by the two checks just above
        if (((syntaxFlags & (UriSyntaxFlags.CompressPath)) != 0) && ((cF & Flags.E_PathNotCanonical) != 0 ||
            (result & Check.DotSlashAttn) != 0))
        {
            cF |= Flags.ShouldBeCompressed;
        }

        if ((result & Check.BackslashInPath) != 0)
        {
            cF |= Flags.BackslashInPath;
        }
    }
    else if ((result & Check.BackslashInPath) != 0)
    {
        // for a "generic" path '\' should be escaped
        cF |= Flags.E_PathNotCanonical;
        nonCanonical = true;
    }

    if ((result & Check.DisplayCanonical) == 0)
    {
        // For implicit file the user string is usually in perfect display format,
        // Hence, ignoring complaints from CheckCanonical()
        // V1 compat. In fact we should simply ignore dontEscape parameter for Implicit file.
        // Currently we don't.
        if (((_flags & Flags.ImplicitFile) == 0) || ((_flags & Flags.UserEscaped) != 0) ||
            (result & Check.ReservedFound) != 0)
        {
            // means it's found as escaped or has unescaped Reserved Characters
            cF |= Flags.PathNotCanonical;
            nonCanonical = true;
        }
    }

    if (((_flags & Flags.ImplicitFile) != 0) && (result & (Check.ReservedFound | Check.EscapedCanonical)) != 0)
    {
        // need to escape reserved chars or re-escape '%' if an "escaped sequence" was found
        result &= ~Check.EscapedCanonical;
    }

    if ((result & Check.EscapedCanonical) == 0)
    {
        // means it's found as not completely escaped
        cF |= Flags.E_PathNotCanonical;
    }

    // Both operands are side-effect free, so short-circuit && is used throughout
    if (_iriParsing && !nonCanonical && ((result & (Check.DisplayCanonical | Check.EscapedCanonical
                    | Check.FoundNonAscii | Check.NotIriCanonical))
                    == (Check.DisplayCanonical | Check.FoundNonAscii)))
    {
        cF |= Flags.PathIriCanonical;
    }

    //
    // Now we've got to parse the Query if any. Note that Query requires the presence of '?'
    //
    if (buildIriStringFromPath)
    {
        ushort offset = origIdx;

        if (origIdx < _originalUnicodeString.Length && _originalUnicodeString[origIdx] == '?')
        {
            ++origIdx; // This is to exclude first '?' character from checking
            FindEndOfComponent(_originalUnicodeString, ref origIdx, (ushort)_originalUnicodeString.Length, ((syntaxFlags & (UriSyntaxFlags.MayHaveFragment)) != 0) ? '#' : c_EOL);

            // Correctly escape unescape (the range starts at offset, so it includes the leading '?')
            string escapedPath = EscapeUnescapeIri(_originalUnicodeString, offset, origIdx, UriComponents.Query);

            // Append the processed query to the rebuilt string
            try
            {
                _string += escapedPath;
            }
            catch (ArgumentException)
            {
                UriFormatException e = GetException(ParsingError.BadFormat);
                throw e;
            }

            length = (ushort)_string.Length;
        }
    }

    // The query offset is recorded even when no '?' is present
    _info.Offset.Query = idx;

    fixed (char* str = _string)
    {
        if (idx < length && str[idx] == '?')
        {
            ++idx; // This is to exclude first '?' character from checking
            result = CheckCanonical(str, ref idx, length, ((syntaxFlags & (UriSyntaxFlags.MayHaveFragment)) != 0)
                ? '#' : c_EOL);
            if ((result & Check.DisplayCanonical) == 0)
            {
                cF |= Flags.QueryNotCanonical;
            }

            if ((result & (Check.EscapedCanonical | Check.BackslashInPath)) != Check.EscapedCanonical)
            {
                cF |= Flags.E_QueryNotCanonical;
            }

            if (_iriParsing && ((result & (Check.DisplayCanonical | Check.EscapedCanonical | Check.BackslashInPath
                        | Check.FoundNonAscii | Check.NotIriCanonical))
                        == (Check.DisplayCanonical | Check.FoundNonAscii)))
            {
                cF |= Flags.QueryIriCanonical;
            }
        }
    }

    //
    // Now we've got to parse the Fragment if any. Note that Fragment requires the presence of '#'
    //
    if (buildIriStringFromPath)
    {
        ushort offset = origIdx;

        if (origIdx < _originalUnicodeString.Length && _originalUnicodeString[origIdx] == '#')
        {
            ++origIdx; // This is to exclude first '#' character from checking
            FindEndOfComponent(_originalUnicodeString, ref origIdx, (ushort)_originalUnicodeString.Length, c_EOL);

            // Correctly escape unescape (the range starts at offset, so it includes the leading '#')
            string escapedPath = EscapeUnescapeIri(_originalUnicodeString, offset, origIdx, UriComponents.Fragment);

            // Append the processed fragment to the rebuilt string
            try
            {
                _string += escapedPath;
            }
            catch (ArgumentException)
            {
                UriFormatException e = GetException(ParsingError.BadFormat);
                throw e;
            }

            length = (ushort)_string.Length;
        }
    }

    // The fragment offset is recorded even when no '#' is present
    _info.Offset.Fragment = idx;

    fixed (char* str = _string)
    {
        if (idx < length && str[idx] == '#')
        {
            ++idx; // This is to exclude first '#' character from checking
            // We are not using c_DummyChar since we want to allow '?' and '#' as unescaped
            result = CheckCanonical(str, ref idx, length, c_EOL);
            if ((result & Check.DisplayCanonical) == 0)
            {
                cF |= Flags.FragmentNotCanonical;
            }

            if ((result & (Check.EscapedCanonical | Check.BackslashInPath)) != Check.EscapedCanonical)
            {
                cF |= Flags.E_FragmentNotCanonical;
            }

            if (_iriParsing && ((result & (Check.DisplayCanonical | Check.EscapedCanonical | Check.BackslashInPath
                        | Check.FoundNonAscii | Check.NotIriCanonical))
                        == (Check.DisplayCanonical | Check.FoundNonAscii)))
            {
                cF |= Flags.FragmentIriCanonical;
            }
        }
    }

    _info.Offset.End = idx;

Done:
    // RestUnicodeNormalized is published together with the other flags, inside the lock.
    // Setting it with a separate unlocked "_flags |= ..." is a non-atomic read-modify-write that
    // could drop flags written concurrently, and would briefly expose AllUriInfoSet without
    // RestUnicodeNormalized.
    // NOTE(review): assumes other writers of _flags also synchronize on _info — confirm.
    cF |= Flags.AllUriInfoSet | Flags.RestUnicodeNormalized;
    lock (_info)
    {
        _flags |= cF;
    }
}