/// <summary>
/// Scans one token of XML element content beginning at the current scanner position.
/// </summary>
/// <remarks>
/// Character data is accumulated into a scratch buffer (line breaks are appended as
/// <c>UCH_LF</c>) until one of the following stops the run:
/// <list type="bullet">
/// <item><description><c>&amp;</c> - pending text is returned first; otherwise the reference is scanned.</description></item>
/// <item><description><c>&lt;</c> - pending non-whitespace text is returned first; a whitespace-only run is
/// discarded from the buffer and scanned as trivia for the markup token that follows.</description></item>
/// <item><description><c>]]&gt;</c> - pending text is returned first; otherwise a three character text token
/// carrying <c>ERR_XmlEndCDataNotAllowedInContent</c> is produced.</description></item>
/// <item><description>a character rejected by <c>ScanXmlChar</c> - pending text is returned first; otherwise
/// a one character bad token carrying <c>ERR_IllegalChar</c> is produced.</description></item>
/// <item><description>end of available input - pending text, or the end-of-file token when nothing was read.</description></item>
/// </list>
/// </remarks>
/// <returns>The token produced by whichever of the cases above was hit first.</returns>
internal SyntaxToken ScanXmlContent()
{
    int offset = 0;

    // Peek one character behind the current position: unless the scan resumes right
    // after '>' or whitespace, the run cannot be treated as whitespace-only.
    bool onlyWhitespaceSoFar = true;
    if (_lineBufferOffset > 0)
    {
        var previous = PeekAheadChar(-1);
        onlyWhitespaceSoFar = previous == '>' || XmlCharType.IsWhiteSpace(previous);
    }

    var scratch = GetScratch();

    while (CanGetCharAtOffset(offset))
    {
        char current = PeekAheadChar(offset);
        switch (current)
        {
            case UCH_CR:
            case UCH_LF:
                // Whatever form the line break takes, a single LF goes into the text.
                offset = SkipLineBreak(current, offset);
                scratch.Append(UCH_LF);
                break;

            case ' ':
            case UCH_TAB:
                scratch.Append(current);
                offset += 1;
                break;

            case '&':
                if (offset != 0)
                {
                    // Flush the text collected so far; the reference is handled on the next call.
                    return XmlMakeTextLiteralToken(null, offset, scratch);
                }

                return ScanXmlReference(null);

            case '<':
                SyntaxList<SyntaxNode> leadingTrivia = null;
                if (offset != 0)
                {
                    if (!onlyWhitespaceSoFar)
                    {
                        // Real text precedes the markup; return it before the markup token.
                        return XmlMakeTextLiteralToken(null, offset, scratch);
                    }

                    // Only whitespace was seen: drop the buffered copy and scan those
                    // characters as trivia for the markup token instead.
                    scratch.Clear();
                    offset = 0;
                    leadingTrivia = ScanXmlTrivia(PeekChar());
                }

                Debug.Assert(offset == 0);

                if (CanGetCharAtOffset(1))
                {
                    char second = PeekAheadChar(1);
                    if (second == '?')
                    {
                        return XmlMakeBeginProcessingInstructionToken(leadingTrivia, _scanNoTriviaFunc);
                    }

                    if (second == '/')
                    {
                        return XmlMakeBeginEndElementToken(leadingTrivia, _scanNoTriviaFunc);
                    }

                    if (second == '!' && CanGetCharAtOffset(2))
                    {
                        char third = PeekAheadChar(2);
                        if (third == '-')
                        {
                            // "<!--"
                            if (CanGetCharAtOffset(3) && PeekAheadChar(3) == '-')
                            {
                                return XmlMakeBeginCommentToken(leadingTrivia, _scanNoTriviaFunc);
                            }
                        }
                        else if (third == '[')
                        {
                            // "<![CDATA["
                            if (CanGetCharAtOffset(8)
                                && PeekAheadChar(3) == 'C'
                                && PeekAheadChar(4) == 'D'
                                && PeekAheadChar(5) == 'A'
                                && PeekAheadChar(6) == 'T'
                                && PeekAheadChar(7) == 'A'
                                && PeekAheadChar(8) == '[')
                            {
                                return XmlMakeBeginCDataToken(leadingTrivia, _scanNoTriviaFunc);
                            }
                        }
                        else if (third == 'D')
                        {
                            // "<!DOCTYPE"
                            if (CanGetCharAtOffset(8)
                                && PeekAheadChar(3) == 'O'
                                && PeekAheadChar(4) == 'C'
                                && PeekAheadChar(5) == 'T'
                                && PeekAheadChar(6) == 'Y'
                                && PeekAheadChar(7) == 'P'
                                && PeekAheadChar(8) == 'E')
                            {
                                return XmlMakeBeginDTDToken(leadingTrivia);
                            }
                        }
                    }
                }

                // Nothing more specific matched: a plain '<'.
                return XmlMakeLessToken(leadingTrivia);

            case ']':
                if (!CanGetCharAtOffset(offset + 2)
                    || PeekAheadChar(offset + 1) != ']'
                    || PeekAheadChar(offset + 2) != '>')
                {
                    // Not "]]>", so ']' is ordinary character data.
                    goto default;
                }

                if (offset != 0)
                {
                    // Return the valid characters found before the "]]>".
                    return XmlMakeTextLiteralToken(null, offset, scratch);
                }

                return XmlMakeTextLiteralToken(null, 3, ERRID.ERR_XmlEndCDataNotAllowedInContent);

            // TODO: error recovery. Per the notes carried over with this code, the earlier
            // scanner could abort XML scanning at '#' (when already in an error state and the
            // line looked like "# keyword") and returned "%>" as its own token so the end of
            // an embedded expression could be detected. Neither is done here; both characters
            // are scanned as ordinary content.
            case '#':
            case '%':
            default:
                // Anything reaching this point is not whitespace; check it is a valid XML char.
                onlyWhitespaceSoFar = false;
                var scanned = ScanXmlChar(offset);
                if (scanned.Length == 0)
                {
                    // Bad char: return the text before it, or report the char itself.
                    if (offset > 0)
                    {
                        return XmlMakeTextLiteralToken(null, offset, scratch);
                    }

                    return XmlMakeBadToken(null, 1, ERRID.ERR_IllegalChar);
                }

                scanned.AppendTo(scratch);
                offset += scanned.Length;
                break;
        }
    }

    // No more characters available.
    if (offset <= 0)
    {
        return MakeEofToken();
    }

    return XmlMakeTextLiteralToken(null, offset, scratch);
}