/// <summary>
/// Reads characters from <paramref name="reader"/> until it returns -1 and drives a
/// table-based state machine (see MiniParser.Xlat), reporting start/end of parsing,
/// elements with their attributes, and character data to <paramref name="handler"/>.
/// </summary>
/// <param name="reader">Character source. Must not be null.</param>
/// <param name="handler">Receiver of parse events. When null, a new HandlerAdapter is used.</param>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
/// <remarks>
/// The line/col fields are reset at the start and updated while reading. All syntax
/// errors are reported through FatalErr (defined elsewhere; presumably it throws and
/// does not return - confirm).
/// </remarks>
public void Parse(IReader reader, IHandler handler)
{
    if (reader == null) throw new ArgumentNullException("reader");
    if (handler == null) handler = new HandlerAdapter();

    AttrListImpl attrList = new AttrListImpl();   // attributes collected for the current start tag
    string lastAttrName = null;                   // attribute name waiting for its value
    Stack tagStack = new Stack();                 // names of the currently open elements
    string elementName = null;
    line = 1;
    col = 0;
    int currCh = 0;
    int stateCode = 0;
    StringBuilder sbChars = new StringBuilder();  // accumulates names, values and text
    bool seenCData = false;
    bool isComment = false;
    bool isDTD = false;
    int bracketSwitch = 0;

    handler.OnStartParsing(this);

    while (true) {
        ++this.col;
        int prevCh = currCh;
        currCh = reader.Read();

        if (currCh == -1) {
            // End of input is only legal while in state 0.
            if (stateCode != 0) {
                FatalErr("Unexpected EOF");
            }
            break;
        }

        // Classify the character by its position in the literal; a character that is
        // not listed yields IndexOf == -1, which the mask turns into 0xF.
        int charCode = "<>/?=&'\"![ ]\t\r\n".IndexOf((char)currCh) & 0xF;
        if (charCode == (int)CharKind.CR) continue; // ignore
        // whitespace ::= (#x20 | #x9 | #xd | #xa)+
        if (charCode == (int)CharKind.TAB) charCode = (int)CharKind.SPACE; // tab == space
        if (charCode == (int)CharKind.EOL) {
            this.col = 0;
            this.line++;
            charCode = (int)CharKind.SPACE;
        }

        // Xlat packs the next state into the low byte and the action into the bits above it.
        int actionCode = MiniParser.Xlat(charCode, stateCode);
        stateCode = actionCode & 0xFF;
        // Ignore newline inside attribute value.
        if (currCh == '\n' && (stateCode == 0xE || stateCode == 0xF)) continue;
        actionCode >>= 8;

        // States with the high bit set are error states: 0xFF is a dispatch failure,
        // any other value indexes the errors table once the high bit is cleared.
        if (stateCode >= 0x80) {
            if (stateCode == 0xFF) {
                FatalErr("State dispatch error.");
            } else {
                FatalErr(errors[stateCode ^ 0x80]);
            }
        }

        switch (actionCode) {
        case (int)ActionCode.START_ELEM:
            handler.OnStartElement(elementName, attrList);
            if (currCh != '/') {
                tagStack.Push(elementName);
            } else {
                // Triggered by '/': the element is closed immediately and never pushed.
                handler.OnEndElement(elementName);
            }
            attrList.Clear();
            break;

        case (int)ActionCode.END_ELEM:
            elementName = sbChars.ToString();
            sbChars = new StringBuilder();
            string endName = null;
            // elementName is the name found in the closing tag; endName (when the stack
            // is not empty) is the name of the innermost open element, i.e. the expected one.
            if (tagStack.Count == 0 ||
                elementName != (endName = tagStack.Pop() as string)) {
                if (endName == null) {
                    FatalErr("Tag stack underflow");
                } else {
                    // Fixed: the arguments used to be passed as (found, expected), which
                    // made the message report the two names the wrong way round.
                    FatalErr(String.Format("Expected end tag '{0}' but found '{1}'", endName, elementName));
                }
            }
            handler.OnEndElement(elementName);
            break;

        case (int)ActionCode.END_NAME:
            elementName = sbChars.ToString();
            sbChars = new StringBuilder();
            // A name followed directly by '/' or '>' has no attributes, so the start
            // element is reported right away.
            if (currCh != '/' && currCh != '>') break;
            goto case (int)ActionCode.START_ELEM;

        case (int)ActionCode.SET_ATTR_NAME:
            lastAttrName = sbChars.ToString();
            sbChars = new StringBuilder();
            break;

        case (int)ActionCode.SET_ATTR_VAL:
            if (lastAttrName == null) FatalErr("Internal error.");
            attrList.Add(lastAttrName, sbChars.ToString());
            sbChars = new StringBuilder();
            lastAttrName = null;
            break;

        case (int)ActionCode.SEND_CHARS:
            handler.OnChars(sbChars.ToString());
            sbChars = new StringBuilder();
            break;

        case (int)ActionCode.START_CDATA:
            // Dispatches on the current character: '-' starts a comment, '[' starts a
            // CDATA section, anything else is handled as a DTD-style declaration.
            string cdata = "CDATA[";
            isComment = false;
            isDTD = false;
            if (currCh == '-') {
                currCh = reader.Read();
                if (currCh != '-') FatalErr("Invalid comment");
                this.col++;
                isComment = true;
                // Reset the two-character history used to detect the closing "-->".
                twoCharBuff[0] = -1;
                twoCharBuff[1] = -1;
            } else {
                if (currCh != '[') {
                    isDTD = true;
                    bracketSwitch = 0;
                    break;
                }
                // NOTE(review): on a mismatch the inner break only leaves this for loop;
                // the input is still treated as a CDATA section and col is advanced a
                // second time below. Confirm whether this should be a fatal error.
                for (int i = 0; i < cdata.Length; i++) {
                    if (reader.Read() != cdata[i]) {
                        this.col += i+1;
                        break;
                    }
                }
                this.col += cdata.Length;
                seenCData = true;
            }
            break;

        case (int)ActionCode.END_CDATA:
            // Entered on a ']'. Consume the whole run of ']' plus the character after it;
            // n counts the characters read here (the trailing non-']' included).
            int n = 0;
            currCh = ']';
            while (currCh == ']') {
                currCh = reader.Read();
                n++;
            }
            if (currCh != '>') {
                // Not a terminator: everything consumed is literal content, and the
                // parser is put back into state 0x12 to keep accumulating.
                for (int i = 0; i < n; i++) sbChars.Append(']');
                sbChars.Append((char)currCh);
                stateCode = 0x12;
            } else {
                // "]]>" found: the last two ']' belong to the terminator, earlier ones are content.
                for (int i = 0; i < n-2; i++) sbChars.Append(']');
                seenCData = false;
            }
            this.col += n;
            break;

        case (int)ActionCode.ERROR:
            FatalErr(String.Format("Error {0}", stateCode));
            break;

        case (int)ActionCode.STATE_CHANGE:
            break;

        case (int)ActionCode.FLUSH_CHARS_STATE_CHANGE:
            // Drop whatever was buffered; keep the current character unless it is '<'.
            sbChars = new StringBuilder();
            if (currCh != '<') goto case (int)ActionCode.ACC_CHARS_STATE_CHANGE;
            break;

        case (int)ActionCode.ACC_CHARS_STATE_CHANGE:
            sbChars.Append((char)currCh);
            break;

        case (int)ActionCode.ACC_CDATA:
            if (isComment) {
                // Comment content is discarded; only the last two characters are
                // remembered so that "-->" can be recognised.
                if (currCh == '>'
                    && twoCharBuff[0] == '-'
                    && twoCharBuff[1] == '-') {
                    isComment = false;
                    stateCode = 0;
                } else {
                    twoCharBuff[0] = twoCharBuff[1];
                    twoCharBuff[1] = currCh;
                }
            } else if (isDTD) {
                // Declaration content is discarded. bracketSwitch flips on every '<' and
                // '>'; the declaration ends on a '>' that leaves the switch non-zero.
                if (currCh == '<' || currCh == '>') bracketSwitch ^= 1;
                if (currCh == '>' && bracketSwitch != 0) {
                    isDTD = false;
                    stateCode = 0;
                }
            } else {
                // First character of a CDATA section: when splitCData is set, text
                // buffered before the section is delivered as its own OnChars call.
                if (this.splitCData
                    && sbChars.Length > 0
                    && seenCData) {
                    handler.OnChars(sbChars.ToString());
                    sbChars = new StringBuilder();
                }
                seenCData = false;
                sbChars.Append((char)currCh);
            }
            break;

        case (int)ActionCode.PROC_CHAR_REF:
            // Entered on '&'. The reference is consumed directly from the reader here,
            // so the column is tracked in cl and written back to col at the end.
            currCh = reader.Read();
            int cl = this.col + 1;
            if (currCh == '#') { // character reference
                int r = 10;        // radix: 10, or 16 after an 'x'
                int chCode = 0;
                int nDigits = 0;
                currCh = reader.Read();
                cl++;
                if (currCh == 'x') {
                    currCh = reader.Read();
                    cl++;
                    r=16;
                }
                NumberStyles style = r == 16 ? NumberStyles.HexNumber : NumberStyles.Integer;
                while (true) {
                    int x = -1;
                    // Candidate digit: parse it on its own with the active style; a
                    // FormatException (e.g. a hex letter in a decimal reference) ends the run.
                    if (Char.IsNumber((char)currCh) || "abcdef".IndexOf(Char.ToLower((char)currCh)) != -1) {
                        try {
                            x = Int32.Parse(new string((char)currCh, 1), style);
                        } catch (FormatException) {x = -1;}
                    }
                    if (x == -1) break;
                    chCode *= r;
                    chCode += x;
                    nDigits++;
                    currCh = reader.Read();
                    cl++;
                }
                if (currCh == ';' && nDigits > 0) {
                    sbChars.Append((char)chCode);
                } else {
                    FatalErr("Bad char ref");
                }
            } else {
                // entity reference
                string entityRefChars = "aglmopqstu"; // amp | apos | quot | gt | lt
                string entities = "&'\"><";
                int pos = 0;
                int entIdx = 0xF;      // index into entities; 0xF while nothing is recognised
                int pred = 0;
                int predShift = 0;
                int sbLen = sbChars.Length; // where the entity name starts inside sbChars
                while (true) {
                    if (pos != 0xF) pos = entityRefChars.IndexOf((char)currCh) & 0xF;
                    if (pos == 0xF) FatalErr(errors[7]);
                    sbChars.Append((char)currCh);
                    // One 16-bit entry per letter of entityRefChars, made of four nibbles
                    // (high to low): left predicate, right predicate, left branch, right
                    // branch. A branch is an index into entityRefChars (0xF = none).
                    int path = "\uFF35\u3F8F\u4F8F\u0F5F\uFF78\uE1F4\u2299\uEEFF\uEEFF\uFF4F"[pos];
                    int lBr = (path >> 4) & 0xF;
                    int rBr = path & 0xF;
                    int lPred = path >> 12;
                    int rPred = (path >> 8) & 0xF;
                    currCh = reader.Read();
                    cl++;
                    pos = 0xF;
                    if (lBr != 0xF && currCh == entityRefChars[lBr]) {
                        if (lPred < 0xE) entIdx = lPred;
                        pred = lPred;
                        predShift = 12; // left
                    } else if (rBr != 0xF && currCh == entityRefChars[rBr]) {
                        if (rPred < 0xE) entIdx = rPred;
                        pred = rPred;
                        predShift = 8; // right
                    } else if (currCh == ';') {
                        if (entIdx != 0xF
                            && predShift != 0
                            && ((path >> predShift) & 0xF) == 0xE) break;
                        continue; // pos == 0xF
                    }
                    pos=0;
                }
                // Length of the name that was appended to sbChars.
                int l = cl - this.col - 1;
                // Final check of the collected name against the five supported entities.
                if ((l > 0 && l < 5)
                    &&(StrEquals("amp", sbChars, sbLen, l)
                    || StrEquals("apos", sbChars, sbLen, l)
                    || StrEquals("quot", sbChars, sbLen, l)
                    || StrEquals("lt", sbChars, sbLen, l)
                    || StrEquals("gt", sbChars, sbLen, l))
                    ) {
                    // Replace the buffered name with the character it stands for.
                    sbChars.Length = sbLen;
                    sbChars.Append(entities[entIdx]);
                } else FatalErr(errors[7]);
            }
            this.col = cl;
            break;

        default:
            FatalErr(String.Format("Unexpected action code - {0}.", actionCode));
            break;
        }
    } // while (true)

    handler.OnEndParsing(this);
}