/// <summary>
/// Scans the text that follows an opening parenthesis and builds the node for the
/// group being opened. Per the cases handled below, the current position is expected
/// to be just after the "(" when this is called.
/// </summary>
/// <returns>
/// A new <see cref="RegexNode"/> describing the group, or <c>null</c> when the
/// construct is an inline options group that is immediately closed by ")" and
/// therefore opens no group.
/// </returns>
/// <remarks>
/// Malformed constructs are reported by throwing whatever exception
/// <c>MakeException</c> builds for the relevant <c>SR</c> message. This method may
/// also modify <c>_options</c> (the RightToLeft flag), <c>_autocap</c> and
/// <c>_ignoreNextParen</c>.
/// </remarks>
internal RegexNode ScanGroupOpen()
{
    char ch = '\0';
    int nodeType;
    char close = '>'; // delimiter that ends a group name: '>' for (?<name>, '\'' for (?'name'

    // Plain parenthesis; just return a RegexNode if we have:
    // 1. "(" followed by nothing
    // 2. "(x" where x != ?
    // 3. "(?)"
    if (CharsRight() == 0 || RightChar() != '?' || (CharsRight() > 1 && RightChar(1) == ')'))
    {
        if (UseOptionN() || _ignoreNextParen)
        {
            // Non-capturing: either UseOptionN() says so, or the previous call flagged
            // this parenthesis as the condition of an alternation construct.
            _ignoreNextParen = false;
            return new RegexNode(RegexNode.Group, _options);
        }

        // Otherwise the group captures into the next automatically numbered slot.
        return new RegexNode(RegexNode.Capture, _options, _autocap++, -1);
    }

    // Step over the '?'.
    MoveRight();

    // This loop body runs at most once: every path through the switch returns, throws,
    // jumps to BreakRecognize, or reaches the return after the switch. The loop exists
    // only so that 'break' can be used to reach the failure label.
    for (;;)
    {
        if (CharsRight() == 0)
        {
            break;
        }

        switch (ch = MoveRightGetChar())
        {
            case ':':
                // (?:...)
                nodeType = RegexNode.Group;
                break;

            case '=':
                // (?=...) - RightToLeft is cleared for the contents.
                _options &= ~(RegexOptions.RightToLeft);
                nodeType = RegexNode.Require;
                break;

            case '!':
                // (?!...) - RightToLeft is cleared for the contents.
                _options &= ~(RegexOptions.RightToLeft);
                nodeType = RegexNode.Prevent;
                break;

            case '>':
                // (?>...)
                nodeType = RegexNode.Greedy;
                break;

            case '\'':
                // (?'name'...) is handled like (?<name>...) with a different closing delimiter.
                close = '\'';
                goto case '<';

            case '<':
                if (CharsRight() == 0)
                {
                    goto BreakRecognize;
                }

                switch (ch = MoveRightGetChar())
                {
                    case '=':
                        // (?<=...) - not valid with the quote form (?'=...).
                        if (close == '\'')
                        {
                            goto BreakRecognize;
                        }

                        _options |= RegexOptions.RightToLeft;
                        nodeType = RegexNode.Require;
                        break;

                    case '!':
                        // (?<!...) - not valid with the quote form (?'!...).
                        if (close == '\'')
                        {
                            goto BreakRecognize;
                        }

                        _options |= RegexOptions.RightToLeft;
                        nodeType = RegexNode.Prevent;
                        break;

                    default:
                        // Named/numbered capture, optionally of the form name1-name2.
                        // Put back the character just read so the name scanners see it.
                        MoveLeft();
                        int capnum = -1;   // slot captured into; -1 if none was resolved
                        int uncapnum = -1; // slot named after '-'; -1 if absent
                        bool proceed = false; // true when the name starts with '-' (no first part)

                        // grab part before -
                        if (ch >= '0' && ch <= '9')
                        {
                            capnum = ScanDecimal();
                            if (!IsCaptureSlot(capnum))
                            {
                                capnum = -1;
                            }

                            // check if we have bogus characters after the number
                            if (CharsRight() > 0 && !(RightChar() == close || RightChar() == '-'))
                            {
                                throw MakeException(SR.InvalidGroupName);
                            }

                            if (capnum == 0)
                            {
                                throw MakeException(SR.CapnumNotZero);
                            }
                        }
                        else if (RegexCharClass.IsWordChar(ch))
                        {
                            string capname = ScanCapname();
                            if (IsCaptureName(capname))
                            {
                                capnum = CaptureSlotFromName(capname);
                            }

                            // check if we have bogus character after the name
                            if (CharsRight() > 0 && !(RightChar() == close || RightChar() == '-'))
                            {
                                throw MakeException(SR.InvalidGroupName);
                            }
                        }
                        else if (ch == '-')
                        {
                            proceed = true;
                        }
                        else
                        {
                            // bad group name - starts with something other than a word character and isn't a number
                            throw MakeException(SR.InvalidGroupName);
                        }

                        // grab part after - if any
                        // Require at least one character after the '-' (CharsRight() > 1): the
                        // branch steps over the '-' and then peeks with RightChar(), which every
                        // other call site in this method guards with a CharsRight() check
                        // (presumably because it reads past the end of the pattern otherwise).
                        // A pattern that ends right after the '-' now falls through to the
                        // UnrecognizedGrouping failure below.
                        if ((capnum != -1 || proceed) && CharsRight() > 1 && RightChar() == '-')
                        {
                            MoveRight();
                            ch = RightChar();
                            if (ch >= '0' && ch <= '9')
                            {
                                uncapnum = ScanDecimal();
                                if (!IsCaptureSlot(uncapnum))
                                {
                                    throw MakeException(SR.Format(SR.UndefinedBackref, uncapnum));
                                }

                                // check if we have bogus characters after the number
                                if (CharsRight() > 0 && RightChar() != close)
                                {
                                    throw MakeException(SR.InvalidGroupName);
                                }
                            }
                            else if (RegexCharClass.IsWordChar(ch))
                            {
                                string uncapname = ScanCapname();
                                if (IsCaptureName(uncapname))
                                {
                                    uncapnum = CaptureSlotFromName(uncapname);
                                }
                                else
                                {
                                    throw MakeException(SR.Format(SR.UndefinedNameRef, uncapname));
                                }

                                // check if we have bogus character after the name
                                if (CharsRight() > 0 && RightChar() != close)
                                {
                                    throw MakeException(SR.InvalidGroupName);
                                }
                            }
                            else
                            {
                                // bad group name - starts with something other than a word character and isn't a number
                                throw MakeException(SR.InvalidGroupName);
                            }
                        }

                        // actually make the node, provided at least one slot was resolved and
                        // the name is terminated by the expected closing delimiter
                        if ((capnum != -1 || uncapnum != -1) && CharsRight() > 0 && MoveRightGetChar() == close)
                        {
                            return new RegexNode(RegexNode.Capture, _options, capnum, uncapnum);
                        }

                        goto BreakRecognize;
                }

                break;

            case '(':
                // alternation construct (?(...) | )
                // Remember the position just after the condition's "(" so we can rewind to it.
                int parenPos = Textpos();
                if (CharsRight() > 0)
                {
                    ch = RightChar();

                    // check if the alternation condition is a backref
                    if (ch >= '0' && ch <= '9')
                    {
                        int capnum = ScanDecimal();
                        if (CharsRight() > 0 && MoveRightGetChar() == ')')
                        {
                            if (IsCaptureSlot(capnum))
                            {
                                return new RegexNode(RegexNode.Testref, _options, capnum);
                            }

                            throw MakeException(SR.Format(SR.UndefinedReference, capnum.ToString(CultureInfo.CurrentCulture)));
                        }

                        throw MakeException(SR.Format(SR.MalformedReference, capnum.ToString(CultureInfo.CurrentCulture)));
                    }
                    else if (RegexCharClass.IsWordChar(ch))
                    {
                        // A word is a group reference only if it names an existing capture and
                        // is immediately closed by ")"; otherwise it is treated as an
                        // expression condition below.
                        string capname = ScanCapname();
                        if (IsCaptureName(capname) && CharsRight() > 0 && MoveRightGetChar() == ')')
                        {
                            return new RegexNode(RegexNode.Testref, _options, CaptureSlotFromName(capname));
                        }
                    }
                }

                // not a backref
                nodeType = RegexNode.Testgroup;
                Textto(parenPos - 1);       // jump to the start of the parentheses
                _ignoreNextParen = true;    // but make sure we don't try to capture the insides

                // Inspect the characters following the condition's "(" for constructs that
                // are not allowed as a condition.
                int charsRight = CharsRight();
                if (charsRight >= 3 && RightChar(1) == '?')
                {
                    char rightchar2 = RightChar(2);

                    // disallow comments in the condition
                    if (rightchar2 == '#')
                    {
                        throw MakeException(SR.AlternationCantHaveComment);
                    }

                    // disallow named capture group (?'..'..) in the condition
                    if (rightchar2 == '\'')
                    {
                        throw MakeException(SR.AlternationCantCapture);
                    }

                    // disallow named capture group (?<..>..) in the condition, while still
                    // allowing (?<!..) and (?<=..)
                    if (charsRight >= 4 && (rightchar2 == '<' && RightChar(3) != '!' && RightChar(3) != '='))
                    {
                        throw MakeException(SR.AlternationCantCapture);
                    }
                }

                break;

            default:
                // Anything else is handed to ScanOptions() (presumably inline option letters,
                // as in (?i) or (?i:...) - its implementation is not visible here).
                // Put back the character just read so ScanOptions() sees it.
                MoveLeft();
                nodeType = RegexNode.Group;
                ScanOptions();
                if (CharsRight() == 0)
                {
                    goto BreakRecognize;
                }

                if ((ch = MoveRightGetChar()) == ')')
                {
                    // Immediately closed after the options: no group is opened.
                    return null;
                }

                if (ch != ':')
                {
                    goto BreakRecognize;
                }

                break;
        }

        return new RegexNode(nodeType, _options);
    }

BreakRecognize:
    // Reached by 'break' out of the loop above or by an explicit goto: the text after
    // "(?" is not a grouping construct this method recognizes.
    throw MakeException(SR.UnrecognizedGrouping);
}