System.Text.RegularExpressions.RegexWriter.EmitFragment C# (CSharp) Method

EmitFragment() private method

The main RegexCode generator. It does a depth-first walk through the tree and calls EmitFragment to emits code before and after each child of an interior node, and at each leaf.
private EmitFragment ( int nodetype, RegexNode node, int curIndex ) : void
nodetype int
node RegexNode
curIndex int
return void
        private void EmitFragment(int nodetype, RegexNode node, int curIndex)
        {
            int bits = 0;

            if (nodetype <= RegexNode.Ref)
            {
                if (node.UseOptionR())
                    bits |= RegexCode.Rtl;
                if ((node._options & RegexOptions.IgnoreCase) != 0)
                    bits |= RegexCode.Ci;
            }

            switch (nodetype)
            {
                case RegexNode.Concatenate | BeforeChild:
                case RegexNode.Concatenate | AfterChild:
                case RegexNode.Empty:
                    break;

                case RegexNode.Alternate | BeforeChild:
                    if (curIndex < node._children.Count - 1)
                    {
                        PushInt(CurPos());
                        Emit(RegexCode.Lazybranch, 0);
                    }
                    break;

                case RegexNode.Alternate | AfterChild:
                    {
                        if (curIndex < node._children.Count - 1)
                        {
                            int LBPos = PopInt();
                            PushInt(CurPos());
                            Emit(RegexCode.Goto, 0);
                            PatchJump(LBPos, CurPos());
                        }
                        else
                        {
                            int I;
                            for (I = 0; I < curIndex; I++)
                            {
                                PatchJump(PopInt(), CurPos());
                            }
                        }
                        break;
                    }

                case RegexNode.Testref | BeforeChild:
                    switch (curIndex)
                    {
                        case 0:
                            Emit(RegexCode.Setjump);
                            PushInt(CurPos());
                            Emit(RegexCode.Lazybranch, 0);
                            Emit(RegexCode.Testref, MapCapnum(node._m));
                            Emit(RegexCode.Forejump);
                            break;
                    }
                    break;

                case RegexNode.Testref | AfterChild:
                    switch (curIndex)
                    {
                        case 0:
                            {
                                int Branchpos = PopInt();
                                PushInt(CurPos());
                                Emit(RegexCode.Goto, 0);
                                PatchJump(Branchpos, CurPos());
                                Emit(RegexCode.Forejump);
                                if (node._children.Count > 1)
                                    break;
                                // else fallthrough
                                goto case 1;
                            }
                        case 1:
                            PatchJump(PopInt(), CurPos());
                            break;
                    }
                    break;

                case RegexNode.Testgroup | BeforeChild:
                    switch (curIndex)
                    {
                        case 0:
                            Emit(RegexCode.Setjump);
                            Emit(RegexCode.Setmark);
                            PushInt(CurPos());
                            Emit(RegexCode.Lazybranch, 0);
                            break;
                    }
                    break;

                case RegexNode.Testgroup | AfterChild:
                    switch (curIndex)
                    {
                        case 0:
                            Emit(RegexCode.Getmark);
                            Emit(RegexCode.Forejump);
                            break;
                        case 1:
                            int Branchpos = PopInt();
                            PushInt(CurPos());
                            Emit(RegexCode.Goto, 0);
                            PatchJump(Branchpos, CurPos());
                            Emit(RegexCode.Getmark);
                            Emit(RegexCode.Forejump);

                            if (node._children.Count > 2)
                                break;
                            // else fallthrough
                            goto case 2;
                        case 2:
                            PatchJump(PopInt(), CurPos());
                            break;
                    }
                    break;

                case RegexNode.Loop | BeforeChild:
                case RegexNode.Lazyloop | BeforeChild:

                    if (node._n < int.MaxValue || node._m > 1)
                        Emit(node._m == 0 ? RegexCode.Nullcount : RegexCode.Setcount, node._m == 0 ? 0 : 1 - node._m);
                    else
                        Emit(node._m == 0 ? RegexCode.Nullmark : RegexCode.Setmark);

                    if (node._m == 0)
                    {
                        PushInt(CurPos());
                        Emit(RegexCode.Goto, 0);
                    }
                    PushInt(CurPos());
                    break;

                case RegexNode.Loop | AfterChild:
                case RegexNode.Lazyloop | AfterChild:
                    {
                        int StartJumpPos = CurPos();
                        int Lazy = (nodetype - (RegexNode.Loop | AfterChild));

                        if (node._n < int.MaxValue || node._m > 1)
                            Emit(RegexCode.Branchcount + Lazy, PopInt(), node._n == int.MaxValue ? int.MaxValue : node._n - node._m);
                        else
                            Emit(RegexCode.Branchmark + Lazy, PopInt());

                        if (node._m == 0)
                            PatchJump(PopInt(), StartJumpPos);
                    }
                    break;

                case RegexNode.Group | BeforeChild:
                case RegexNode.Group | AfterChild:
                    break;

                case RegexNode.Capture | BeforeChild:
                    Emit(RegexCode.Setmark);
                    break;

                case RegexNode.Capture | AfterChild:
                    Emit(RegexCode.Capturemark, MapCapnum(node._m), MapCapnum(node._n));
                    break;

                case RegexNode.Require | BeforeChild:
                    // NOTE: the following line causes lookahead/lookbehind to be
                    // NON-BACKTRACKING. It can be commented out with (*)
                    Emit(RegexCode.Setjump);

                    Emit(RegexCode.Setmark);
                    break;

                case RegexNode.Require | AfterChild:
                    Emit(RegexCode.Getmark);

                    // NOTE: the following line causes lookahead/lookbehind to be
                    // NON-BACKTRACKING. It can be commented out with (*)
                    Emit(RegexCode.Forejump);

                    break;

                case RegexNode.Prevent | BeforeChild:
                    Emit(RegexCode.Setjump);
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                    break;

                case RegexNode.Prevent | AfterChild:
                    Emit(RegexCode.Backjump);
                    PatchJump(PopInt(), CurPos());
                    Emit(RegexCode.Forejump);
                    break;

                case RegexNode.Greedy | BeforeChild:
                    Emit(RegexCode.Setjump);
                    break;

                case RegexNode.Greedy | AfterChild:
                    Emit(RegexCode.Forejump);
                    break;

                case RegexNode.One:
                case RegexNode.Notone:
                    Emit(node._type | bits, node._ch);
                    break;

                case RegexNode.Notoneloop:
                case RegexNode.Notonelazy:
                case RegexNode.Oneloop:
                case RegexNode.Onelazy:
                    if (node._m > 0)
                        Emit(((node._type == RegexNode.Oneloop || node._type == RegexNode.Onelazy) ?
                              RegexCode.Onerep : RegexCode.Notonerep) | bits, node._ch, node._m);
                    if (node._n > node._m)
                        Emit(node._type | bits, node._ch, node._n == int.MaxValue ?
                             int.MaxValue : node._n - node._m);
                    break;

                case RegexNode.Setloop:
                case RegexNode.Setlazy:
                    if (node._m > 0)
                        Emit(RegexCode.Setrep | bits, StringCode(node._str), node._m);
                    if (node._n > node._m)
                        Emit(node._type | bits, StringCode(node._str),
                             (node._n == int.MaxValue) ? int.MaxValue : node._n - node._m);
                    break;

                case RegexNode.Multi:
                    Emit(node._type | bits, StringCode(node._str));
                    break;

                case RegexNode.Set:
                    Emit(node._type | bits, StringCode(node._str));
                    break;

                case RegexNode.Ref:
                    Emit(node._type | bits, MapCapnum(node._m));
                    break;

                case RegexNode.Nothing:
                case RegexNode.Bol:
                case RegexNode.Eol:
                case RegexNode.Boundary:
                case RegexNode.Nonboundary:
                case RegexNode.ECMABoundary:
                case RegexNode.NonECMABoundary:
                case RegexNode.Beginning:
                case RegexNode.Start:
                case RegexNode.EndZ:
                case RegexNode.End:
                    Emit(node._type);
                    break;

                default:
                    throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, nodetype.ToString(CultureInfo.CurrentCulture)));
            }
        }

Usage Example

Esempio n. 1
0
        /// <summary>
        /// The top level RegexCode generator. It does a depth-first walk
        /// through the tree and calls EmitFragment to emits code before
        /// and after each child of an interior node, and at each leaf.
        /// </summary>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            Span <int>  emittedSpan  = stackalloc int[EmittedSize];
            Span <int>  intStackSpan = stackalloc int[IntStackSize];
            RegexWriter writer       = new RegexWriter(emittedSpan, intStackSpan);

            // construct sparse capnum mapping if some numbers are unused
            int capsize;

            if (tree._capnumlist == null || tree._captop == tree._capnumlist.Length)
            {
                capsize      = tree._captop;
                writer._caps = null;
            }
            else
            {
                capsize      = tree._capnumlist.Length;
                writer._caps = tree._caps;
                for (int i = 0; i < tree._capnumlist.Length; i++)
                {
                    writer._caps[tree._capnumlist[i]] = i;
                }
            }

            RegexNode curNode  = tree._root;
            int       curChild = 0;

            writer.Emit(RegexCode.Lazybranch, 0);

            for (; ;)
            {
                if (curNode._children == null)
                {
                    writer.EmitFragment(curNode._type, curNode, 0);
                }
                else if (curChild < curNode._children.Count)
                {
                    writer.EmitFragment(curNode._type | BeforeChild, curNode, curChild);

                    curNode = curNode._children[curChild];
                    writer._intStack.Append(curChild);
                    curChild = 0;
                    continue;
                }

                if (writer._intStack.Length == 0)
                {
                    break;
                }

                curChild = writer._intStack.Pop();
                curNode  = curNode._next;

                writer.EmitFragment(curNode._type | AfterChild, curNode, curChild);
                curChild++;
            }

            writer.PatchJump(0, writer._emitted.Length);
            writer.Emit(RegexCode.Stop);

            RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
            RegexPrefix prefix   = RegexFCD.Prefix(tree);
            bool        rtl      = ((tree._options & RegexOptions.RightToLeft) != 0);

            CultureInfo     culture = (tree._options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
            RegexBoyerMoore bmPrefix;

            if (prefix != null && prefix.Prefix.Length > 0)
            {
                bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
            }
            else
            {
                bmPrefix = null;
            }

            int anchors = RegexFCD.Anchors(tree);

            int[] emitted = writer._emitted.AsReadOnlySpan().ToArray();

            // Cleaning up and returning the borrowed arrays
            writer._emitted.Dispose();
            writer._intStack.Dispose();

            return(new RegexCode(emitted, writer._stringTable, writer._trackCount, writer._caps, capsize, bmPrefix, fcPrefix, anchors, rtl));
        }