Lucene.Net.Analysis.CharFilters.MappingCharFilter.Read C# (CSharp) Method

Read() public method

public Read ( ) : int
return int
        /// <summary>
        /// Reads the next single character of the mapped output.
        /// </summary>
        /// <remarks>
        /// A pending replacement is drained first, one character per call. Otherwise the
        /// longest mapping that matches at the current input offset is searched for. On a
        /// match the input offset is advanced past the matched characters, offset corrections
        /// are recorded when the replacement length differs from the match length, and the
        /// replacement becomes pending. Without a match the character at the current input
        /// offset is returned unchanged.
        /// </remarks>
        /// <returns>
        /// The next output character as an <see cref="int"/>, or the value -1 handed back by
        /// <c>buffer.Get</c> when no character is available at the current input offset.
        /// </returns>
        public override int Read()
        {
            while (true)
            {
                // Drain any replacement left over from a previous match before touching the input.
                if (replacement != null && replacementPointer < replacement.Length)
                {
                    return replacement.Chars[replacement.Offset + replacementPointer++];
                }

                // TODO: a more efficient approach would be Aho/Corasick's
                // algorithm
                // (http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm)
                // or this generalization: www.cis.uni-muenchen.de/people/Schulz/Pub/dictle5.ps
                //
                // I think this would be (almost?) equivalent to 1) adding
                // epsilon arcs from all final nodes back to the init
                // node in the FST, 2) adding a .* (skip any char)
                // loop on the initial node, and 3) determinizing
                // that.  Then we would not have to Restart matching
                // at each position.

                int lastMatchLen = -1;
                CharsRef lastMatch = null;

                int firstCH = buffer.Get(inputOff);
                if (firstCH != -1)
                {
                    // LUCENENET fix: the root-arc cache may not contain the key, so probe it
                    // with a single TryGetValue instead of ContainsKey followed by the indexer.
                    char key = (char)firstCH;
                    FST.Arc<CharsRef> arc;
                    if (cachedRootArcs.TryGetValue(key, out arc) && arc != null)
                    {
                        if (!FST.TargetHasArcs(arc))
                        {
                            // Fast pass for single character match:
                            Debug.Assert(arc.IsFinal);
                            lastMatchLen = 1;
                            lastMatch = arc.Output;
                        }
                        else
                        {
                            int lookahead = 0;
                            CharsRef output = arc.Output;
                            while (true)
                            {
                                lookahead++;

                                if (arc.IsFinal)
                                {
                                    // Match! (to node is final)
                                    lastMatchLen = lookahead;
                                    lastMatch = outputs.Add(output, arc.NextFinalOutput);
                                    // Greedy: keep searching to see if there's a
                                    // longer match...
                                }

                                if (!FST.TargetHasArcs(arc))
                                {
                                    break;
                                }

                                int ch = buffer.Get(inputOff + lookahead);
                                if (ch == -1)
                                {
                                    // No further input character available to extend the match.
                                    break;
                                }
                                if ((arc = map.FindTargetArc(ch, arc, scratchArc, fstReader)) == null)
                                {
                                    // Dead end
                                    break;
                                }
                                output = outputs.Add(output, arc.Output);
                            }
                        }
                    }
                }

                if (lastMatch != null)
                {
                    inputOff += lastMatchLen;
                    int diff = lastMatchLen - lastMatch.Length;

                    if (diff != 0)
                    {
                        int prevCumulativeDiff = LastCumulativeDiff;
                        if (diff > 0)
                        {
                            // Replacement is shorter than matched input:
                            AddOffCorrectMap(inputOff - diff - prevCumulativeDiff, prevCumulativeDiff + diff);
                        }
                        else
                        {
                            // Replacement is longer than matched input: remap
                            // the "extra" chars all back to the same input
                            // offset:
                            int outputStart = inputOff - prevCumulativeDiff;
                            for (int extraIDX = 0; extraIDX < -diff; extraIDX++)
                            {
                                AddOffCorrectMap(outputStart + extraIDX, prevCumulativeDiff - extraIDX - 1);
                            }
                        }
                    }

                    // Make the replacement pending; the next loop iteration starts emitting it.
                    replacement = lastMatch;
                    replacementPointer = 0;
                }
                else
                {
                    // No mapping applies here: pass the input character through unchanged.
                    int ret = buffer.Get(inputOff);
                    if (ret != -1)
                    {
                        inputOff++;
                        buffer.FreeBefore(inputOff);
                    }
                    return ret;
                }
            }
        }

Same methods

MappingCharFilter::Read ( char cbuf, int off, int len ) : int

Usage Example

Code example #1
0
        /// <summary>
        /// Checks that a <see cref="MappingCharFilter"/> can be read again from the
        /// beginning after <c>Reset()</c> has been called on it.
        /// </summary>
        public virtual void TestReaderReset()
        {
            const int capacity = 10;
            char[] chars = new char[capacity];
            CharFilter filter = new MappingCharFilter(normMap, new StringReader("x"));

            // First pass: the single input character is expected to come back as 'x'.
            int count = filter.Read(chars, 0, capacity);
            assertEquals(1, count);
            assertEquals('x', chars[0]);

            // A second read is expected to report -1.
            count = filter.Read(chars, 0, capacity);
            assertEquals(-1, count);

            // rewind
            filter.Reset();

            // After the reset the same character must be readable again.
            count = filter.Read(chars, 0, capacity);
            assertEquals(1, count);
            assertEquals('x', chars[0]);
        }
All Usage Examples Of Lucene.Net.Analysis.CharFilters.MappingCharFilter::Read