Lucene.Net.Analysis.Synonym.TestSynonymMapFilter.SlowSynMatcher C# (CSharp) Method

SlowSynMatcher() protected method

protected SlowSynMatcher ( string doc, IList<OneSyn> syns, int maxOutputLength ) : string
doc string
syns IList<OneSyn>
maxOutputLength int
return string
        /// <summary>
        /// Brute-force synonym matcher that renders, as a single string, the
        /// tokens expected at each position of <paramref name="doc"/> after the
        /// rules in <paramref name="syns"/> have been applied greedily.
        /// </summary>
        /// <remarks>
        /// Every input token is treated as occupying two characters of
        /// <paramref name="doc"/> (the method asserts that the document length is
        /// even, that matches start on even offsets and that every rule input has
        /// odd length). Positions are separated by a single space; alternatives at
        /// one position are separated by <c>/</c>; synonym outputs that fall inside
        /// the input range carry a <c>:endOffset_posLen</c> suffix.
        /// </remarks>
        /// <param name="doc">Document text to match against; its length must be even.</param>
        /// <param name="syns">Synonym rules; each supplies <c>@in</c>, <c>@out</c> and <c>keepOrig</c>.</param>
        /// <param name="maxOutputLength">
        /// Number of extra positions reserved past the last input for outputs that
        /// extend beyond the document. NOTE(review): presumably the longest output
        /// (in tokens) of any rule - confirm against the caller.
        /// </param>
        /// <returns>The rendered expected token stream.</returns>
        protected virtual string SlowSynMatcher(string doc, IList<OneSyn> syns, int maxOutputLength)
        {
            assertTrue(doc.Length % 2 == 0);
            int numInputs = doc.Length / 2;

            // Arrays are zero-initialized, so both flag arrays start out all false.
            bool[] keepOrigs = new bool[numInputs];
            bool[] hasMatch = new bool[numInputs];
            string[] outputs = new string[numInputs + maxOutputLength];
            OneSyn[] matches = new OneSyn[numInputs];
            string[] separator = { " " };

            // Record, for every input position, the longest rule whose input starts there.
            foreach (OneSyn syn in syns)
            {
                int idx = -1;
                while (true)
                {
                    idx = doc.IndexOf(syn.@in, 1 + idx, StringComparison.Ordinal);
                    if (idx == -1)
                    {
                        break;
                    }
                    assertTrue(idx % 2 == 0);
                    int matchIDX = idx / 2;
                    assertTrue(syn.@in.Length % 2 == 1);

                    OneSyn current = matches[matchIDX];
                    if (current == null || syn.@in.Length > current.@in.Length)
                    {
                        // Greedy conflict resolution: longer match wins:
                        matches[matchIDX] = syn;
                    }
                    else
                    {
                        // Two rules of equal input length must never compete for one position.
                        assertTrue(syn.@in.Length < current.@in.Length);
                    }
                }
            }

            // Greedy conflict resolution: if syn matches a range of inputs,
            // it prevents other syns from matching that range
            for (int inputIDX = 0; inputIDX < numInputs; inputIDX++)
            {
                OneSyn match = matches[inputIDX];
                if (match != null)
                {
                    // Number of input tokens covered by the rule's input string.
                    int synInLength = (1 + match.@in.Length) / 2;
                    for (int nextInputIDX = inputIDX + 1; nextInputIDX < numInputs && nextInputIDX < (inputIDX + synInLength); nextInputIDX++)
                    {
                        matches[nextInputIDX] = null;
                    }
                }
            }

            // Fill overlapping outputs:
            for (int inputIDX = 0; inputIDX < numInputs; inputIDX++)
            {
                OneSyn syn = matches[inputIDX];
                if (syn == null)
                {
                    continue;
                }

                // Flag every input position consumed by this match.
                int synInLength = (1 + syn.@in.Length) / 2;
                for (int idx = 0; idx < synInLength; idx++)
                {
                    hasMatch[inputIDX + idx] = true;
                    keepOrigs[inputIDX + idx] |= syn.keepOrig;
                }

                foreach (string synOut in syn.@out)
                {
                    string[] synOutputs = synOut.Split(separator, StringSplitOptions.RemoveEmptyEntries);
                    assertEquals(synOutputs.Length, (1 + synOut.Length) / 2);
                    int matchEnd = inputIDX + synOutputs.Length;
                    int synUpto = 0;
                    for (int matchIDX = inputIDX; matchIDX < matchEnd; matchIDX++)
                    {
                        if (outputs[matchIDX] == null)
                        {
                            outputs[matchIDX] = synOutputs[synUpto++];
                        }
                        else
                        {
                            outputs[matchIDX] = outputs[matchIDX] + "/" + synOutputs[synUpto++];
                        }

                        // Only positions inside the input range get the offset/posLen suffix.
                        if (matchIDX < numInputs)
                        {
                            int endOffset;
                            int posLen;
                            if (synOutputs.Length == 1)
                            {
                                // Add full endOffset
                                endOffset = (inputIDX * 2) + syn.@in.Length;
                                posLen = syn.keepOrig ? (1 + syn.@in.Length) / 2 : 1;
                            }
                            else
                            {
                                // Add endOffset matching input token's
                                endOffset = (matchIDX * 2) + 1;
                                posLen = 1;
                            }
                            outputs[matchIDX] = outputs[matchIDX] + ":" + endOffset + "_" + posLen;
                        }
                    }
                }
            }

            // Render each position: original token first (when kept), then synonym outputs.
            StringBuilder sb = new StringBuilder();
            string[] inputTokens = doc.Split(separator, StringSplitOptions.RemoveEmptyEntries);
            int limit = inputTokens.Length + maxOutputLength;
            for (int inputIDX = 0; inputIDX < limit; inputIDX++)
            {
                bool posHasOutput = false;
                if (inputIDX >= numInputs && outputs[inputIDX] == null)
                {
                    // Past the input and nothing spilled over to this position: done.
                    break;
                }
                if (inputIDX < numInputs && (!hasMatch[inputIDX] || keepOrigs[inputIDX]))
                {
                    assertTrue(inputTokens[inputIDX].Length != 0);
                    sb.Append(inputTokens[inputIDX]);
                    posHasOutput = true;
                }

                if (outputs[inputIDX] != null)
                {
                    if (posHasOutput)
                    {
                        sb.Append('/');
                    }
                    sb.Append(outputs[inputIDX]);
                }
                else if (!posHasOutput)
                {
                    // Position was consumed by a match and emits nothing; no separator either.
                    continue;
                }
                if (inputIDX < limit - 1)
                {
                    sb.Append(' ');
                }
            }

            return sb.ToString();
        }