/// <summary>
/// Advances this synonym filter to the next output token.
/// First drains any previously generated replacement tokens; otherwise reads
/// input, attempts a (longest) synonym match via <c>map</c>, and on a match
/// generates the synonym tokens — optionally merged with the original tokens
/// (when <c>IncludeOrig</c>) — with position increments adjusted so the merged
/// stream stays consistent.
/// </summary>
/// <returns><c>true</c> if a token was produced; <c>false</c> at end of stream.</returns>
public override bool IncrementToken()
{
while (true)
{
// if there are any generated tokens, return them... don't try any
// matches against them, as we specifically don't want recursion.
if (replacement != null && replacement.MoveNext())
{
Copy(this, replacement.Current);
return true;
}
// common case fast-path of first token not matching anything
// NOTE: NextTok() may return 'this' (the live attributes) or a buffered clone.
AttributeSource firstTok = NextTok();
if (firstTok == null)
{
return false;
}
// AddAttribute returns the existing attribute instance if already present,
// so this is effectively a read of firstTok's term attribute.
var termAtt = firstTok.AddAttribute<ICharTermAttribute>();
SlowSynonymMap result = map.submap != null ? map.submap.Get(termAtt.Buffer(), 0, termAtt.Length) : null;
if (result == null)
{
Copy(this, firstTok);
return true;
}
// fast-path failed, clone ourselves if needed
// (Match() below will overwrite 'this' as it reads ahead, so firstTok must
// be an independent snapshot of the first token's state.)
if (firstTok == this)
{
firstTok = CloneAttributes();
}
// OK, we matched a token, so find the longest match.
// Match() fills 'matched' with the look-ahead tokens it consumed.
matched = new LinkedList<AttributeSource>();
result = Match(result);
if (result == null)
{
// no match, simply return the first token read.
// (tokens buffered into 'matched' remain queued for later reads.)
Copy(this, firstTok);
return true;
}
// reuse, or create new one each time?
List<AttributeSource> generated = new List<AttributeSource>(result.synonyms.Length + matched.Count + 1);
//
// there was a match... let's generate the new tokens, merging
// in the matched tokens (position increments need adjusting)
//
// lastTok is the final token of the matched phrase; its end offset is used
// as the end offset for every generated synonym token.
AttributeSource lastTok = matched.Count == 0 ? firstTok : matched.Last.Value;
bool includeOrig = result.IncludeOrig;
AttributeSource origTok = includeOrig ? firstTok : null;
IPositionIncrementAttribute firstPosIncAtt = firstTok.AddAttribute<IPositionIncrementAttribute>();
int origPos = firstPosIncAtt.PositionIncrement; // position of origTok in the original stream
int repPos = 0; // curr position in replacement token stream
int pos = 0; // current position in merged token stream
for (int i = 0; i < result.synonyms.Length; i++)
{
Token repTok = result.synonyms[i];
// clone to get an independent token whose attributes we can overwrite
AttributeSource newTok = firstTok.CloneAttributes();
ICharTermAttribute newTermAtt = newTok.AddAttribute<ICharTermAttribute>();
IOffsetAttribute newOffsetAtt = newTok.AddAttribute<IOffsetAttribute>();
IPositionIncrementAttribute newPosIncAtt = newTok.AddAttribute<IPositionIncrementAttribute>();
IOffsetAttribute lastOffsetAtt = lastTok.AddAttribute<IOffsetAttribute>();
// keep the start offset of the first matched token, but stretch the end
// offset to cover the whole matched phrase.
newOffsetAtt.SetOffset(newOffsetAtt.StartOffset(), lastOffsetAtt.EndOffset());
newTermAtt.CopyBuffer(repTok.Buffer(), 0, repTok.Length);
repPos += repTok.PositionIncrement;
if (i == 0) // make position of first token equal to original
{
repPos = origPos;
}
// if necessary, insert original tokens and adjust position increment
// (emit any original tokens whose position precedes or equals the
// current replacement position, keeping the merged stream ordered)
while (origTok != null && origPos <= repPos)
{
IPositionIncrementAttribute origPosInc = origTok.AddAttribute<IPositionIncrementAttribute>();
// rewrite the increment as a delta from the last emitted position
origPosInc.PositionIncrement = origPos - pos;
generated.Add(origTok);
pos += origPosInc.PositionIncrement;
// dequeue the next buffered original token, if any
if (matched.Count == 0)
{
origTok = null;
}
else
{
origTok = matched.First.Value;
matched.RemoveFirst();
}
if (origTok != null)
{
// advance origPos by the *next* token's original increment
origPosInc = origTok.AddAttribute<IPositionIncrementAttribute>();
origPos += origPosInc.PositionIncrement;
}
}
newPosIncAtt.PositionIncrement = repPos - pos;
generated.Add(newTok);
pos += newPosIncAtt.PositionIncrement;
}
// finish up any leftover original tokens
// (same merge logic as above, for originals positioned after the last synonym)
while (origTok != null)
{
IPositionIncrementAttribute origPosInc = origTok.AddAttribute<IPositionIncrementAttribute>();
origPosInc.PositionIncrement = origPos - pos;
generated.Add(origTok);
pos += origPosInc.PositionIncrement;
if (matched.Count == 0)
{
origTok = null;
}
else
{
origTok = matched.First.Value;
matched.RemoveFirst();
}
if (origTok != null)
{
origPosInc = origTok.AddAttribute<IPositionIncrementAttribute>();
origPos += origPosInc.PositionIncrement;
}
}
// what if we replaced a longer sequence with a shorter one?
// a/0 b/5 => foo/0
// should I re-create the gap on the next buffered token?
replacement = generated.GetEnumerator();
// Now return to the top of the loop to read and return the first
// generated token.. The reason this is done is that we may have generated
// nothing at all, and may need to continue with more matching logic.
}
}