/*
* (non-Javadoc)
*
* @see Lucene.Net.Analysis.TokenStream.Next(Token)
*/
/// <summary>
/// Asks the scanner for token types until it yields one whose text is no
/// longer than maxTokenLength, then fills <paramref name="reusableToken"/>
/// with that token's term text, offsets, position increment and type.
/// Tokens longer than maxTokenLength are dropped, but each dropped token
/// adds one to the position increment of the token that is finally returned.
/// </summary>
/// <param name="reusableToken">
/// Token instance that is cleared and refilled in place; asserted to be non-null.
/// </param>
/// <returns>
/// <paramref name="reusableToken"/> once it has been populated, or null when
/// the scanner reports StandardTokenizerImpl.YYEOF.
/// </returns>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);

    // Begins at 1 and grows by one for every over-long token that is skipped.
    int positionIncrement = 1;

    while (true)
    {
        int scannedType = scanner.GetNextToken();
        if (scannedType == StandardTokenizerImpl.YYEOF)
        {
            return null;
        }

        // A too-long term is skipped, yet it still counts towards the
        // position increment of the next token we hand back.
        if (scanner.Yylength() > maxTokenLength)
        {
            positionIncrement++;
            continue;
        }

        reusableToken.Clear();
        reusableToken.SetPositionIncrement(positionIncrement);
        scanner.GetText(reusableToken);

        int startOffset = scanner.Yychar();
        reusableToken.SetStartOffset(startOffset);
        reusableToken.SetEndOffset(startOffset + reusableToken.TermLength());

        // Regular case: the token type name is looked up directly by the scanned type.
        if (scannedType != StandardTokenizerImpl.ACRONYM_DEP)
        {
            reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[scannedType]);
            return reusableToken;
        }

        // The ACRONYM_DEP handling below is slated for removal in the next
        // release; for now it turns invalid acronyms into HOST tokens. Once it
        // is removed, only the direct TOKEN_TYPES lookup above should remain.
        if (!replaceInvalidAcronym)
        {
            reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]);
            return reusableToken;
        }

        reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST]);
        // Drop the extra trailing '.'. The end offset was already set above,
        // so it still covers the character removed here.
        reusableToken.SetTermLength(reusableToken.TermLength() - 1);
        return reusableToken;
    }
}