/// <summary>
/// Splits the current term at the hyphenation points reported by the
/// hyphenator and appends every accepted subword to <c>tokens</c>.
/// A candidate subword spans from one hyphenation point to a later one; it is
/// accepted when no dictionary is configured, when the dictionary contains it,
/// or when the dictionary contains it with its last character dropped.
/// When <c>onlyLongestMatch</c> is set, at most one token (the longest accepted
/// one) is appended per starting hyphenation point.
/// </summary>
protected override void Decompose()
{
    Hyphenation.Hyphenation hyphenation = hyphenator.Hyphenate(termAtt.Buffer(), 0, termAtt.Length, 1, 1);
    if (hyphenation == null)
    {
        // Nothing to split on: leave the token list untouched.
        return;
    }

    int[] points = hyphenation.HyphenationPoints;
    for (int first = 0; first < points.Length; first++)
    {
        int offset = points[first];
        CompoundToken best = null;

        // Pair the starting point with every later hyphenation point.
        for (int last = first + 1; last < points.Length; last++)
        {
            int candidateLength = points[last] - offset;

            // Candidates only get longer from here on, so stop this round
            // once the maximum subword size is exceeded.
            if (candidateLength > this.maxSubwordSize)
            {
                break;
            }

            // Too short to be emitted. Per the original author's note this
            // check also filters out negative lengths that the subtraction
            // above can apparently produce, which relies on minSubwordSize
            // being >= 0.
            if (candidateLength < this.minSubwordSize)
            {
                continue;
            }

            // Resolve how many characters actually matched, if any.
            int matchedLength;
            if (dictionary == null || dictionary.Contains(termAtt.Buffer(), offset, candidateLength))
            {
                matchedLength = candidateLength;
            }
            else if (dictionary.Contains(termAtt.Buffer(), offset, candidateLength - 1))
            {
                // Retry one character shorter to cope with genitive 's' and
                // other binding characters. Note that this can yield a subword
                // one character shorter than minSubwordSize.
                matchedLength = candidateLength - 1;
            }
            else
            {
                continue;
            }

            if (!this.onlyLongestMatch)
            {
                tokens.AddLast(new CompoundToken(this, offset, matchedLength));
            }
            else if (best == null || best.txt.Length < matchedLength)
            {
                // Strictly longer matches replace the current best; ties keep
                // the earlier token.
                best = new CompoundToken(this, offset, matchedLength);
            }
        }

        if (this.onlyLongestMatch && best != null)
        {
            tokens.AddLast(best);
        }
    }
}
}