org.apache.lucene.analysis.compound.hyphenation.HyphenationTree.searchPatterns C# (CSharp) Method

searchPatterns() protected method

Search for all possible partial matches of word starting at index and update the interletter values. In other words, it does something like:

for(i=0; i<patterns.length; i++) { if ( word.substring(index).startsWith(patterns[i]) ) update_interletter_values(patterns[i]); }

But it is done in an efficient way since the patterns are stored in a ternary tree. In fact, this is the whole purpose of having the tree: doing this search without having to test every single pattern. The number of patterns for languages such as English ranges from 4000 to 10000. Thus, doing thousands of string comparisons for each word to hyphenate would be really slow without the tree. The tradeoff is memory, but using a ternary tree instead of a trie almost halves the memory used by Lout or TeX. It's also faster than using a hash table.

protected searchPatterns ( char[] word, int index, sbyte[] il ) : void
word char[] null terminated word to match
index int start index from word
il sbyte[] interletter values array to update
return void
	  /// <summary>
	  /// Searches for every pattern that matches <paramref name="word"/> starting at
	  /// <paramref name="index"/> and merges the values of each match into
	  /// <paramref name="il"/>, keeping the larger value at every position.
	  /// The lookup walks the ternary tree (sc / lo / eq / hi) instead of comparing
	  /// the word against every stored pattern.
	  /// </summary>
	  /// <param name="word">null terminated word to match</param>
	  /// <param name="index">start index from word</param>
	  /// <param name="il">interletter values array to update</param>
	  protected internal virtual void searchPatterns(char[] word, int index, sbyte[] il)
	  {
		int i = index;
		char sp = word[i];
		char p = root;

		while (p > 0 && p < sc.Length)
		{
		  if (sc[p] == 0xFFFF)
		  {
			// Compressed branch: presumably the rest of the key is stored in kv
			// starting at lo[p]. A zero result from hstrcmp is treated as a match.
			// Either way this is the last candidate on this path, so we return.
			if (hstrcmp(word, i, kv.Array, lo[p]) == 0)
			{
			  updateInterletterValues(getValues(eq[p]), index, il); // data pointer is in eq[]
			}
			return;
		  }

		  int d = sp - sc[p];
		  if (d != 0)
		  {
			// No match on this node: continue in the lower or higher subtree.
			p = d < 0 ? lo[p] : hi[p];
			continue;
		  }

		  if (sp == 0)
		  {
			// Matched the terminating null char of the word: nothing left to search.
			break;
		  }

		  // Current char matched: advance in the word and descend through eq.
		  sp = word[++i];
		  p = eq[p];

		  // look for a pattern ending at this position by searching for
		  // the null char ( splitchar == 0 )
		  char q = p;
		  while (q > 0 && q < sc.Length)
		  {
			if (sc[q] == 0xFFFF) // stop at compressed branch
			{
			  break;
			}
			if (sc[q] == 0)
			{
			  updateInterletterValues(getValues(eq[q]), index, il);
			  break;
			}

			// Conceptually this would be: q = sc[q] < 0 ? hi[q] : lo[q];
			// but char is unsigned (in C# as in the original Java), so sc[q]
			// is never negative and only the lo link is followed.
			q = lo[q];
		  }
		}
	  }

	  /// <summary>
	  /// Merges <paramref name="values"/> into <paramref name="il"/> beginning at
	  /// <paramref name="index"/>: each slot of <paramref name="il"/> is raised to the
	  /// corresponding value when that value is larger. Values that would fall past
	  /// the end of <paramref name="il"/> are ignored.
	  /// </summary>
	  /// <param name="values">values of the matched pattern</param>
	  /// <param name="index">position in <paramref name="il"/> that receives values[0]</param>
	  /// <param name="il">interletter values array to update</param>
	  private static void updateInterletterValues(sbyte[] values, int index, sbyte[] il)
	  {
		for (int k = 0, j = index; k < values.Length; k++, j++)
		{
		  if (j < il.Length && values[k] > il[j])
		  {
			il[j] = values[k];
		  }
		}
	  }