org.apache.lucene.analysis.miscellaneous.PatternAnalyzer.createComponents C# (CSharp) Method

createComponents() public method

Creates a token stream that tokenizes the given string into token terms (aka words).
public createComponents ( string fieldName, Reader reader, string text ) : TokenStreamComponents
fieldName string /// the name of the field to tokenize (currently ignored).
reader Reader /// reader (e.g. a char filter) over the original text; may be null.
text string /// the string to tokenize
return TokenStreamComponents
	  /// <summary>
	  /// Creates a token stream that tokenizes the given string into token terms (aka words).
	  /// </summary>
	  /// <param name="fieldName">the name of the field to tokenize (currently ignored).</param>
	  /// <param name="reader">reader (e.g. a char filter) over the original text; may be null,
	  /// in which case a <c>FastStringReader</c> over <paramref name="text"/> is used instead.</param>
	  /// <param name="text">the string to tokenize; only read here when <paramref name="reader"/> is null.</param>
	  /// <returns>the components wrapping the tokenizer selected for the configured pattern.</returns>
	  public TokenStreamComponents createComponents(string fieldName, Reader reader, string text)
	  {
		// Ideally the Analyzer superclass should have a method with the same signature,
		// with a default impl that simply delegates to the StringReader flavour.
		if (reader == null)
		{
		  reader = new FastStringReader(text);
		}

		// Fast paths: the two predefined patterns are handled by FastStringTokenizer,
		// which receives toLowerCase and stopWords directly. The boolean argument is
		// true for NON_WORD_PATTERN and false for WHITESPACE_PATTERN.
		if (pattern == NON_WORD_PATTERN)
		{
		  return new TokenStreamComponents(new FastStringTokenizer(reader, true, toLowerCase, stopWords));
		}
		else if (pattern == WHITESPACE_PATTERN)
		{
		  return new TokenStreamComponents(new FastStringTokenizer(reader, false, toLowerCase, stopWords));
		}

		// General path: tokenize with the configured pattern, then wrap the tokenizer
		// in a StopFilter only when a stop word set was supplied.
		Tokenizer tokenizer = new PatternTokenizer(reader, pattern, toLowerCase);

		// Declared as the common TokenStream type and assigned with a plain if, because a
		// conditional expression whose branches are StopFilter and Tokenizer has no natural
		// type and only compiles on language versions with target-typed conditionals.
		TokenStream result = tokenizer;
		if (stopWords != null)
		{
		  result = new StopFilter(matchVersion, tokenizer, stopWords);
		}

		return new TokenStreamComponents(tokenizer, result);
	  }

Methods with the same name

PatternAnalyzer::createComponents ( string fieldName, Reader reader ) : TokenStreamComponents