public PatternAnalyzer ( System.Version matchVersion, Pattern pattern, bool toLowerCase, CharArraySet stopWords ) : System | ||
matchVersion | System.Version | currently does nothing |
pattern | Pattern | a regular expression delimiting tokens |
toLowerCase | bool | if true, returns tokens after applying String.toLowerCase() |
stopWords | CharArraySet | if non-null, ignores all tokens that are contained in the given stop set (after previously having applied toLowerCase() if applicable). For example, created via WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")) or other stop words lists. |
return | System |
/// <summary>
/// Initializes a new analyzer from a token-delimiting pattern, a lower-casing
/// flag and an optional stop word set.
/// </summary>
/// <param name="matchVersion">Stored on the instance; not otherwise used by this constructor.</param>
/// <param name="pattern">A regular expression delimiting tokens. Must not be null.</param>
/// <param name="toLowerCase">If true, tokens are returned after lower-casing.</param>
/// <param name="stopWords">
/// If non-null, tokens contained in this set are ignored (after lower-casing,
/// if applicable). An empty set is treated the same as null.
/// </param>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="pattern"/> is null.
/// </exception>
public PatternAnalyzer(Version matchVersion, Pattern pattern, bool toLowerCase, CharArraySet stopWords)
{
    if (pattern == null)
    {
        // ArgumentNullException derives from ArgumentException, so callers that
        // already catch ArgumentException are unaffected; the parameter name is
        // now reported alongside the original message.
        throw new System.ArgumentNullException("pattern", "pattern must not be null");
    }

    // Replace a pattern that eqPattern reports as equal to one of the predefined
    // patterns with the shared instance itself.
    // NOTE(review): presumably this lets other code recognise the predefined
    // patterns by reference - confirm against the tokenizing code.
    if (eqPattern(NON_WORD_PATTERN, pattern))
    {
        pattern = NON_WORD_PATTERN;
    }
    else if (eqPattern(WHITESPACE_PATTERN, pattern))
    {
        pattern = WHITESPACE_PATTERN;
    }

    // Normalize an empty stop set to null so "no stop words" has a single representation.
    if (stopWords != null && stopWords.size() == 0)
    {
        stopWords = null;
    }

    this.pattern = pattern;
    this.toLowerCase = toLowerCase;
    this.stopWords = stopWords;
    this.matchVersion = matchVersion;
}