/// <summary>
/// Feeds one training sentence into the lemma table: for each inner token the
/// lowercased word form, its lemma and its matched tag id are stored in <c>table</c>.
/// </summary>
/// <param name="sample">Sentence whose tokens are read via <c>CountWords()</c> and <c>GetWord(index)</c>.</param>
/// <returns>Always <c>true</c>.</returns>
/// <remarks>
/// Increments <c>n_learn_samples</c> once per call and <c>n_learn_wordforms</c> once per stored token.
/// Tokens whose lemma satisfies <c>IsUnknownLexem</c> or <c>IsNumword</c> are skipped.
/// </remarks>
public bool ProcessTrainingSample(SentenceData sample)
{
    n_learn_samples++;

    // The first and last tokens are never visited.
    // NOTE(review): presumably these are sentence-boundary markers - confirm against SentenceData.
    for (int iword = 1; iword < sample.CountWords() - 1; ++iword)
    {
        WordData token = sample.GetWord(iword);

        // NOTE(review): ToLower() uses the current thread culture; confirm that lookup code
        // normalizes case the same way before switching to an invariant variant.
        string wordform = token.GetWord().ToLower();

        if (wordform.Contains(" "))
        {
            // Collapse runs of two or more spaces into a single space.
            // The static Regex.Replace reuses the framework's cached pattern instead of
            // constructing a new Regex object for every token.
            wordform = System.Text.RegularExpressions.Regex.Replace(wordform, "[ ]{2,}", " ");
        }

        string lemma = gren.GetEntryName(token.GetEntryID());
        if (IsUnknownLexem(lemma) || IsNumword(lemma))
        {
            // Such lemmas are not stored in the table.
            continue;
        }

        int posTag = tags.MatchTags(token, gren);
        table.Store(posTag, wordform, lemma);
        n_learn_wordforms++;
    }

    return true;
}