Nuve.NGrams.NGramModel.AddStartStopSymbols C# (CSharp) Method

AddStartStopSymbols() public method

public AddStartStopSymbols ( IList<string> tokens ) : void
tokens IList<string>
return void
        /// <summary>
        /// Pads <paramref name="tokens"/> in place: start markers are inserted at the front
        /// and a single stop marker is appended at the end.
        /// </summary>
        /// <remarks>
        /// When <c>extractor.MaxNGramSize</c> is 1 or less the list is left untouched.
        /// Otherwise <c>MaxNGramSize - 1</c> start markers are added, each built by splicing
        /// a running index (0, 1, 2, ...) into <c>Start</c> at character position 2.
        /// Every marker is inserted at index 0, so the highest index ends up first in the list.
        /// </remarks>
        /// <param name="tokens">Token list to modify; <c>Insert</c> and <c>Add</c> are called on it.</param>
        public void AddStartStopSymbols(IList<string> tokens)
        {
            var maxSize = extractor.MaxNGramSize;

            if (maxSize > 1)
            {
                var markerNumber = 0;
                while (markerNumber < maxSize - 1)
                {
                    // Always insert at the head so later (higher-numbered) markers precede earlier ones.
                    string startMarker = Start.Insert(2, markerNumber.ToString());
                    tokens.Insert(0, startMarker);
                    markerNumber++;
                }

                tokens.Add(Stop);
            }
        }

Usage Example

Example #1
0
        // Verifies the padding AddStartStopSymbols applies for Unigram, Bigram, Trigram
        // and size-4 models, using the same four-word input each time.
        public void AddStartStopSymbolsTest()
        {
            string[] words = {"this", "is", "a", "test"};

            // Unigram model: the list must come back unchanged.
            var padded = new List<string>(words);
            new NGramModel(Unigram).AddStartStopSymbols(padded);
            CollectionAssert.AreEqual(new List<string>(words), padded);

            // Bigram model: one start marker in front, stop marker at the end.
            padded = new List<string>(words);
            new NGramModel(Bigram).AddStartStopSymbols(padded);
            CollectionAssert.AreEqual(
                new List<string> {"<s0>", "this", "is", "a", "test", "</s>"},
                padded);

            // Trigram model: two start markers, highest index first.
            padded = new List<string>(words);
            new NGramModel(Trigram).AddStartStopSymbols(padded);
            CollectionAssert.AreEqual(
                new List<string> {"<s1>", "<s0>", "this", "is", "a", "test", "</s>"},
                padded);

            // Model constructed with 4: three start markers.
            padded = new List<string>(words);
            new NGramModel(4).AddStartStopSymbols(padded);
            CollectionAssert.AreEqual(
                new List<string> {"<s2>", "<s1>", "<s0>", "this", "is", "a", "test", "</s>"},
                padded);
        }