public void GenerateTest2()
{
    // Fix the random seed so training and sampling are reproducible;
    // the exact words asserted at the end depend on this seed.
    Accord.Math.Tools.SetupGenerator(42);

    // A small corpus of phrases drawn from a 7-word vocabulary:
    string[][] phrases =
    {
        new[] { "those", "are", "sample", "words", "from", "a", "dictionary" },
        new[] { "those", "are", "sample", "words" },
        new[] { "sample", "words", "are", "words" },
        new[] { "those", "words" },
        new[] { "those", "are", "words" },
        new[] { "words", "from", "a", "dictionary" },
        new[] { "those", "are", "words", "from", "a", "dictionary" }
    };

    // Build a codification codebook mapping each word to an integer
    // label, then translate the phrases into integer label sequences
    // suitable for training the model.
    var codebook = new Codification("Words", phrases);
    int[][] labelSequences = codebook.Translate("Words", phrases);

    // The phrases have definite start and end points, so a forward
    // topology is the appropriate choice for the model structure.
    var topology = new Forward(states: 4);
    int symbolCount = codebook["Words"].Symbols; // 7 distinct words

    // Create the hidden Markov model over the discrete word labels.
    HiddenMarkovModel model = new HiddenMarkovModel(topology, symbolCount);

    // Estimate the model parameters from the label sequences using
    // the Baum-Welch (expectation-maximization) algorithm.
    BaumWelchLearning learner = new BaumWelchLearning(model);
    double learningError = learner.Run(labelSequences);

    // Sample a new 3-symbol sequence from the learned distributions
    // and decode it back into words through the same codebook.
    int[] generated = model.Generate(3);
    string[] decoded = codebook.Translate("Words", generated);

    // With seed 42 the generated phrase is "those are words".
    Assert.AreEqual("those", decoded[0]);
    Assert.AreEqual("are", decoded[1]);
    Assert.AreEqual("words", decoded[2]);
}