public void ComputeTest2()
{
    // Two sample documents, tokenized into word arrays
    string[] spamTokens = Tokenize(@"I decided to sign up for the Disney Half Marathon. Half of a marathon is 13.1 miles. A full marathon is 26.2 miles. You may wonder why the strange number of miles. “26.2” is certainly not an even number. And after running 26 miles who cares about the point two? You might think that 26.2 miles is a whole number of kilometers. It isn’t. In fact, it is even worse in kilometers – 42.1648128. I bet you don’t see many t-shirts in England with that number printed on the front.");
    string[] loremTokens = Tokenize(@"Lorem ipsum dolor sit amet, Nulla nec tortor. Donec id elit quis purus consectetur consequat. Nam congue semper tellus. Sed erat dolor, dapibus sit amet, venenatis ornare, ultrices ut, nisi. Aliquam ante. Suspendisse scelerisque dui nec velit. Duis augue augue, gravida euismod, vulputate ac, facilisis id, sem. Morbi in orci. Nulla purus lacus, pulvinar vel, malesuada ac, mattis nec, quam. Nam molestie scelerisque quam. Nullam feugiat cursus lacus.orem ipsum dolor sit amet.");

    // Class labels matching the documents above (index 0 and 1)
    string[] classes = { "spam", "lorem" };

    // Build a Bag-of-Words over both documents, capping every word
    // count at a single occurrence so features are effectively binary
    BagOfWords bow = new BagOfWords(spamTokens, loremTokens)
    {
        MaximumOccurance = 1
    };

    // Each discrete feature can take values 0..MaximumOccurance,
    // i.e. MaximumOccurance + 1 distinct symbols per word
    int[] symbols = new int[bow.NumberOfWords];
    int symbolsPerWord = bow.MaximumOccurance + 1;
    for (int i = 0; i < symbols.Length; i++)
        symbols[i] = symbolsPerWord;

    // Training set: one feature vector per document, with its label
    int[][] inputs =
    {
        bow.GetFeatureVector(spamTokens),
        bow.GetFeatureVector(loremTokens)
    };

    int[] outputs = { 0, 1 }; // 0 = spam, 1 = lorem

    // Create the naïve Bayes model and seed every symbol probability
    // with a tiny value so no word ever has an exactly-zero likelihood
    NaiveBayes bayes = new NaiveBayes(2, symbols);
    for (int c = 0; c < bayes.ClassCount; c++)
        for (int f = 0; f < bayes.SymbolCount.Length; f++)
            for (int s = 0; s < bayes.SymbolCount[f]; s++)
                bayes.Distributions[c, f][s] = 1e-10;

    // Learn the per-class symbol distributions from the training data
    bayes.Estimate(inputs, outputs);

    // Sanity check: every estimated distribution must be normalized
    for (int c = 0; c < bayes.ClassCount; c++)
        for (int f = 0; f < bayes.SymbolCount.Length; f++)
            Assert.AreEqual(1, bayes.Distributions[c, f].Sum(), 1e-5);

    // The lorem document must be classified back as "lorem"
    int loremAnswer = bayes.Compute(bow.GetFeatureVector(loremTokens));
    Assert.AreEqual("lorem", classes[loremAnswer]);

    // The spam document must be classified back as "spam"
    int spamAnswer = bayes.Compute(bow.GetFeatureVector(spamTokens));
    Assert.AreEqual("spam", classes[spamAnswer]);
}