Accord.Tests.MachineLearning.NaiveBayesTest.ComputeTest2 C# (CSharp) Method

ComputeTest2() public method

public ComputeTest2 ( ) : void
return void
        // Trains a two-class naïve Bayes model on bag-of-words feature vectors built
        // from two sample texts, then checks that each text is classified back into
        // the class it was used to train.
        public void ComputeTest2()
        {
            // Sample documents, already split into tokens
            string[] spamTokens = Tokenize(@"I decided to sign up for the Disney Half Marathon. Half of a marathon is 13.1 miles. A full marathon is 26.2 miles. You may wonder why the strange number of miles. “26.2” is certainly not an even number. And after running 26 miles who cares about the point two? You might think that 26.2 miles is a whole number of kilometers. It isn’t. In fact, it is even worse in kilometers – 42.1648128. I bet you don’t see many t-shirts in England with that number printed on the front.");

            string[] loremTokens = Tokenize(@"Lorem ipsum dolor sit amet,  Nulla nec tortor. Donec id elit quis purus consectetur consequat. Nam congue semper tellus. Sed erat dolor, dapibus sit amet, venenatis ornare, ultrices ut, nisi. Aliquam ante. Suspendisse scelerisque dui nec velit. Duis augue augue, gravida euismod, vulputate ac, facilisis id, sem. Morbi in orci. Nulla purus lacus, pulvinar vel, malesuada ac, mattis nec, quam. Nam molestie scelerisque quam. Nullam feugiat cursus lacus.orem ipsum dolor sit amet.");

            // Class labels, indexed by the integer answer of the classifier
            string[] labels = { "spam", "lorem" };

            // Build the Bag-of-Words from both texts; each word is counted
            // at most once in a feature vector (MaximumOccurance = 1)
            BagOfWords bagOfWords = new BagOfWords(spamTokens, loremTokens)
            {
                MaximumOccurance = 1
            };

            // One entry per word; every word can take the values
            // 0..MaximumOccurance, hence MaximumOccurance + 1 symbols
            int[] symbolCounts = new int[bagOfWords.NumberOfWords];
            int symbolsPerWord = bagOfWords.MaximumOccurance + 1;
            for (int word = 0; word < symbolCounts.Length; word++)
            {
                symbolCounts[word] = symbolsPerWord;
            }

            // Training set: one feature vector per text ...
            int[] spamVector = bagOfWords.GetFeatureVector(spamTokens);
            int[] loremVector = bagOfWords.GetFeatureVector(loremTokens);
            int[][] trainingInputs = { spamVector, loremVector };

            // ... and the matching class index for each vector
            const int spamClass = 0;
            const int loremClass = 1;
            int[] trainingOutputs = { spamClass, loremClass };

            // Two-class naïve Bayes over the symbols defined above
            NaiveBayes classifier = new NaiveBayes(2, symbolCounts);

            // Seed every distribution entry with a tiny value before estimating
            // (the original comments call these "prior probabilities")
            for (int c = 0; c < classifier.ClassCount; c++)
            {
                for (int v = 0; v < classifier.SymbolCount.Length; v++)
                {
                    for (int s = 0; s < classifier.SymbolCount[v]; s++)
                    {
                        classifier.Distributions[c, v][s] = 1e-10;
                    }
                }
            }

            // Fit the model to the training data
            classifier.Estimate(trainingInputs, trainingOutputs);

            // After estimation, each per-class, per-variable distribution must sum to one
            for (int c = 0; c < classifier.ClassCount; c++)
            {
                for (int v = 0; v < classifier.SymbolCount.Length; v++)
                {
                    double total = classifier.Distributions[c, v].Sum();
                    Assert.AreEqual(1, total, 1e-5);
                }
            }

            // The lorem text must be classified as "lorem"
            int[] loremQuery = bagOfWords.GetFeatureVector(loremTokens);
            int loremAnswer = classifier.Compute(loremQuery);
            string loremLabel = labels[loremAnswer];
            Assert.AreEqual("lorem", loremLabel);

            // The spam text must be classified as "spam"
            int[] spamQuery = bagOfWords.GetFeatureVector(spamTokens);
            int spamAnswer = classifier.Compute(spamQuery);
            string spamLabel = labels[spamAnswer];
            Assert.AreEqual("spam", spamLabel);
        }