public void learn_test_mitchell()
{
#region doc_mitchell_1
// We will represent Mitchell's Tennis example using a DataTable. However,
// the use of a DataTable is not required in order to use the Naive Bayes.
// Please take a look at the other examples below for simpler approaches.
DataTable data = new DataTable("Mitchell's Tennis Example");
data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");
// We will set Temperature and Humidity to be continuous
data.Columns["Temperature"].DataType = typeof(double);
data.Columns["Humidity"].DataType = typeof(double);
// Add some data
data.Rows.Add("D1", "Sunny", 38.0, 96.0, "Weak", "No");
data.Rows.Add("D2", "Sunny", 39.0, 90.0, "Strong", "No");
data.Rows.Add("D3", "Overcast", 38.0, 75.0, "Weak", "Yes");
data.Rows.Add("D4", "Rain", 25.0, 87.0, "Weak", "Yes");
data.Rows.Add("D5", "Rain", 12.0, 30.0, "Weak", "Yes");
data.Rows.Add("D6", "Rain", 11.0, 35.0, "Strong", "No");
data.Rows.Add("D7", "Overcast", 10.0, 40.0, "Strong", "Yes");
data.Rows.Add("D8", "Sunny", 24.0, 90.0, "Weak", "No");
data.Rows.Add("D9", "Sunny", 12.0, 26.0, "Weak", "Yes");
data.Rows.Add("D10", "Rain", 25, 30.0, "Weak", "Yes");
data.Rows.Add("D11", "Sunny", 26.0, 40.0, "Strong", "Yes");
data.Rows.Add("D12", "Overcast", 27.0, 97.0, "Strong", "Yes");
data.Rows.Add("D13", "Overcast", 39.0, 41.0, "Weak", "Yes");
data.Rows.Add("D14", "Rain", 23.0, 98.0, "Strong", "No");
#endregion
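// Note: the DataTable above is only one convenient representation. The same
// problem could be set up directly with jagged arrays (a minimal sketch with
// hand-picked integer codes instead of a codebook; the two rows below are
// illustrative only):
// double[][] rawInputs =
// {
//     //             Outlook, Temperature, Humidity, Wind
//     new double[] { 0,       38.0,        96.0,     0 }, // D1: Sunny, Weak
//     new double[] { 0,       39.0,        90.0,     1 }, // D2: Sunny, Strong
// };
// int[] rawOutputs = { 0, 0 }; // PlayTennis: No, No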
#region doc_mitchell_2
// Create a new codification codebook to
// convert strings into discrete symbols
Codification codebook = new Codification(data);
#endregion
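// As a quick sanity check, the codebook can now translate every symbol it
// has seen into an integer code (the exact values below assume Accord
// assigns codes in order of first appearance):
int sunny = codebook.Translate(columnName: "Outlook", value: "Sunny"); // expected: 0
int strong = codebook.Translate(columnName: "Wind", value: "Strong"); // expected: 1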
#region doc_mitchell_3
// Some distributions require constructor parameters, and as such, cannot
// be automatically initialized by the learning algorithm. For this reason,
// we might need to specify how each component should be initialized:
IUnivariateFittableDistribution[] priors =
{
new GeneralDiscreteDistribution(codebook["Outlook"].Symbols), // 3 possible values (Sunny, overcast, rain)
new NormalDistribution(), // Continuous value (Celsius)
new NormalDistribution(), // Continuous value (percentage)
new GeneralDiscreteDistribution(codebook["Wind"].Symbols) // 2 possible values (Weak, strong)
};
// Create a new Naive Bayes classifier for the two classes
var learner = new NaiveBayesLearning<IUnivariateFittableDistribution>()
{
// Tell the learner how to initialize the distributions. Here the same
// priors are reused for every class, so classIndex is ignored:
Distribution = (classIndex, variableIndex) => priors[variableIndex]
};
// Use the codebook to extract numeric symbols from the data
DataTable symbols = codebook.Apply(data);
double[][] inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
int[] outputs = symbols.ToArray<int>("PlayTennis");
// Learn the Naive Bayes model
var naiveBayes = learner.Learn(inputs, outputs);
#endregion
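// The learned model should now reflect the problem dimensions (a small
// sanity check; NumberOfInputs and NumberOfClasses are assumed to be the
// usual Accord classifier properties):
int variables = naiveBayes.NumberOfInputs; // expected: 4
int classes = naiveBayes.NumberOfClasses; // expected: 2 (Yes / No)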
#region doc_mitchell_4
// Create an instance representing a "sunny, cool, humid and windy day":
double[] instance = new double[]
{
codebook.Translate(columnName:"Outlook", value:"Sunny"), //n 0
12.0,
90.0,
codebook.Translate(columnName:"Wind", value:"Strong") // 1
};
// We can obtain a class prediction using
int predicted = naiveBayes.Decide(instance);
// Or compute probabilities of each class using
double[] probabilities = naiveBayes.Probabilities(instance);
// Or obtain the log-likelihood of the prediction
double ll = naiveBayes.LogLikelihood(instance);
// Finally, the result can be translated back using
string result = codebook.Translate("PlayTennis", predicted); // Should be "No"
#endregion
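// The classifier can also decide for many instances at once; a brief usage
// sketch using the array overload of Decide (one predicted class per row):
int[] answers = naiveBayes.Decide(inputs);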
Assert.AreEqual("No", result);
Assert.AreEqual(0, predicted);
Assert.AreEqual(0.840, probabilities[0], 1e-3);
Assert.AreEqual(-10.493243476691351, ll, 1e-6);
Assert.AreEqual(1, probabilities.Sum(), 1e-10);
Assert.AreEqual(2, probabilities.Length);
}