// End-to-end test: learns a C4.5 decision tree on the full UCI Nursery dataset
// (12,960 samples, 8 categorical attributes, 5 classes) and verifies the tree —
// and its compiled expression form — reproduces every training label exactly.
public void LargeRunTest()
{
    #region doc_nursery
    // This example uses the Nursery Database available from the University of
    // California Irvine repository of machine learning databases, available at
    //
    //   http://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.names
    //
    // The description paragraph is listed as follows.
    //
    //   Nursery Database was derived from a hierarchical decision model
    //   originally developed to rank applications for nursery schools. It
    //   was used during several years in 1980's when there was excessive
    //   enrollment to these schools in Ljubljana, Slovenia, and the
    //   rejected applications frequently needed an objective
    //   explanation. The final decision depended on three subproblems:
    //   occupation of parents and child's nursery, family structure and
    //   financial standing, and social and health picture of the family.
    //   The model was developed within expert system shell for decision
    //   making DEX (M. Bohanec, V. Rajkovic: Expert system for decision
    //   making. Sistemica 1(1), pp. 145-157, 1990.).

    // Let's begin by loading the raw data. This string variable contains
    // the contents of the nursery.data file as a single, continuous text.
    //
    string nurseryData = Resources.nursery;

    // Those are the input columns available in the data
    //
    string[] inputColumns =
    {
        "parents", "has_nurs", "form", "children",
        "housing", "finance", "social", "health"
    };

    // And this is the output, the last column of the data.
    //
    string outputColumn = "output";

    // Let's populate a data table with this information.
    //
    DataTable table = new DataTable("Nursery");
    table.Columns.Add(inputColumns);
    table.Columns.Add(outputColumn);

    // RemoveEmptyEntries guards against a trailing newline in the resource
    // file, which would otherwise produce a malformed single-cell row.
    string[] lines = nurseryData.Split(
        new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);

    foreach (var line in lines)
        table.Rows.Add(line.Split(','));

    // Now, we have to convert the textual, categorical data found
    // in the table to a more manageable discrete representation.
    //
    // For this, we will create a codebook to translate text to
    // discrete integer symbols:
    //
    Codification codebook = new Codification(table);

    // And then convert all data into symbols
    //
    DataTable symbols = codebook.Apply(table);
    double[][] inputs = symbols.ToArray(inputColumns);
    int[] outputs = symbols.ToArray<int>(outputColumn);

    // From now on, we can start creating the decision tree.
    //
    var attributes = DecisionVariable.FromCodebook(codebook, inputColumns);
    DecisionTree tree = new DecisionTree(attributes, classes: 5);

    // Now, let's create the C4.5 algorithm
    C45Learning c45 = new C45Learning(tree);

    // and learn a decision tree. The value of
    // the error variable below should be 0.
    //
    double error = c45.Run(inputs, outputs);

    // To compute a decision for one of the input points,
    // such as the 25-th example in the set, we can use
    //
    int y = tree.Compute(inputs[25]);
    #endregion

    // The tree must fit the training set perfectly (error == 0), and the
    // decision shown in the documentation example above must actually
    // match that sample's label (previously computed but never checked).
    Assert.AreEqual(0, error);
    Assert.AreEqual(outputs[25], y);

    // Verify the learned tree reproduces every training label.
    for (int i = 0; i < inputs.Length; i++)
    {
        int expected = outputs[i];
        int actual = tree.Compute(inputs[i]);
        Assert.AreEqual(expected, actual);
    }

#if !NET35
    // Finally, we can also convert our tree to a native
    // function, improving efficiency considerably, with
    //
    Func<double[], int> func = tree.ToExpression().Compile();

    // Again, to compute a new decision, we can just use
    //
    int z = func(inputs[25]);

    // The compiled function must agree with the label for the same
    // example, and with the tree on every sample in the set.
    Assert.AreEqual(outputs[25], z);

    for (int i = 0; i < inputs.Length; i++)
    {
        int expected = outputs[i];
        int actual = func(inputs[i]);
        Assert.AreEqual(expected, actual);
    }
#endif
}