/// <summary>
/// Builds a naive Bayes model from the example matrix and its label vector.
/// </summary>
/// <param name="X">Example matrix; row i is paired with label <c>y[i]</c>.</param>
/// <param name="y">Label vector; each value is matched against a label statistic's <c>X.Min</c>.</param>
/// <returns>
/// A <see cref="NaiveBayesModel"/> whose root measure holds the label statistics,
/// each carrying its own normalized per-feature conditionals.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when no descriptor is set, when the label is not discrete, or when a
/// value in <paramref name="y"/> has no matching label statistic.
/// </exception>
public override IModel Generate(Matrix X, Vector y)
{
    if (Descriptor == null)
    {
        throw new InvalidOperationException("Cannot build naive bayes model without type knowledge!");
    }

    // Only discrete labels can be handled here.
    if (!Descriptor.Label.Discrete)
    {
        throw new InvalidOperationException("Need to use regression for non-discrete labels!");
    }

    this.Preprocess(X);

    // Label statistics become the probabilities of the root measure.
    Statistic[] labelStats = GetLabelStats(y);
    Measure root = new Measure
    {
        Discrete = true,
        Label = Descriptor.Label.Name,
        Probabilities = labelStats
    };

    // Template of per-feature measures; cloned once for every label on first use.
    Measure[] featureTemplate = GetBaseConditionals(X);

    // Tally every feature value of each row under the statistic of that row's label.
    for (int row = 0; row < y.Length; row++)
    {
        var label = y[row];
        var candidates = labelStats.Where(candidate => candidate.X.Min == label);
        var match = candidates.First();

        if (match.Conditionals == null)
        {
            match.Conditionals = CloneMeasure(featureTemplate);
        }

        for (int col = 0; col < X.Cols; col++)
        {
            match.Conditionals[col].Increment(X[row, col]);
        }
    }

    // Turn the accumulated counts into probabilities.
    foreach (var labelStat in labelStats)
    {
        for (int k = 0; k < labelStat.Conditionals.Length; k++)
        {
            labelStat.Conditionals[k].Normalize();
        }
    }

    // Assign label ids across the finished tree.
    LabelIds(root);

    return new NaiveBayesModel
    {
        Descriptor = Descriptor,
        NormalizeFeatures = base.NormalizeFeatures,
        FeatureNormalizer = base.FeatureNormalizer,
        FeatureProperties = base.FeatureProperties,
        Root = root
    };
}