/// <summary>
/// Generate a multi-class classification model using a specialist classifier for each class label.
/// </summary>
/// <param name="generator">The generator to use for each individual classifier.</param>
/// <param name="examples">Training examples of any number of classes.</param>
/// <param name="trainingPercentage">Percentage of training examples to use, i.e. 70% = 0.7.</param>
/// <param name="mixingPercentage">Percentage to mix positive and negative examples, i.e. 50% will add an additional 50% of
/// <paramref name="trainingPercentage"/> of negative examples into each classifier when training.</param>
/// <param name="isMultiClass">Determines whether classes are mutually inclusive.
/// <para>For example: if <c>true</c>, an item may belong to several classes at once rather than exactly one.</para>
/// <para>The output would then be a number of predicted classes for a single prediction. E.g. a song may belong to the classes: vocals, rock as well as bass.</para>
/// </param>
/// <returns>A <see cref="ClassificationModel"/> holding the trained classifier(s) and their combined score.</returns>
public static ClassificationModel Learn(IGenerator generator, IEnumerable<object> examples, double trainingPercentage, double mixingPercentage = 0.5, bool isMultiClass = true)
{
    Descriptor descriptor = generator.Descriptor;

    // Accept percentages supplied either as fractions (0.7) or whole numbers (70).
    trainingPercentage = (trainingPercentage > 1.0 ? trainingPercentage / 100 : trainingPercentage);
    mixingPercentage = (mixingPercentage > 1.0 ? mixingPercentage / 100 : mixingPercentage);

    // Materialize once: the source may be a lazy sequence, and the two-class
    // branch below would otherwise enumerate it three times.
    object[] allExamples = examples.ToArray();

    // Bucket the training examples by their class label.
    var classGroups = allExamples.Select(s => new
                                 {
                                     Label = generator.Descriptor.GetValue(s, descriptor.Label),
                                     Item = s
                                 })
                                 .GroupBy(g => g.Label)
                                 .ToDictionary(k => k.Key, v => v.Select(s => s.Item).ToArray());

    int classes = classGroups.Count;
    Dictionary<object, IClassifier> models = null;
    Score finalScore = new Score();

    if (classes > 2)
    {
        // One-vs-rest: train a specialist classifier per class label, in parallel.
        models = new Dictionary<object, IClassifier>(classes);
        var learningTasks = new Task<Tuple<IClassifier, Score, object>>[classes];

        for (int y = 0; y < classes; y++)
        {
            // ElementAt on a dictionary is O(n); resolve the group once per iteration
            // instead of re-walking the dictionary for Key/Value repeatedly.
            var group = classGroups.ElementAt(y);
            object label = group.Key;
            object[] truthExamples = group.Value;

            models.Add(label, null);

            // Number of negative examples to draw from each of the other classes.
            int mix = (int)System.Math.Ceiling(((truthExamples.Length * trainingPercentage) * mixingPercentage) / classes);

            object[] falseExamples = classGroups.Where(w => w.Key != label)
                                                .SelectMany(s => s.Value.Take(mix))
                                                .ToArray();

            // NOTE(review): 'label' is passed both as the truth label and as the final
            // argument — confirm against MultiClassLearner.GenerateModel's signature.
            learningTasks[y] = Task.Factory.StartNew(
                () => MultiClassLearner.GenerateModel(generator, truthExamples, falseExamples, label, trainingPercentage, label)
            );
        }

        Task.WaitAll(learningTasks);

        // Collect each specialist model and combine the per-class scores.
        Score[] scores = new Score[learningTasks.Length];
        for (int c = 0; c < learningTasks.Length; c++)
        {
            models[learningTasks[c].Result.Item3] = learningTasks[c].Result.Item1;
            scores[c] = learningTasks[c].Result.Item2;
        }

        finalScore = Score.CombineScores(scores);
    }
    else
    {
        // Fall back to a single classifier for two-class classification:
        // split examples into positives (Y == 1) and negatives (Y != 1).
        var dataset = descriptor.Convert(allExamples, true).ToExamples();
        var positives = allExamples.Slice(dataset.Y.Indices(f => f == 1d)).ToArray();
        var negatives = allExamples.Slice(dataset.Y.Indices(w => w != 1d)).ToArray();
        var label = generator.Descriptor.GetValue(positives.First(), descriptor.Label);

        var model = MultiClassLearner.GenerateModel(generator, positives, negatives, label, trainingPercentage, label);
        finalScore = model.Item2;
        models = new Dictionary<object, IClassifier>()
        {
            { label, model.Item1 }
        };
    }

    return new ClassificationModel()
    {
        Generator = generator,
        Classifiers = models,
        IsMultiClass = isMultiClass,
        Score = finalScore
    };
}