numl.MultiClassLearner.GenerateModel C# (CSharp) Method

GenerateModel() private static method

Generates and returns a new Tuple of objects: IClassifier, Score and object state
private static GenerateModel ( IGenerator generator, object[] truthExamples, object[] falseExamples, object truthLabel, double trainingPct, object state = null ) : Tuple<IClassifier, Score, object>
generator IGenerator Generator to use for the model.
truthExamples object[] True examples.
falseExamples object[] False examples.
truthLabel object Truth label object.
trainingPct double Training percentage.
state object Object state.
return Tuple<IClassifier, Score, object>
        /// <summary>
        /// Trains a single classifier that separates <paramref name="truthExamples"/> from
        /// <paramref name="falseExamples"/> and, when any examples are held out, scores it on them.
        /// </summary>
        /// <param name="generator">Generator used to build the model; its descriptor converts the examples.</param>
        /// <param name="truthExamples">Examples carrying the truth label.</param>
        /// <param name="falseExamples">Examples carrying any other label.</param>
        /// <param name="truthLabel">Label value passed to <c>ChangeClassLabels</c> to build the 1 / 0 target vector.</param>
        /// <param name="trainingPct">Fraction of the combined examples used for training; the remainder is used for testing.</param>
        /// <param name="state">Opaque value returned unchanged as the third tuple item.</param>
        /// <returns>
        /// A tuple of the trained classifier, its score (a default <c>Score</c> when nothing was held out for
        /// testing) and <paramref name="state"/>.
        /// </returns>
        private static Tuple<IClassifier, Score, object> GenerateModel(IGenerator generator, object[] truthExamples, object[] falseExamples,
                                                                                object truthLabel, double trainingPct, object state = null)
        {
            Descriptor descriptor = generator.Descriptor;

            // Concat rather than Union: Union is a set operation and would silently drop
            // examples that compare equal, shrinking the data set before training.
            object[] examples = truthExamples.Concat(falseExamples).Shuffle().ToArray();

            int total = examples.Length;

            int trainingCount = (int)System.Math.Floor((double)total * trainingPct);

            // whatever is not used for training is held out for testing
            int[] testingSlice = Learner.GetTestPoints(total - trainingCount, total).ToArray();
            int[] trainingSlice = Learner.GetTrainingPoints(testingSlice, total).ToArray();

            var training = descriptor.Convert(examples.Slice(trainingSlice).ToArray(), true).ToExamples();

            // convert labels to 1's and 0's over the full set, then keep only the training rows
            Vector y = MultiClassLearner.ChangeClassLabels(examples, descriptor, truthLabel);

            IModel model = generator.Generate(training.Item1, y.Slice(trainingSlice));

            Score score = new Score();

            if (testingSlice.Length > 0)
            {
                object[] testExamples = examples.Slice(testingSlice).ToArray();
                var testing = descriptor.Convert(testExamples, true).ToExamples();

                Vector y_pred = new Vector(testExamples.Length);

                // make sure the expected labels are 1 / 0 based, like the training targets
                Vector y_test = MultiClassLearner.ChangeClassLabels(testExamples, descriptor, truthLabel);

                for (int i = 0; i < testExamples.Length; i++)
                {
                    y_pred[i] = model.Predict(testing.Item1[i, VectorType.Row]);
                }

                score = Score.ScorePredictions(y_pred, y_test);
            }

            return new Tuple<IClassifier, Score, object>((IClassifier)model, score, state);
        }

Usage Example

Example #1
0
        /// <summary>
        /// Generate a multi-class classification model using a specialist classifier for each class label.
        /// </summary>
        /// <remarks>
        /// When the examples contain more than two distinct labels, one classifier per label is trained in
        /// parallel and the individual scores are combined. Otherwise a single classifier is trained, using
        /// the examples whose converted label equals 1 as positives and all others as negatives.
        /// </remarks>
        /// <param name="generator">The generator to use for each individual classifier.</param>
        /// <param name="examples">Training examples of any number of classes. The sequence is enumerated once.</param>
        /// <param name="trainingPercentage">Percentage of training examples to use, i.e. 70% = 0.7.
        ///   Values greater than 1 are divided by 100.</param>
        /// <param name="mixingPercentage">Percentage to mix positive and negative examples, i.e. 50% will add an additional 50% of
        ///   <paramref name="trainingPercentage"/> of negative examples into each classifier when training.
        ///   Values greater than 1 are divided by 100.</param>
        /// <param name="isMultiClass">Determines whether each class is mutually inclusive; stored on the returned model.
        ///   <para>For example: If True, each class takes on a number of classes and does not necessarily belong to one specific class.</para>
        ///   <para>The output would then be a number of predicted classes for a single prediction.  E.g. A song would be True as it may belong to classes: vocals, rock as well as bass.</para>
        /// </param>
        /// <returns>A <c>ClassificationModel</c> holding the generator, the per-label classifiers, the multi-class flag and the final score.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="generator"/> or <paramref name="examples"/> is null.</exception>
        public static ClassificationModel Learn(IGenerator generator, IEnumerable <object> examples, double trainingPercentage, double mixingPercentage = 0.5, bool isMultiClass = true)
        {
            if (generator == null)
            {
                throw new ArgumentNullException("generator");
            }

            if (examples == null)
            {
                throw new ArgumentNullException("examples");
            }

            Descriptor descriptor = generator.Descriptor;

            trainingPercentage = (trainingPercentage > 1.0 ? trainingPercentage / 100 : trainingPercentage);
            mixingPercentage   = (mixingPercentage > 1.0 ? mixingPercentage / 100 : mixingPercentage);

            // Materialize once: the sequence is walked several times below (grouping, conversion,
            // index-based slicing) and a lazy source could yield different items on each pass.
            IEnumerable <object> exampleSet = examples as ICollection <object> ?? examples.ToArray();

            Dictionary <object, object[]> classGroups = exampleSet
                                                        .GroupBy(e => descriptor.GetValue(e, descriptor.Label))
                                                        .ToDictionary(g => g.Key, g => g.ToArray());

            int classes = classGroups.Count;

            Dictionary <object, IClassifier> models = null;

            Score finalScore = new Score();

            if (classes > 2)
            {
                models = new Dictionary <object, IClassifier>(classes);

                Task <Tuple <IClassifier, Score, object> >[] learningTasks = new Task <Tuple <IClassifier, Score, object> > [classes];

                int slot = 0;

                foreach (KeyValuePair <object, object[]> group in classGroups)
                {
                    // copied into per-iteration locals so each task's closure captures its own values
                    object   label         = group.Key;
                    object[] truthExamples = group.Value;

                    models.Add(label, null);

                    // number of negatives taken from each of the other classes
                    int mix = (int)System.Math.Ceiling(((truthExamples.Length * trainingPercentage) * mixingPercentage) / classes);

                    object[] falseExamples = classGroups.Where(other => !object.Equals(other.Key, label))
                                             .SelectMany(other => other.Value.Take(mix))
                                             .ToArray();

                    learningTasks[slot++] = Task.Factory.StartNew(
                        () => MultiClassLearner.GenerateModel(generator, truthExamples, falseExamples, label, trainingPercentage, label)
                        );
                }

                Task.WaitAll(learningTasks);

                Score[] scores = new Score[learningTasks.Length];

                for (int c = 0; c < learningTasks.Length; c++)
                {
                    // Item3 is the label passed in as state, which keys the classifier dictionary
                    Tuple <IClassifier, Score, object> result = learningTasks[c].Result;

                    models[result.Item3] = result.Item1;
                    scores[c] = result.Item2;
                }

                finalScore = Score.CombineScores(scores);
            }
            else
            {
                // fallback to single classifier for two class classification

                var dataset   = descriptor.Convert(exampleSet, true).ToExamples();
                var positives = exampleSet.Slice(dataset.Y.Indices(f => f == 1d)).ToArray();
                var negatives = exampleSet.Slice(dataset.Y.Indices(w => w != 1d)).ToArray();

                var label = descriptor.GetValue(positives.First(), descriptor.Label);

                var model = MultiClassLearner.GenerateModel(generator, positives, negatives, label, trainingPercentage, label);
                finalScore = model.Item2;

                models = new Dictionary <object, IClassifier>()
                {
                    { label, model.Item1 }
                };
            }

            ClassificationModel classificationModel = new ClassificationModel()
            {
                Generator    = generator,
                Classifiers  = models,
                IsMultiClass = isMultiClass,
                Score        = finalScore
            };

            return classificationModel;
        }