public void SimpleGestureRecognitionTest()
{
// Let's say we would like to build a very simple mechanism for
// gesture recognition. In this example, we will be trying to
// create a classifier that can distinguish between the words
// "hello", "car", and "wardrobe".
// Let's say we decided to acquire some data, and we asked some
// people to perform those words in front of a Kinect camera, and,
// using Microsoft's SDK, we were able to capture the x and y
// coordinates of each hand while the word was being performed.
// Let's say we decided to represent our frames as:
//
// double[] frame = { leftHandX, leftHandY, rightHandX, rightHandY };
//
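// For reference, here is a rough sketch of how one such frame could be
// assembled with the Kinect for Windows SDK (illustrative only; here,
// `skeleton` would be a tracked Skeleton obtained from a SkeletonFrame):
//
//   double[] frame =
//   {
//       skeleton.Joints[JointType.HandLeft].Position.X,
//       skeleton.Joints[JointType.HandLeft].Position.Y,
//       skeleton.Joints[JointType.HandRight].Position.X,
//       skeleton.Joints[JointType.HandRight].Position.Y,
//   };
//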
// Since we captured words, this means we captured sequences of
// frames as we described above. Let's write some of those as
// rough examples to explain how gesture recognition can be done:
double[][] hello =
{
new double[] { 1.0, 0.1, 0.0, 0.0 }, // let's say the word
new double[] { 0.0, 1.0, 0.1, 0.1 }, // hello took 6 frames
new double[] { 0.0, 1.0, 0.1, 0.1 }, // to be recorded.
new double[] { 0.0, 0.0, 1.0, 0.0 },
new double[] { 0.0, 0.0, 1.0, 0.0 },
new double[] { 0.0, 0.0, 0.1, 1.1 },
};
double[][] car =
{
new double[] { 0.0, 0.0, 0.0, 1.0 }, // the car word
new double[] { 0.1, 0.0, 1.0, 0.1 }, // took only 4.
new double[] { 0.0, 0.0, 0.1, 0.0 },
new double[] { 1.0, 0.0, 0.0, 0.0 },
};
double[][] wardrobe =
{
new double[] { 0.0, 0.0, 1.0, 0.0 }, // same for the
new double[] { 0.1, 0.0, 1.0, 0.1 }, // wardrobe word.
new double[] { 0.0, 0.1, 1.0, 0.0 },
new double[] { 0.1, 0.0, 1.0, 0.1 },
};
// Here, please note that a real-world example would involve *lots*
// of samples for each word. Here, we are considering just one from
// each class, which is clearly sub-optimal and should _never_ be done
// in practice. For example purposes, however, please disregard this.
// Those are the words we have in our vocabulary:
//
double[][][] words = { hello, car, wardrobe };
// Now, let's associate integer class labels with them. The labels tell
// the learning algorithm which word each sequence belongs to, and become
// especially important when there are multiple samples for each word.
//
int[] labels = { 0, 1, 2 };
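// As a rough illustration (not used in this test), if we had recorded,
// say, two sequences per word, the samples and labels could be paired as:
//
//   double[][][] samples = { hello1, hello2, car1, car2, wardrobe1, wardrobe2 };
//   int[] sampleLabels   = {      0,      0,    1,    1,         2,         2 };
//
// where hello1, hello2, and so on would be additional recorded sequences.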
// We will create our classifiers assuming an independent
// Gaussian distribution for each component in our feature
// vectors (similar to the Naive Bayes assumption).
var initial = new Independent<NormalDistribution>
(
new NormalDistribution(0, 1),
new NormalDistribution(0, 1),
new NormalDistribution(0, 1),
new NormalDistribution(0, 1)
);
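// (Note that we passed four univariate Normal distributions above because
// each frame in our data has four features: leftHandX, leftHandY,
// rightHandX, and rightHandY.)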
// Now, we can proceed and create our classifier.
//
int numberOfWords = 3; // we are trying to distinguish between 3 words
int numberOfStates = 5; // this value is typically found by trial-and-error (e.g. cross-validation)
var hmm = new HiddenMarkovClassifier<Independent<NormalDistribution>>
(
classes: numberOfWords,
topology: new Forward(numberOfStates), // word classifiers should use a forward topology
initial: initial
);
// Create a new learning algorithm to train the sequence classifier
var teacher = new HiddenMarkovClassifierLearning<Independent<NormalDistribution>>(hmm,
// Train each model until the log-likelihood changes less than 0.001
modelIndex => new BaumWelchLearning<Independent<NormalDistribution>>(hmm.Models[modelIndex])
{
Tolerance = 0.001,
Iterations = 100,
// This is necessary so the code doesn't blow up when it realizes
// there is only one sample per word class. However, regularization
// can also be needed in normal situations.
//
FittingOptions = new IndependentOptions()
{
InnerOption = new NormalOptions() { Regularization = 1e-5 }
}
}
);
// Finally, we can run the learning algorithm!
double logLikelihood = teacher.Run(words, labels);
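// As a quick sanity check, the value returned by Run is a log-likelihood
// and should at least be a finite number:
Assert.IsFalse(double.IsNaN(logLikelihood));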
// At this point, the classifier should be successfully
// able to distinguish between our three word classes:
//
int tc1 = hmm.Compute(hello);
int tc2 = hmm.Compute(car);
int tc3 = hmm.Compute(wardrobe);
Assert.AreEqual(0, tc1);
Assert.AreEqual(1, tc2);
Assert.AreEqual(2, tc3);
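// As an additional usage sketch (assuming the Compute overload that also
// reports the class response is available in this version of the library),
// we can retrieve a rough confidence value along with the decision:
double helloResponse;
int helloClass = hmm.Compute(hello, out helloResponse);
Assert.AreEqual(0, helloClass);
Assert.IsFalse(double.IsNaN(helloResponse));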
// Now, we can use the Markov classifier to initialize an HCRF
var function = new MarkovMultivariateFunction(hmm);
var hcrf = new HiddenConditionalRandomField<double[]>(function);
// We can check that both models are equivalent, although their
// formulations can be learned with different methods
//
for (int i = 0; i < words.Length; i++)
{
// The predicted classes should be the same
int expected = hmm.Compute(words[i]);
int actual = hcrf.Compute(words[i]);
// The per-class log-likelihoods should also be the same
double h0 = hmm.LogLikelihood(words[i], 0);
double c0 = hcrf.LogLikelihood(words[i], 0);
double h1 = hmm.LogLikelihood(words[i], 1);
double c1 = hcrf.LogLikelihood(words[i], 1);
double h2 = hmm.LogLikelihood(words[i], 2);
double c2 = hcrf.LogLikelihood(words[i], 2);
Assert.AreEqual(expected, actual);
Assert.AreEqual(h0, c0, 1e-10);
Assert.IsTrue(h1.IsRelativelyEqual(c1, 1e-10));
Assert.IsTrue(h2.IsRelativelyEqual(c2, 1e-10));
Assert.IsFalse(double.IsNaN(c0));
Assert.IsFalse(double.IsNaN(c1));
Assert.IsFalse(double.IsNaN(c2));
}
// Now we can learn the HCRF using one of the best learning
// algorithms available, Resilient Backpropagation (RProp) learning:
// Create a learning algorithm
var rprop = new HiddenResilientGradientLearning<double[]>(hcrf)
{
Iterations = 50,
Tolerance = 1e-5
};
// Run the algorithm and learn the models
double error = rprop.Run(words, labels);
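// Again, as a quick sanity check, the error returned by Run
// should be a finite number:
Assert.IsFalse(double.IsNaN(error));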
// At this point, the HCRF should be successfully
// able to distinguish between our three word classes:
//
int hc1 = hcrf.Compute(hello);
int hc2 = hcrf.Compute(car);
int hc3 = hcrf.Compute(wardrobe);
Assert.AreEqual(0, hc1);
Assert.AreEqual(1, hc2);
Assert.AreEqual(2, hc3);
}