public void learn_success()
{
    // Walks through Lindsay Smith's PCA tutorial using a kernel PCA with a
    // linear kernel, and checks the eigenvalues, their proportions and the
    // projected data against the figures printed in the tutorial: first
    // with every component, then with only the first component retained.
    #region doc_learn_1
    // This example reproduces Lindsay Smith's "Tutorial on Principal
    // Component Analysis" with the framework's default method. The
    // tutorial is available online at
    // http://www.sccg.sk/~haladova/principal_components.pdf

    // Step 1. Get some data
    // ---------------------
    double[][] samples =
    {
        new[] { 2.5, 2.4 },
        new[] { 0.5, 0.7 },
        new[] { 2.2, 2.9 },
        new[] { 1.9, 2.2 },
        new[] { 3.1, 3.0 },
        new[] { 2.3, 2.7 },
        new[] { 2.0, 1.6 },
        new[] { 1.0, 1.1 },
        new[] { 1.5, 1.6 },
        new[] { 1.1, 0.9 }
    };

    // Step 2. Subtract the mean
    // -------------------------
    // Note: the framework takes care of this step on its own. The default
    // "Center" method only subtracts the mean. Choosing "Standardize" as
    // the AnalysisMethod instead subtracts the mean *and* divides by the
    // standard deviation, which corresponds to the correlation method.
    PrincipalComponentMethod analysisMethod = PrincipalComponentMethod.Center; // PrincipalComponentMethod.Standardize

    // Step 3. Compute the covariance matrix
    // -------------------------------------
    // Note: Accord.NET does not have to build the covariance matrix to
    // compute PCA. It relies on the SVD method, which is numerically more
    // stable but may cost more processing or memory. To replicate the
    // tutorial through covariance matrices, please see the next unit test.

    // Build the analysis with the chosen method
    KernelPrincipalComponentAnalysis kpca =
        new KernelPrincipalComponentAnalysis(new Linear(), analysisMethod);

    // Run it on the data
    kpca.Learn(samples);

    // Step 4. Compute the eigenvectors and eigenvalues of the covariance matrix
    // -------------------------------------------------------------------------
    // Note: because Accord.NET uses the SVD method instead of the
    // Eigendecomposition method, the Eigenvalues are derived from the
    // singular values. What matters, however, is not the Eigenvalues
    // themselves but their proportion.

    // Eigenvalues expected by the tutorial, in descending order:
    double[] tutorialEigenvalues = { 1.28402771, 0.0490833989 };

    // Share of the total taken by each of them:
    double total = tutorialEigenvalues.Sum();
    double[] tutorialProportions = tutorialEigenvalues.Divide(total);

    bool proportionsMatch = tutorialProportions.IsEqual(kpca.ComponentProportions, rtol: 1e-9);
    Assert.IsTrue(proportionsMatch);

    // The eigenvalues reported by the analysis are compared after being
    // divided by (number of samples - 1).
    int degreesOfFreedom = samples.Length - 1;
    bool eigenvaluesMatch = tutorialEigenvalues.IsEqual(kpca.Eigenvalues.Divide(degreesOfFreedom), rtol: 1e-5);
    Assert.IsTrue(eigenvaluesMatch);

    // Step 5. Deriving the new data set
    // ---------------------------------
    double[][] projection = kpca.Transform(samples);

    // transformedData as shown in pg. 18 of the tutorial; the comparison
    // below is made against these values with their sign flipped.
    double[,] tutorialProjection =
    {
        {  0.827970186, -0.175115307 },
        { -1.77758033,   0.142857227 },
        {  0.992197494,  0.384374989 },
        {  0.274210416,  0.130417207 },
        {  1.67580142,  -0.209498461 },
        {  0.912949103,  0.175282444 },
        { -0.099109437, -0.349824698 },
        { -1.14457216,   0.046417258 },
        { -0.438046137,  0.017764629 },
        { -1.22382056,  -0.162675287 }
    };
    double[,] expectedProjection = tutorialProjection.Multiply(-1);

    // Everything matches (up to 8 decimal places)
    Assert.IsTrue(expectedProjection.IsEqual(projection, atol: 1e-8));

    // To wrap up, the whole data set can be projected
    double[][] output1 = kpca.Transform(samples);

    // Or only onto its first components, by setting
    // NumberOfOutputs to the number of components wanted:
    kpca.NumberOfOutputs = 1;

    // Followed by another call to transform:
    double[][] output2 = kpca.Transform(samples);

    // The outputs can also be capped at 80% of explained variance:
    kpca.ExplainedVariance = 0.8;

    // Again followed by a call to transform:
    double[][] output3 = kpca.Transform(samples);
    #endregion

    // First column of the transformedData shown in pg. 18, sign flipped in
    // the same way as above. Both single-component checks below expect
    // exactly these values, so the matrix is built once and shared.
    double[,] tutorialFirstComponent =
    {
        {  0.827970186 },
        { -1.77758033  },
        {  0.992197494 },
        {  0.274210416 },
        {  1.67580142  },
        {  0.912949103 },
        { -0.099109437 },
        { -1.14457216  },
        { -0.438046137 },
        { -1.22382056  }
    };
    double[,] expectedFirstComponent = tutorialFirstComponent.Multiply(-1);

    // The analysis above was left restricted (NumberOfOutputs = 1, then
    // ExplainedVariance = 0.8), so a single output column is expected here.
    double[][] reducedProjection = kpca.Transform(samples);

    // Everything matches (up to 8 decimal places)
    Assert.IsTrue(expectedFirstComponent.IsEqual(reducedProjection, atol: 1e-8));

    // Build a second analysis through the parameterless constructor and
    // property setters, asking for a single output from the start.
    KernelPrincipalComponentAnalysis singleOutput = new KernelPrincipalComponentAnalysis();
    singleOutput.Kernel = new Linear();
    singleOutput.Method = analysisMethod;
    singleOutput.NumberOfOutputs = 1;

    // Run it on the data
    singleOutput.Learn(samples);

    double[][] singleOutputProjection = singleOutput.Transform(samples);

    // Everything matches (up to 8 decimal places)
    Assert.IsTrue(expectedFirstComponent.IsEqual(singleOutputProjection, atol: 1e-8));
}