public void learn_success()
{
    // Walks through Lindsay Smith's PCA tutorial and verifies that the analysis
    // reproduces the published eigenvalues, eigenvectors and projected data, first
    // with every component kept and then with only the first component retained.

    #region doc_learn_2
    // This example follows Lindsay Smith's "Tutorial on Principal Component
    // Analysis" using the framework's default settings. The tutorial itself
    // is available at http://www.sccg.sk/~haladova/principal_components.pdf

    // Step 1. Get some data
    // ---------------------
    double[][] samples =
    {
        new double[] { 2.5, 2.4 },
        new double[] { 0.5, 0.7 },
        new double[] { 2.2, 2.9 },
        new double[] { 1.9, 2.2 },
        new double[] { 3.1, 3.0 },
        new double[] { 2.3, 2.7 },
        new double[] { 2.0, 1.6 },
        new double[] { 1.0, 1.1 },
        new double[] { 1.5, 1.6 },
        new double[] { 1.1, 0.9 }
    };

    // Step 2. Subtract the mean
    // -------------------------
    // The framework takes care of this step by itself. The default "Center"
    // method only removes the mean from the data. Choosing "Standardize"
    // (PrincipalComponentMethod.Standardize) instead would additionally divide
    // by the standard deviation, which corresponds to the correlation method.
    var method = PrincipalComponentMethod.Center;

    // Step 3. Compute the covariance matrix
    // -------------------------------------
    // Accord.NET does not have to build the covariance matrix to perform PCA.
    // It relies on the SVD instead, which is numerically more stable although
    // it may cost more processing time or memory. The next unit test shows how
    // to replicate the tutorial starting from covariance matrices.

    // Set up the analysis with the chosen method and fit it to the data
    var pca = new PrincipalComponentAnalysis(method);
    pca.Learn(samples);

    // Step 4. Compute the eigenvectors and eigenvalues of the covariance matrix
    // -------------------------------------------------------------------------
    // Because the SVD is used rather than an eigendecomposition, the eigenvalues
    // are derived from the singular values. What matters in practice is not the
    // eigenvalues themselves but the proportion each of them represents.

    // Eigenvalues reported by the tutorial, largest first
    double[] expectedEigenvalues = { 1.28402771, 0.0490833989 };

    // Share of the total that each eigenvalue accounts for
    double eigenvalueTotal = expectedEigenvalues.Sum();
    double[] expectedProportions = expectedEigenvalues.Divide(eigenvalueTotal);

    // Eigenvectors reported by the tutorial, one per column,
    // ordered by decreasing eigenvalue
    double[,] expectedEigenvectors =
    {
        { -0.677873399, -0.735178656 },
        { -0.735178656,  0.677873399 }
    };

    // This is where most users get confused. An eigenvalue decomposition (EVD)
    // is not unique, so the SVD and EVD routines used by the framework may produce
    // results that differ numerically from packages such as STATA or MATLAB while
    // still being correct.
    //
    // If v is an eigenvector, then any scalar multiple a*v is an eigenvector too.
    // For the Lindsay data the framework yields a first eigenvector whose signs
    // are inverted, which is simply the case a = -1. To compare against it, the
    // first expected column is negated and written back in place.
    expectedEigenvectors.SetColumn(0, expectedEigenvectors.GetColumn(0).Multiply(-1));

    // The results agree with the tutorial (to the 9 decimal places it prints)
    Assert.IsTrue(expectedEigenvectors.IsEqual(pca.ComponentMatrix, rtol: 1e-9));
    Assert.IsTrue(expectedProportions.IsEqual(pca.ComponentProportions, rtol: 1e-9));
    Assert.IsTrue(expectedEigenvalues.IsEqual(pca.Eigenvalues, rtol: 1e-5));

    // Step 5. Deriving the new data set
    // ---------------------------------
    double[][] fullProjection = pca.Transform(samples);

    // The transformed data listed on page 18 of the tutorial
    double[,] expectedFullProjection =
    {
        {  0.827970186, -0.175115307 },
        { -1.77758033,   0.142857227 },
        {  0.992197494,  0.384374989 },
        {  0.274210416,  0.130417207 },
        {  1.67580142,  -0.209498461 },
        {  0.912949103,  0.175282444 },
        { -0.099109437, -0.349824698 },
        { -1.14457216,   0.046417258 },
        { -0.438046137,  0.017764629 },
        { -1.22382056,  -0.162675287 }
    };

    // The projection matches (up to 8 decimal places)
    Assert.IsTrue(expectedFullProjection.IsEqual(fullProjection, atol: 1e-8));

    // To project down to a single principal component,
    // it is enough to lower the number of outputs:
    pca.NumberOfOutputs = 1;

    // ...and run the transform again
    double[][] firstComponentProjection = pca.Transform(samples);

    // First column of the transformed data listed on page 18
    double[,] expectedFirstComponent =
    {
        {  0.827970186 },
        { -1.77758033  },
        {  0.992197494 },
        {  0.274210416 },
        {  1.67580142  },
        {  0.912949103 },
        { -0.099109437 },
        { -1.14457216  },
        { -0.438046137 },
        { -1.22382056  }
    };

    // The projection matches (up to 8 decimal places)
    Assert.IsTrue(expectedFirstComponent.IsEqual(firstComponentProjection, atol: 1e-8));
    #endregion

    // Repeat the single-component projection, this time requesting the
    // number of outputs when the analysis is constructed
    var singleOutputPca = new PrincipalComponentAnalysis(method, numberOfOutputs: 1);
    singleOutputPca.Learn(samples);

    double[][] constructorProjection = singleOutputPca.Transform(samples);

    // The expected values are the same first-component column checked above
    // (page 18 of the tutorial), again compared up to 8 decimal places
    Assert.IsTrue(expectedFirstComponent.IsEqual(constructorProjection, atol: 1e-8));
}