public void ConstructorTest2()
{
    // Reproduces Lindsay Smith's "Tutorial on Principal Component Analysis"
    // following the paper's original method, available online at
    // http://www.sccg.sk/~haladova/principal_components.pdf

    // Step 1. Gather the sample data (two correlated variables, pg. 12)
    // -----------------------------------------------------------------
    double[,] samples =
    {
        { 2.5, 2.4 },
        { 0.5, 0.7 },
        { 2.2, 2.9 },
        { 1.9, 2.2 },
        { 3.1, 3.0 },
        { 2.3, 2.7 },
        { 2.0, 1.6 },
        { 1.0, 1.1 },
        { 1.5, 1.6 },
        { 1.1, 0.9 }
    };

    // Step 2. Subtract the mean
    // -------------------------
    // The framework performs the centering for us while building the
    // covariance matrix, so here we only need the column means themselves.
    double[] columnMeans = Measures.Mean(samples, dimension: 0);

    // Step 3. Build the covariance matrix of the (centered) data
    // ----------------------------------------------------------
    double[,] covarianceMatrix = Measures.Covariance(samples, columnMeans);

    // Construct the analysis directly from the covariance matrix...
    var analysis = PrincipalComponentAnalysis.FromCovarianceMatrix(columnMeans, covarianceMatrix);

    // ...and run it.
    analysis.Compute();

    // Step 4. Eigenvectors and eigenvalues of the covariance matrix
    // -------------------------------------------------------------
    // Expected eigenvalues, sorted in descending order:
    double[] expectedEigenvalues = { 1.28402771, 0.0490833989 };

    // Their expected share of the total variance:
    double[] expectedProportions = expectedEigenvalues.Divide(expectedEigenvalues.Sum());

    // Expected eigenvectors, one per column, ordered to match
    // the eigenvalues above:
    double[,] expectedEigenvectors =
    {
        { -0.677873399, -0.735178656 },
        { -0.735178656,  0.677873399 }
    };

    // A common source of confusion: the eigendecomposition is not unique.
    // The SVD/EVD routines in the framework can legitimately produce
    // results that differ numerically from packages such as STATA or
    // MATLAB — both answers are correct.
    //
    // If v is an eigenvector, then any scalar multiple a*v is also an
    // eigenvector. For this dataset the framework yields the first
    // eigenvector with flipped signs, i.e. a = -1. To compare against the
    // tutorial we flip the expected column the same way:
    double[] firstColumn = expectedEigenvectors.GetColumn(0);
    expectedEigenvectors.SetColumn(0, firstColumn.Multiply(-1));

    // Values agree with the tutorial up to the 9 decimal places it shows.
    Assert.IsTrue(expectedEigenvectors.IsEqual(analysis.ComponentMatrix, rtol: 1e-9));
    Assert.IsTrue(expectedProportions.IsEqual(analysis.ComponentProportions, rtol: 1e-9));
    Assert.IsTrue(expectedEigenvalues.IsEqual(analysis.Eigenvalues, rtol: 1e-8));

    // Step 5. Derive the new (projected) data set
    // -------------------------------------------
    double[,] transformed = analysis.Transform(samples);

    // transformedData as shown on pg. 18 of the tutorial:
    double[,] expectedProjection = new double[,]
    {
        {  0.827970186, -0.175115307 },
        { -1.77758033,   0.142857227 },
        {  0.992197494,  0.384374989 },
        {  0.274210416,  0.130417207 },
        {  1.67580142,  -0.209498461 },
        {  0.912949103,  0.175282444 },
        { -0.099109437, -0.349824698 },
        { -1.14457216,   0.046417258 },
        { -0.438046137,  0.017764629 },
        { -1.22382056,  -0.162675287 },
    };

    // The projection matches the tutorial up to 8 decimal places.
    Assert.IsTrue(expectedProjection.IsEqual(transformed, atol: 1e-8));
}