/// <summary>
/// Linear regression simulation for homework Q5-Q6 of the 2nd week of
/// CS1156x "Learning From Data" at edX.
/// </summary>
/// <remarks>
/// Runs a fixed number of independent experiments. Each experiment:
/// <list type="number">
/// <item>draws a random target line through two random points of the square [-1, 1) x [-1, 1);</item>
/// <item>labels a random training set with +1 / -1 depending on the side of that line;</item>
/// <item>fits weights W by least squares on the rows (1, x, y) and classifies by the sign of W . (1, x, y);</item>
/// <item>measures the in-sample error on the training set and estimates the out-of-sample
/// error on a fresh batch of random points.</item>
/// </list>
/// The two error rates, averaged over all experiments, are written to the console.
/// The random generator is not seeded explicitly, so results vary from run to run.
/// </remarks>
static void RunQ5Q6Simulation()
{
    const int ExperimentCount = 1000;   // number of independent runs that are averaged
    const int TrainingSize = 100;       // N: points in each training set
    const int OutOfSampleCount = 1000;  // fresh points used to estimate Eout in each run

    Random rnd = new Random();
    double avgEin = 0, avgEout = 0;

    for (int i = 1; i <= ExperimentCount; i++)
    {
        // Pick a random target line y = a * x + b through two random points.
        // The points are drawn from the same square as the data, [-1, 1) x [-1, 1);
        // NextDouble() alone would only cover [0, 1) x [0, 1).
        double x1 = rnd.NextDouble() * 2 - 1, y1 = rnd.NextDouble() * 2 - 1;
        double x2 = rnd.NextDouble() * 2 - 1, y2 = rnd.NextDouble() * 2 - 1;

        // Exact equality is intended here: it is the only case in which the slope
        // below would divide by zero (a vertical line has no slope-intercept form).
        while (x2 == x1)
        {
            x2 = rnd.NextDouble() * 2 - 1;
        }

        double a = (y1 - y2) / (x1 - x2), b = y1 - a * x1;

        // Target function: +1 for points on or below the line, -1 for points above it.
        Func<double, double, int> f = (x, y) => a * x + b >= y ? 1 : -1;

        // Generate a training set of TrainingSize random points; each row is (1, x, y),
        // the leading 1 being the bias coordinate.
        var X = new DenseMatrix(TrainingSize, 3);
        var Y = new DenseVector(TrainingSize);
        for (int j = 0; j < TrainingSize; j++)
        {
            X[j, 0] = 1;
            X[j, 1] = rnd.NextDouble() * 2 - 1;
            X[j, 2] = rnd.NextDouble() * 2 - 1;
            Y[j] = f(X[j, 1], X[j, 2]);
        }

        // Linear regression: least-squares solution of X * W = Y through the QR
        // factorization. Solving for Y directly avoids building an N x N identity
        // and an explicit pseudo-inverse first.
        var W = X.QR().Solve(Y);

        // Hypothesis: classify by the sign of the fitted linear function.
        Func<double, double, int> h = (x, y) => W[0] + W[1] * x + W[2] * y >= 0 ? 1 : -1;

        // Ein: fraction of training points the hypothesis misclassifies.
        int count = 0;
        for (int j = 0; j < TrainingSize; j++)
        {
            if (h(X[j, 1], X[j, 2]) != Y[j])
            {
                count++;
            }
        }
        avgEin += (double)count / TrainingSize;

        // Eout: estimate the probability that f and h disagree on fresh random points.
        count = 0;
        for (int j = 1; j <= OutOfSampleCount; j++)
        {
            double xx = rnd.NextDouble() * 2 - 1;
            double yy = rnd.NextDouble() * 2 - 1;
            if (f(xx, yy) != h(xx, yy))
            {
                count++;
            }
        }
        avgEout += (double)count / OutOfSampleCount;
    }

    Console.Out.WriteLine("HW2 Q5:");
    Console.Out.WriteLine("\tEin = {0}", avgEin / ExperimentCount);
    Console.Out.WriteLine("HW2 Q6:");
    Console.Out.WriteLine("\tEout = {0}", avgEout / ExperimentCount);
}