public void learn_new_mechanism()
{
    // Documentation sample and regression test in one: the region below is the
    // user-facing example (fit a logistic regression with IRLS, read the odds
    // ratios, compute probabilities and decisions); the statements after the
    // region pin the learned model to previously recorded reference values.

    #region doc_log_reg_1
    // Suppose we have the following data about some patients.
    // The first variable is continuous and represents the patient's
    // age. The second variable is dichotomic and gives whether
    // they smoke or not (this is completely fictional data).

    // We also know if they have had lung cancer or not, and
    // we would like to know whether smoking has any connection
    // with lung cancer.

    double[][] input =
    {   // columns: age, smokes?    (trailing comment: had cancer?)
        new double[] { 55, 0 }, // false - no cancer
        new double[] { 28, 0 }, // false
        new double[] { 65, 1 }, // false
        new double[] { 46, 0 }, // true - had cancer
        new double[] { 86, 1 }, // true
        new double[] { 56, 1 }, // true
        new double[] { 85, 0 }, // false
        new double[] { 33, 0 }, // false
        new double[] { 21, 1 }, // false
        new double[] { 42, 1 }, // true
    };

    bool[] output = // Whether each patient had lung cancer or not
    {
        false, false, false, true, true, true, false, false, false, true
    };

    // To verify this hypothesis, we are going to create a logistic
    // regression model for those two inputs (age and smoking), learned
    // using a method called "Iteratively Reweighted Least Squares":

    var learner = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        Tolerance = 1e-4,  // Let's set some convergence parameters
        Iterations = 100,  // maximum number of iterations to perform
        Regularization = 0
    };

    // Now, we can use the learner to finally estimate our model:
    LogisticRegression regression = learner.Learn(input, output);

    // At this point, we can compute the odds ratio of our variables.
    // In the model, the variable at 0 is always the intercept term,
    // with the others following in sequence. Index 1 is the age
    // and index 2 is whether the patient smokes or not.

    // For the age variable, we have that individuals with
    // higher age have 1.021 times greater odds of getting lung
    // cancer, controlling for cigarette smoking.
    double ageOdds = regression.GetOddsRatio(1); // 1.0208597028836701

    // For the smoking/non-smoking category variable, however, we
    // have that individuals who smoke have 5.858 times greater odds
    // of developing lung cancer compared to those who do not
    // smoke, controlling for age (remember, this is completely
    // fictional and for demonstration purposes only).
    double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331

    // If we would like to use the model to predict a probability for
    // each patient regarding whether they are at risk of cancer or not,
    // we can use the Probability function:
    double[] scores = regression.Probability(input);

    // Finally, if we would like to arrive at a conclusion regarding
    // each patient, we can use the Decide method, which will transform
    // the probabilities (from 0 to 1) into actual true/false values:
    bool[] actual = regression.Decide(input);
    #endregion

    // Reference probabilities, one per row of 'input'.
    double[] expected =
    {
        0.21044171560168326,
        0.13242527535212373,
        0.65747803433771812,
        0.18122484822324372,
        0.74755661773156912,
        0.61450041841477232,
        0.33116705418194975,
        0.14474110902457912,
        0.43627109657399382,
        0.54419383282533118
    };

    // Check the length first: the loop alone would pass vacuously if
    // Probability returned an empty or truncated array.
    Assert.AreEqual(expected.Length, scores.Length);
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], scores[i], 1e-8, "score at index " + i);
    }

    // NOTE(review): 'scores' is passed as the second argument, presumably as
    // the result buffer to be filled - confirm against the Transform overload.
    double[] transform = regression.Transform(input, scores);
    Assert.AreEqual(expected.Length, transform.Length);
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], transform[i], 1e-8, "transform at index " + i);
    }

    // Odds ratios reported in the documentation region above.
    Assert.AreEqual(1.0208597028836701, ageOdds, 1e-10);
    Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-6);

    // Learned parameters: the intercept is also exposed as Coefficients[0].
    Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
    Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
    Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-10);
    Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);

    // Expected decisions per patient. These intentionally differ from 'output'
    // at indices 2 and 3, where the fitted model misclassifies the sample.
    bool[] expectedDecisions =
    {
        false, false, true, false, true, true, false, false, false, true
    };

    Assert.AreEqual(expectedDecisions.Length, actual.Length);
    for (int i = 0; i < expectedDecisions.Length; i++)
    {
        Assert.AreEqual(expectedDecisions[i], actual[i], "decision at index " + i);
    }
}