Accord.Tests.Statistics.LogisticRegressionTest.learn_new_mechanism C# (CSharp) Method

learn_new_mechanism() public method

public learn_new_mechanism ( ) : void
return void
        public void learn_new_mechanism()
        {
            // Fits a logistic regression with IterativeReweightedLeastSquares on a
            // small fictional data set, then checks the predicted probabilities,
            // odds ratios, coefficients and class decisions against reference values.

            #region doc_log_reg_1
            // Suppose we have the following data about some patients.
            // The first variable is continuous and represents the patient's
            // age. The second variable is dichotomous and gives whether
            // they smoke or not (This is completely fictional data).

            // We also know if they have had lung cancer or not, and 
            // we would like to know whether smoking has any connection
            // with lung cancer (This is completely fictional data).

            double[][] input =
            {              // age, smokes?, had cancer?
                new double[] { 55,    0  }, // false - no cancer
                new double[] { 28,    0  }, // false
                new double[] { 65,    1  }, // false
                new double[] { 46,    0  }, // true  - had cancer
                new double[] { 86,    1  }, // true
                new double[] { 56,    1  }, // true
                new double[] { 85,    0  }, // false
                new double[] { 33,    0  }, // false
                new double[] { 21,    1  }, // false
                new double[] { 42,    1  }, // true
            };

            bool[] output = // Whether each patient had lung cancer or not
            {
                false, false, false, true, true, true, false, false, false, true
            };


            // To verify this hypothesis, we are going to create a logistic
            // regression model for those two inputs (age and smoking), learned
            // using a method called "Iteratively Reweighted Least Squares":

            var learner = new IterativeReweightedLeastSquares<LogisticRegression>()
            {
                Tolerance = 1e-4,  // Let's set some convergence parameters
                Iterations = 100,  // maximum number of iterations to perform
                Regularization = 0
            };

            // Now, we can use the learner to finally estimate our model:
            LogisticRegression regression = learner.Learn(input, output);

            // At this point, we can compute the odds ratio of our variables.
            // In the model, the variable at 0 is always the intercept term, 
            // with the others following in sequence. Index 1 is the age
            // and index 2 is whether the patient smokes or not.

            // For the age variable, we have that each additional year
            //   of age multiplies the odds of getting lung cancer by
            //   about 1.021, controlling for cigarette smoking.
            double ageOdds = regression.GetOddsRatio(1); // 1.0208597028836701

            // For the smoking/non smoking category variable, however, we
            //   have that individuals who smoke have 5.858 times the odds
            //   of developing lung cancer compared to those who do not 
            //   smoke, controlling for age (remember, this is completely
            //   fictional and for demonstration purposes only).
            double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331

            // If we would like to use the model to predict a probability for
            // each patient regarding whether they are at risk of cancer or not,
            // we can use the Probability function:

            double[] scores = regression.Probability(input);

            // Finally, if we would like to arrive at a conclusion regarding
            // each patient, we can use the Decide method, which will transform
            // the probabilities (from 0 to 1) into actual true/false values:

            bool[] actual = regression.Decide(input);
            #endregion

            // Reference probabilities, one per row of 'input'.
            double[] expected = 
            {
                0.21044171560168326,
                0.13242527535212373,
                0.65747803433771812,
                0.18122484822324372,
                0.74755661773156912,
                0.61450041841477232,
                0.33116705418194975,
                0.14474110902457912,
                0.43627109657399382,
                0.54419383282533118
            };

            // Check the length first: otherwise a result that is too short would
            // pass the loop vacuously, and one that is too long would surface as
            // an IndexOutOfRangeException instead of an assertion failure.
            Assert.AreEqual(expected.Length, scores.Length);
            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], scores[i], 1e-8);
            }

            // 'scores' is passed as the second argument of Transform (presumably
            // the output buffer - confirm against the Accord overload); the
            // returned values must match the same reference probabilities.
            double[] transform = regression.Transform(input, scores);
            Assert.AreEqual(expected.Length, transform.Length);
            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], transform[i], 1e-8);
            }

            Assert.AreEqual(1.0208597028836701, ageOdds, 1e-10);
            Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-6);

            // Coefficients[0] is the intercept; [1] is age and [2] is smoking.
            Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
            Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
            Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-10);
            Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);

            // Expected decisions, one per row of 'input'. These are the model's
            // predictions, which differ from the training labels in 'output'
            // for rows 2 and 3.
            bool[] expectedDecisions =
            {
                false, false, true, false, true, true, false, false, false, true
            };

            Assert.AreEqual(expectedDecisions.Length, actual.Length);
            for (int i = 0; i < expectedDecisions.Length; i++)
            {
                Assert.AreEqual(expectedDecisions[i], actual[i],
                    "Unexpected decision for patient at index " + i);
            }
        }