Accord.MachineLearning.BinarySplit.Learn C# (CSharp) Method

Learn() public method

Learns a model that can map the given inputs to the desired outputs.
public Learn ( double[][] x, double[] weights = null ) : KMeansClusterCollection
x double[][] The model inputs.
weights double[] The weight of importance for each input sample.
return KMeansClusterCollection
        /// <summary>
        ///   Learns a model that can map the given inputs to the desired outputs.
        ///   All points start in a single cluster; the cluster with the largest
        ///   distortion is then repeatedly split in two with a 2-means run until
        ///   <c>Clusters.Count</c> clusters exist.
        /// </summary>
        /// 
        /// <param name="x">The model inputs. Must be rectangular (all rows of equal length).</param>
        /// <param name="weights">
        ///   The weight of importance for each input sample. When null, it is replaced
        ///   by <c>Vector.Ones(x.Length)</c>. NOTE(review): in this method the weights are
        ///   only validated; they are not passed on to the splitting step.
        /// </param>
        /// 
        /// <returns>The <c>Clusters</c> collection of this instance.</returns>
        /// 
        /// <exception cref="ArgumentNullException"><paramref name="x"/> is null.</exception>
        /// <exception cref="ArgumentException">
        ///   There are fewer points than <c>K</c>, the weights vector has a different length
        ///   than <paramref name="x"/>, or the weights do not sum to a positive value.
        /// </exception>
        /// <exception cref="DimensionMismatchException">
        ///   A row of <paramref name="x"/> has a different length than the first row.
        /// </exception>
        /// 
        public override KMeansClusterCollection Learn(double[][] x, double[] weights = null)
        {
            // Initial argument checking
            if (x == null)
                throw new ArgumentNullException("x");

            if (x.Length < K)
                throw new ArgumentException("Not enough points. There should be more points than the number K of clusters.");

            if (weights == null)
                weights = Vector.Ones(x.Length);

            if (x.Length != weights.Length)
                throw new ArgumentException("Data weights vector must be the same length as data samples.");

            // A non-positive total weight is a problem with the weights, not with the
            // number of points, so it is reported as such.
            double weightSum = weights.Sum();
            if (weightSum <= 0)
                throw new ArgumentException("The sum of the sample weights must be positive.", "weights");

            // Every row must have as many columns as the first one. Row 0 is the
            // reference, so the comparison starts at row 1 and looks at x[i].
            int cols = x[0].Length;
            for (int i = 1; i < x.Length; i++)
            {
                if (x[i].Length != cols)
                    throw new DimensionMismatchException("x", "The points matrix should be rectangular. The vector at position "
                        + i + " has a different length than previous ones.");
            }

            int k = Clusters.Count;

            // Helper 2-means instance used to bisect one cluster at a time; it
            // mirrors the distance and convergence settings of this instance.
            KMeans kmeans = new KMeans(2)
            {
                Distance = (IDistance<double[]>)Clusters.Distance,
                ComputeError = false,
                ComputeCovariances = false,
                UseSeeding = UseSeeding,
                Tolerance = Tolerance,
                MaxIterations = MaxIterations,
            };

            // NOTE(review): centroids are written in place below; this assumes
            // Clusters.Centroids exposes the collection's own array - confirm.
            double[][] centroids = Clusters.Centroids;
            double[][][] clusters = new double[k][][];
            double[] distortions = new double[k];

            // 1. Start with all data points in one cluster
            clusters[0] = x;

            // 2. Repeat steps 3 to 6 (k-1) times to obtain K centroids
            for (int current = 1; current < k; current++)
            {
                // 3. Choose cluster with largest distortion
                //    (presumably only the first `current` entries, i.e. the
                //    clusters created so far, are considered - confirm)
                int chosen; distortions.Max(current, out chosen);

                // 4. Split cluster into two sub-clusters
                var splits = split(clusters[chosen], kmeans);

                clusters[chosen] = splits.Item1;
                clusters[current] = splits.Item2;

                // 5. Replace chosen centroid and add a new one
                centroids[chosen] = kmeans.Clusters.Centroids[0];
                centroids[current] = kmeans.Clusters.Centroids[1];

                // Recompute distortions for the updated clusters
                distortions[chosen] = kmeans.Clusters[0].Distortion(clusters[chosen]);
                distortions[current] = kmeans.Clusters[1].Distortion(clusters[current]);

                // 6. Increment cluster count (current = current + 1)
            }


            return Clusters;
        }

Usage Example

Example #1
0
        // Checks that a 3-cluster BinarySplit groups nine sample points into the
        // three expected groups, and that Decide and Nearest agree on the labels.
        public void binary_split_new_method()
        {
            #region doc_sample1
            // Fix the random seed so that repeated runs give the same result
            Accord.Math.Random.Generator.Seed = 0;

            // The observations we would like to cluster
            double[][] points = new double[][]
            {
                new double[] { -5, -2, -1 },
                new double[] { -5, -5, -6 },
                new double[] {  2,  1,  1 },
                new double[] {  1,  1,  2 },
                new double[] {  1,  2,  2 },
                new double[] {  3,  1,  2 },
                new double[] { 11,  5,  4 },
                new double[] { 15,  5,  6 },
                new double[] { 10,  5,  6 },
            };

            // Set up the Binary Split algorithm to look for 3 clusters
            var splitter = new BinarySplit(3);

            // Partition the data; the learned clusters are returned
            KMeansClusterCollection model = splitter.Learn(points);

            // Ask the learned clusters for the label of every point
            int[] labels = model.Decide(points);

            // Expected outcome: observations 0-1 share one label, observations
            //  2-5 share a second label, and observations 6-8 share a third.
            #endregion

            // Index pairs whose labels must match (same group)
            int[][] together =
            {
                new[] { 0, 1 },
                new[] { 2, 3 }, new[] { 2, 4 }, new[] { 2, 5 },
                new[] { 6, 7 }, new[] { 6, 8 },
            };

            foreach (int[] pair in together)
            {
                Assert.AreEqual(labels[pair[0]], labels[pair[1]]);
            }

            // Index pairs whose labels must differ (different groups)
            int[][] apart =
            {
                new[] { 0, 2 },
                new[] { 2, 6 },
                new[] { 0, 6 },
            };

            foreach (int[] pair in apart)
            {
                Assert.AreNotEqual(labels[pair[0]], labels[pair[1]]);
            }

            // The clusters stored on the learner must yield the same labels
            int[] nearest = splitter.Clusters.Nearest(points);
            bool sameLabels = labels.IsEqual(nearest);

            Assert.IsTrue(sameLabels);
        }