CanvasClean.CanvasClean.NormalizeVarianceByGC C# (CSharp) Метод

NormalizeVarianceByGC() статический приватный Метод

Perform variance stabilization by GC bins.
static private NormalizeVarianceByGC ( List<GenomicBin> bins, NexteraManifest manifest = null ) : bool
bins List<GenomicBin> Bins whose counts are to be normalized.
manifest NexteraManifest
Результат bool
        /// <summary>
        /// Perform variance stabilization by GC bins: for bins whose GC-specific interquartile
        /// range (IQR), scaled by 0.8, exceeds the genomewide IQR, the count's deviation from the
        /// GC-specific median is shrunk by the ratio of the two IQRs. Counts are modified in place.
        /// </summary>
        /// <param name="bins">Bins whose counts are to be normalized.</param>
        /// <param name="manifest">Optional manifest; only forwarded to EnrichmentUtilities.GetCountsByGC.</param>
        /// <returns>
        /// False, with all counts untouched, when no GC value in [10, 90) satisfies
        /// globalIQR &lt; 2 * localIQR; true otherwise.
        /// </returns>
        static bool NormalizeVarianceByGC(List<GenomicBin> bins, NexteraManifest manifest = null)
        {
            // DebugPrintCountsByGC(bins, "CountsByGCVariance-Before.txt");
            // Array indexed by GC content (0-100); each element lists the counts of bins sharing that GC content.
            List<float>[] countsByGC;
            // All of the autosomal counts present in 'bins'.
            List<float> counts;
            EnrichmentUtilities.GetCountsByGC(bins, manifest, out countsByGC, out counts);

            // Genomewide quartiles and the interquartile range derived from them.
            var genomeQuartiles = Utilities.Quartiles(counts);
            float globalIQR = genomeQuartiles.Item3 - genomeQuartiles.Item1;

            // Per-GC quartiles and IQRs, kept index-aligned with countsByGC.
            int gcLevels = countsByGC.Length;
            var iqrByGC = new List<float>(gcLevels);
            var quartilesByGC = new List<Tuple<float, float, float>>(gcLevels);

            for (int gc = 0; gc < gcLevels; gc++)
            {
                List<float> sameGcCounts = countsByGC[gc];
                Tuple<float, float, float> gcQuartiles;
                float gcIqr;

                if (sameGcCounts.Count == 0)
                {
                    // No bins at this GC value: store -1 sentinels so the lists stay index-aligned.
                    gcQuartiles = Tuple.Create(-1f, -1f, -1f);
                    gcIqr = -1f;
                }
                else if (sameGcCounts.Count < defaultMinNumberOfBinsPerGC)
                {
                    // Too few bins for plain quartiles: fall back to weighted quantiles
                    // computed from GetWeightedCounts for this GC index.
                    List<Tuple<float, float>> weighted = GetWeightedCounts(countsByGC, gc);
                    double[] q = Utilities.WeightedQuantiles(weighted, new List<float>() { 0.25f, 0.5f, 0.75f });
                    gcQuartiles = Tuple.Create((float)q[0], (float)q[1], (float)q[2]);
                    // Subtract in double precision first, then narrow to float.
                    gcIqr = (float)(q[2] - q[0]);
                }
                else
                {
                    gcQuartiles = Utilities.Quartiles(sameGcCounts);
                    gcIqr = gcQuartiles.Item3 - gcQuartiles.Item1;
                }

                quartilesByGC.Add(gcQuartiles);
                iqrByGC.Add(gcIqr);
            }

            // Scan GC values 10..89 for any whose doubled IQR exceeds the genomewide IQR.
            // NOTE(review): an earlier comment described this as "IQR twice as large as IQR genomewide",
            // but the test passes as soon as the local IQR exceeds half the global IQR - confirm the intended threshold.
            bool anyWideGcBin = false;
            for (int gc = 10; gc < 90; gc++)
            {
                if (iqrByGC[gc] * 2f > globalIQR)
                {
                    anyWideGcBin = true;
                }
            }

            if (!anyWideGcBin)
            {
                return false;
            }

            // Pull each affected count towards the median of its GC bin.
            for (int b = 0; b < bins.Count; b++)
            {
                GenomicBin bin = bins[b];
                var dampedGcIqr = iqrByGC[bin.GC] * 0.8f;
                if (globalIQR >= dampedGcIqr)
                {
                    // This GC bin is not wide enough relative to the genome: leave the count alone.
                    continue;
                }

                // Ratio of the (scaled) GC-specific IQR to the genomewide IQR.
                float shrink = dampedGcIqr / globalIQR;
                var gcMedian = quartilesByGC[bin.GC].Item2;
                bin.Count = gcMedian + (bin.Count - gcMedian) / shrink;
            }

            // DebugPrintCountsByGC(bins, "CountsByGCVariance-After.txt");
            return true;
        }