CanvasNormalize.CanvasNormalize.GetBestLR2BinCount C# (CSharp) Method

GetBestLR2BinCount() private static method

Pick the best normal control that has the smallest mean squared log-ratios (LR2s).
private static GetBestLR2BinCount ( string tumorBinnedPath, IEnumerable<string> normalBinnedPaths, string bestBinnedPath, NexteraManifest manifest = null ) : void
tumorBinnedPath string
normalBinnedPaths IEnumerable<string>
bestBinnedPath string
manifest NexteraManifest
return void
        /// <summary>
        /// Picks the normal control whose median-normalized on-target bin counts give the smallest
        /// mean squared log-ratio (LR2) against the tumor sample, and copies that normal's binned
        /// file to <paramref name="bestBinnedPath"/>.
        /// </summary>
        /// <remarks>
        /// When only one normal sample is supplied it is used directly, without loading any bin counts.
        /// When no candidate produces a mean squared log-ratio below positive infinity (for example,
        /// every result is NaN), the first normal sample is used, mirroring the single-sample path.
        /// If the selected source file does not exist, nothing is copied.
        /// </remarks>
        /// <param name="tumorBinnedPath">Path to the binned counts of the tumor sample.</param>
        /// <param name="normalBinnedPaths">Paths to the binned counts of the candidate normal samples.</param>
        /// <param name="bestBinnedPath">Destination path for the selected normal; replaced if it already exists.</param>
        /// <param name="manifest">Optional manifest forwarded to <c>BinCounts</c> when loading each sample.</param>
        /// <exception cref="ArgumentException"><paramref name="normalBinnedPaths"/> contains no paths.</exception>
        private static void GetBestLR2BinCount(string tumorBinnedPath, IEnumerable<string> normalBinnedPaths, string bestBinnedPath,
            NexteraManifest manifest = null)
        {
            // Materialize once: the sequence may be lazy, and Count()/ElementAt() would re-enumerate it on every call.
            List<string> normalPaths = normalBinnedPaths.ToList();
            if (normalPaths.Count == 0)
            {
                throw new ArgumentException("At least one normal binned path is required.", "normalBinnedPaths");
            }

            // Defaults to the first normal: used as-is for a single sample, and as the fallback when
            // no candidate yields a comparable (non-NaN, finite) score below.
            int bestNormalSampleIndex = 0;
            if (normalPaths.Count > 1) // find the best normal
            {
                List<double[]> binCountsByNormalSample = normalPaths
                    .Select(normalBinnedPath => GetMedianNormalizedOnTargetCounts(normalBinnedPath, manifest))
                    .ToList();
                double[] tumorBinCounts = GetMedianNormalizedOnTargetCounts(tumorBinnedPath, manifest);

                double minMeanSquaredLogRatios = double.PositiveInfinity;
                for (int normalSampleIndex = 0; normalSampleIndex < binCountsByNormalSample.Count; normalSampleIndex++)
                {
                    // Item1 is the mean of squared log ratios; Item2 is the number of ignored bins.
                    var result = GetMeanSquaredLogRatios(tumorBinCounts, binCountsByNormalSample[normalSampleIndex]);
                    double meanSquaredLogRatios = result.Item1;
                    // TODO: Skip a (bad) normal sample if too many bins were ignored (result.Item2).
                    //       Donavan's script skips a normal sample if more than 100 log ratios is NA.
                    //       The cut-off is likely panel-dependent.
                    // A NaN score never compares as smaller, so such a sample is never selected here.
                    if (meanSquaredLogRatios < minMeanSquaredLogRatios)
                    {
                        minMeanSquaredLogRatios = meanSquaredLogRatios;
                        bestNormalSampleIndex = normalSampleIndex;
                    }
                }
            }

            // copy file
            string srcBinnedPath = normalPaths[bestNormalSampleIndex];
            if (File.Exists(srcBinnedPath))
            {
                if (File.Exists(bestBinnedPath)) { File.Delete(bestBinnedPath); }
                File.Copy(srcBinnedPath, bestBinnedPath);
            }
        }

        /// <summary>
        /// Loads the on-target bin counts from a binned file and scales them by the reciprocal of
        /// their on-target median, so samples of different depth can be compared.
        /// </summary>
        private static double[] GetMedianNormalizedOnTargetCounts(string binnedPath, NexteraManifest manifest)
        {
            var binCounts = new BinCounts(binnedPath, manifest: manifest);
            List<double> counts = binCounts.OnTargetCounts;
            double median = binCounts.OnTargetMedianBinCount;
            // If a manifest is available, get the median of bins overlapping the targeted regions only.
            // For small panels, there could be a lot of bins with zero count and the median would be 0 if taken over all the bins, resulting in division by zero.
            // A non-positive median therefore maps to a weight of 0 instead of dividing by it.
            double weight = median > 0 ? 1.0 / median : 0;
            return counts.Select(cnt => cnt * weight).ToArray();
        }