CanvasClean.CanvasClean.Main C# (CSharp) Method

Main() static private method

static private Main ( string[] args ) : int
args string[]
return int
        /// <summary>
        /// Command-line entry point for CanvasClean: reads bins from the input file, applies the
        /// filtering and normalization steps selected on the command line, and writes the
        /// resulting bins to the output file.
        /// </summary>
        /// <param name="args">Command-line arguments; the accepted options are listed in the option table below.</param>
        /// <returns>
        /// 0 when help was requested or the cleaned bins were written;
        /// 1 when a required argument (-i / -o) is missing or the input file does not exist.
        /// </returns>
        static int Main(string[] args)
        {
            Utilities.LogCommandLine(args);
            string inFile = null;
            string outFile = null;
            bool doGCnorm = false;
            bool doSizeFilter = false;
            bool doOutlierRemoval = false;
            string ffpeOutliersFile = null;
            string manifestFile = null;
            CanvasGCNormalizationMode gcNormalizationMode = CanvasGCNormalizationMode.MedianByGC;
            string modeDescription = String.Format("gc normalization mode. Available modes: {0}. Default: {1}",
                String.Join(", ", Enum.GetValues(typeof(CanvasGCNormalizationMode)).Cast<CanvasGCNormalizationMode>()),
                gcNormalizationMode);
            bool needHelp = false;

            // NOTE: minNumberOfBinsPerGCForWeightedMedian (set by -w) is declared outside this method.
            OptionSet p = new OptionSet()
            {
                { "i|infile=",        "input file - usually generated by CanvasBin",      v => inFile = v },
                { "o|outfile=",       "text file to output containing cleaned bins",      v => outFile = v },
                { "g|gcnorm",         "perform GC normalization",                         v => doGCnorm = v != null },
                { "s|filtsize",       "filter out genomically large bins",                v => doSizeFilter = v != null },
                { "r|outliers",       "filter outlier points",                            v => doOutlierRemoval = v != null },
                { "f|ffpeoutliers=",   "filter regions of FFPE biases",                   v => ffpeOutliersFile = v },
                { "t|manifest=",      "Nextera manifest file",                            v => manifestFile = v },
                { "w|weightedmedian=", "Minimum number of bins per GC required to calculate weighted median", v => minNumberOfBinsPerGCForWeightedMedian = int.Parse(v) },
                { "m|mode=",          modeDescription,                                    v => gcNormalizationMode = Utilities.ParseCanvasGCNormalizationMode(v) },
                { "h|help",           "show this message and exit",                       v => needHelp = v != null },
            };

            // Unrecognized (extra) arguments returned by Parse are intentionally ignored.
            p.Parse(args);

            if (needHelp)
            {
                ShowHelp(p);
                return 0;
            }

            // Both an input and an output file are mandatory. Report failure through the exit
            // code so that a calling pipeline does not mistake a run that wrote nothing for success.
            if (inFile == null || outFile == null)
            {
                Console.Error.WriteLine("CanvasClean.exe: Both an input file (-i) and an output file (-o) are required.");
                ShowHelp(p);
                return 1;
            }

            // Does the input file exist?
            if (!File.Exists(inFile))
            {
                Console.WriteLine("CanvasClean.exe: File {0} does not exist! Exiting.", inFile);
                return 1;
            }

            List<GenomicBin> bins = CanvasIO.ReadFromTextFile(inFile);

            if (doOutlierRemoval)
            {
                bins = RemoveOutliers(bins);
            }

            if (doSizeFilter)
            {
                bins = RemoveBigBins(bins);
            }

            // do not run FFPE outlier removal on targeted/low coverage data
            // (clearing the file name disables every FFPE-specific step below)
            if (ffpeOutliersFile != null && bins.Count < 50000)
            {
                ffpeOutliersFile = null;
            }

            // estimate localSD metric to use in the FFPE outlier removal later and write it to a text file;
            // -1.0 is a placeholder that is only read when ffpeOutliersFile is set, i.e. after it was overwritten here
            double localSD = -1.0;
            if (ffpeOutliersFile != null)
            {
                localSD = getLocalStandardDeviation(bins);
                CanvasIO.WriteLocalSDToTextFile(ffpeOutliersFile, localSD);
            }

            if (doGCnorm)
            {
                NexteraManifest manifest = manifestFile == null ? null : new NexteraManifest(manifestFile, null, Console.WriteLine);
                // Bins with extreme GC content are stripped only in MedianByGC mode; other modes keep all bins.
                List<GenomicBin> strippedBins = gcNormalizationMode == CanvasGCNormalizationMode.MedianByGC
                    ? RemoveBinsWithExtremeGC(bins, defaultMinNumberOfBinsPerGC, manifest: manifest)
                    : bins;
                if (strippedBins.Count == 0)
                {
                    // Nothing left to normalize: keep the unstripped bins and skip GC correction.
                    Console.Error.WriteLine("Warning in CanvasClean: Coverage too low to perform GC correction; proceeding without GC correction");
                }
                else
                {
                    bins = strippedBins;
                    NormalizeByGC(bins, manifest, gcNormalizationMode);
                    // Use variance normalization only on large exome panels and whole genome sequencing
                    // The threshold is set to 10% of an average number of bins on CanvasClean data
                    if (ffpeOutliersFile != null && bins.Count > 500000)
                    {
                        bool isNormalizeVarianceByGC = NormalizeVarianceByGC(bins, manifest: manifest);
                        // If normalization by variance was run (isNormalizeVarianceByGC), perform mean centering by using NormalizeByGC
                        if (isNormalizeVarianceByGC)
                        {
                            NormalizeByGC(bins, manifest, gcNormalizationMode);
                        }
                    }
                }
            }

            if (ffpeOutliersFile != null)
            {
                // threshold 20 is derived to separate FF and noisy FFPE samples (derived from a training set of approx. 40 samples)
                bins = RemoveBinsWithExtremeLocalSD(bins, localSD, 20, outFile);
            }

            CanvasIO.WriteToTextFile(outFile, bins);
            return 0;
        }