/// <summary>
/// Command-line entry point. Parses the options, reads the bins from the input file, applies the
/// requested cleaning steps in a fixed order (outlier removal, size filter, GC normalization,
/// FFPE local-SD filter) and writes the remaining bins to the output file.
/// </summary>
/// <param name="args">Raw command-line arguments; the recognised switches are listed in the option table below.</param>
/// <returns>
/// 0 on success or when help was explicitly requested; 1 when a required option
/// (input or output file) is missing or when the input file does not exist.
/// </returns>
static int Main(string[] args)
{
    // FFPE outlier removal is switched off when fewer bins than this are available.
    const int minBinsForFfpeOutlierRemoval = 50000;
    // Variance normalization is only attempted when the bin count exceeds this value.
    const int varianceNormalizationBinThreshold = 500000;
    // threshold 20 is derived to separate FF and noisy FFPE samples (derived from a training set of approx. 40 samples)
    const int localSDThreshold = 20;

    Utilities.LogCommandLine(args);

    string inFile = null;
    string outFile = null;
    bool doGCnorm = false;
    bool doSizeFilter = false;
    bool doOutlierRemoval = false;
    string ffpeOutliersFile = null;
    string manifestFile = null;
    CanvasGCNormalizationMode gcNormalizationMode = CanvasGCNormalizationMode.MedianByGC;
    // Built before the option table so that the help text reports the default mode set above.
    string modeDescription = String.Format("gc normalization mode. Available modes: {0}. Default: {1}",
        String.Join(", ", Enum.GetValues(typeof(CanvasGCNormalizationMode)).Cast<CanvasGCNormalizationMode>()),
        gcNormalizationMode);
    bool needHelp = false;

    OptionSet p = new OptionSet()
    {
        { "i|infile=", "input file - usually generated by CanvasBin", v => inFile = v },
        { "o|outfile=", "text file to output containing cleaned bins", v => outFile = v },
        { "g|gcnorm", "perform GC normalization", v => doGCnorm = v != null },
        { "s|filtsize", "filter out genomically large bins", v => doSizeFilter = v != null },
        { "r|outliers", "filter outlier points", v => doOutlierRemoval = v != null },
        { "f|ffpeoutliers=", "filter regions of FFPE biases", v => ffpeOutliersFile = v },
        { "t|manifest=", "Nextera manifest file", v => manifestFile = v },
        // Writes to a member declared outside this method.
        { "w|weightedmedian=", "Minimum number of bins per GC required to calculate weighted median", v => minNumberOfBinsPerGCForWeightedMedian = int.Parse(v) },
        { "m|mode=", modeDescription, v => gcNormalizationMode = Utilities.ParseCanvasGCNormalizationMode(v) },
        { "h|help", "show this message and exit", v => needHelp = v != null },
    };

    // The list of unprocessed arguments returned by Parse was never used, so it is not kept.
    p.Parse(args);

    if (needHelp)
    {
        ShowHelp(p);
        return 0;
    }

    if (inFile == null || outFile == null)
    {
        // A missing required option is a usage error: report it and exit non-zero so that
        // calling scripts do not mistake the run for a successful one.
        Console.Error.WriteLine("CanvasClean.exe: Both an input file (-i) and an output file (-o) must be specified.");
        ShowHelp(p);
        return 1;
    }

    // Does the input file exist?
    if (!File.Exists(inFile))
    {
        Console.WriteLine("CanvasClean.exe: File {0} does not exist! Exiting.", inFile);
        return 1;
    }

    List<GenomicBin> bins = CanvasIO.ReadFromTextFile(inFile);

    if (doOutlierRemoval)
    {
        bins = RemoveOutliers(bins);
    }

    if (doSizeFilter)
    {
        bins = RemoveBigBins(bins);
    }

    // do not run FFPE outlier removal on targeted/low coverage data
    if (ffpeOutliersFile != null && bins.Count < minBinsForFfpeOutlierRemoval)
    {
        // Clearing the file name disables every FFPE-specific step below.
        ffpeOutliersFile = null;
    }

    // estimate localSD metric to use in the FFPE outlier removal later and write to a text file
    // (computed before GC normalization changes the bins; -1.0 is a placeholder that is never
    // consumed, because the removal step is guarded by the same ffpeOutliersFile check)
    double localSD = -1.0;
    if (ffpeOutliersFile != null)
    {
        localSD = getLocalStandardDeviation(bins);
        CanvasIO.WriteLocalSDToTextFile(ffpeOutliersFile, localSD);
    }

    if (doGCnorm)
    {
        NexteraManifest manifest = manifestFile == null ? null : new NexteraManifest(manifestFile, null, Console.WriteLine);

        // Bins with extreme GC content are only stripped in MedianByGC mode.
        List<GenomicBin> strippedBins = gcNormalizationMode == CanvasGCNormalizationMode.MedianByGC
            ? RemoveBinsWithExtremeGC(bins, defaultMinNumberOfBinsPerGC, manifest: manifest)
            : bins;

        if (strippedBins.Count == 0)
        {
            // Keep the unstripped bins and skip GC correction entirely.
            Console.Error.WriteLine("Warning in CanvasClean: Coverage too low to perform GC correction; proceeding without GC correction");
        }
        else
        {
            bins = strippedBins;
            NormalizeByGC(bins, manifest, gcNormalizationMode);

            // Use variance normalization only on large exome panels and whole genome sequencing
            // The threshold is set to 10% of an average number of bins on CanvasClean data
            if (ffpeOutliersFile != null && bins.Count > varianceNormalizationBinThreshold)
            {
                bool isNormalizeVarianceByGC = NormalizeVarianceByGC(bins, manifest: manifest);
                // If normalization by variance was run (isNormalizeVarianceByGC), perform mean centering by using NormalizeByGC
                if (isNormalizeVarianceByGC)
                {
                    NormalizeByGC(bins, manifest, gcNormalizationMode);
                }
            }
        }
    }

    if (ffpeOutliersFile != null)
    {
        bins = RemoveBinsWithExtremeLocalSD(bins, localSD, localSDThreshold, outFile);
    }

    CanvasIO.WriteToTextFile(outFile, bins);
    return 0;
}