/// <summary>
/// Rescales every bin's count so that bins sharing the same GC value end up centered
/// on the global median count: <c>count * globalMedian / medianForThatGC</c>.
/// </summary>
/// <remarks>
/// A GC value backed by fewer than <c>defaultMinNumberOfBinsPerGC</c> counts does not use
/// its own plain median; a weighted median built by <c>GetWeightedCounts</c> is used instead.
/// Bins whose GC median is missing or not strictly positive are left unchanged.
/// </remarks>
/// <param name="bins">Bins whose counts are to be normalized. Counts are updated in place.</param>
/// <param name="manifest">
/// Optional manifest, forwarded unchanged to <c>EnrichmentUtilities.GetCountsByGC</c>.
/// Defaults to <c>null</c>.
/// </param>
static void NormalizeByGC(List<SampleGenomicBin> bins, NexteraManifest manifest = null)
{
    // DebugPrintCountsByGC(bins, "CountsByGC-Before.txt");

    // countsByGC: one list per GC value (array index 0-100), holding the counts of bins with that GC content.
    // counts:     all of the autosomal counts present in 'bins'.
    List<float>[] countsByGC;
    List<float> counts;
    EnrichmentUtilities.GetCountsByGC(bins, manifest, out countsByGC, out counts);

    double globalMedian = Utilities.Median(counts);

    // Pass 1: work out a representative (median) count for every GC value.
    double?[] medianByGC = new double?[countsByGC.Length];
    for (int gc = 0; gc < countsByGC.Length; gc++)
    {
        List<float> countsAtThisGC = countsByGC[gc];
        if (countsAtThisGC.Count >= defaultMinNumberOfBinsPerGC)
        {
            // Enough observations: the plain median is used directly.
            medianByGC[gc] = Utilities.Median(countsAtThisGC);
            continue;
        }

        // Too few observations at this GC value: fall back to the weighted median.
        List<Tuple<float, float>> weightedCounts = GetWeightedCounts(countsByGC, gc);
        medianByGC[gc] = Utilities.WeightedMedian(weightedCounts);
    }

    // Pass 2: scale each bin by globalMedian / (median of its GC value).
    foreach (SampleGenomicBin bin in bins)
    {
        double? gcMedian = medianByGC[bin.GenomicBin.GC];
        if (gcMedian.HasValue && gcMedian.Value > 0)
        {
            bin.Count = (float)(globalMedian * (double)bin.Count / gcMedian.Value);
        }
    }

    // DebugPrintCountsByGC(bins, "CountsByGC-After.txt");
}