/// <summary>
/// Fits a LOESS curve of count against GC content and rewrites every bin's count with the
/// GC-dependent trend subtracted and the median count added back.
/// </summary>
/// <remarks>
/// The bandwidth is chosen by <c>findBestBandwith</c> from the entries indexed by
/// <c>withoutChrY</c> only; the model itself is then trained on all of <c>gcs</c> and <c>counts</c>.
/// </remarks>
public void Normalize()
{
    // Find the best bandwidth without chrY
    double[] gcsNoChrY = withoutChrY.Select(i => gcs[i]).ToArray();
    double[] countsNoChrY = withoutChrY.Select(i => counts[i]).ToArray();
    // NOTE(review): 0.3 and 0.75 look like the bandwidth search bounds - confirm against findBestBandwith.
    double bestBandwidth = findBestBandwith(0.3, 0.75, gcsNoChrY, countsNoChrY);

    // Fit LOESS
    double medianY = Utilities.Median(counts);
    int minGC = (int)gcs.Min();
    int maxGC = (int)gcs.Max();
    LoessInterpolator loess = new LoessInterpolator(bestBandwidth, 0);
    var model = loess.Train(gcs, counts, 1, computeFitted: false);

    // One fitted value per integer GC in [minGC, maxGC]. Enumerable.Range takes (start, count),
    // so the second argument must be the width of the interval, not maxGC itself; passing maxGC
    // dropped the last GC value when minGC == 0 and over-predicted past maxGC otherwise.
    int gcValueCount = maxGC - minGC + 1;
    double[] fittedByGC = model.Predict(Enumerable.Range(minGC, gcValueCount).Select(gc => (double)gc));

    // Smooth
    foreach (GenomicBin bin in bins)
    {
        // Clamp so that a bin whose GC lies outside [minGC, maxGC] uses the nearest fitted value.
        int i = Math.Min(fittedByGC.Length - 1, Math.Max(0, bin.GC - minGC));
        // Subtract the fitted trend in transformed space and re-centre on the median.
        // NOTE(review): this assumes counts (and hence medianY) are already in transformed space - confirm.
        double smoothed = countTransformer(bin.Count) - fittedByGC[i] + medianY;
        bin.Count = invCountTransformer(smoothed);
    }
}
/// <summary>
/// Dispatches GC normalization of the given bins to the implementation selected by <paramref name="mode"/>.
/// </summary>
/// <param name="bins">Bins whose counts are to be normalized</param>
/// <param name="manifest">Manifest forwarded unchanged to the selected normalizer</param>
/// <param name="mode">GC normalization mode</param>
/// <exception cref="Illumina.Common.IlluminaException">
/// Thrown when <paramref name="mode"/> is neither <c>MedianByGC</c> nor <c>LOESS</c>.
/// </exception>
static void NormalizeByGC(List<SampleGenomicBin> bins, NexteraManifest manifest, CanvasGCNormalizationMode mode)
{
    if (mode == CanvasGCNormalizationMode.MedianByGC)
    {
        // Delegate to the median-based overload.
        NormalizeByGC(bins, manifest: manifest);
        return;
    }

    if (mode == CanvasGCNormalizationMode.LOESS)
    {
        // LOESS is fitted on log-transformed counts; results are mapped back with exp.
        var loessNormalizer = new LoessGCNormalizer(
            bins,
            manifest,
            robustnessIter: 0,
            countTransformer: x => (double)Math.Log(x),
            invCountTransformer: x => (float)Math.Exp(x));
        loessNormalizer.Normalize();
        return;
    }

    throw new Illumina.Common.IlluminaException("Unsupported Canvas GC normalization mode: " + mode);
}