/// <summary>
/// Reads every entry of the reference FASTA, optionally derives a per-position GC content
/// track for each chromosome, and then runs <c>BinCountsForChromosome</c> once per eligible
/// chromosome on parallel threads. The per-chromosome bin lists are concatenated in the order
/// the chromosomes appear in the reference.
/// </summary>
/// <param name="referenceFile">Path handed to <c>FastaReader</c>; all entries are loaded into memory.</param>
/// <param name="binSize">Forwarded unchanged to each binning task (<c>BinTaskArguments.BinSize</c>).</param>
/// <param name="coverageMode">
/// When equal to <c>CanvasCoverageMode.GCContentWeighted</c>, the mean fragment size, the
/// per-position GC content and the observed-vs-expected GC vector are computed; otherwise
/// those steps are skipped and the tasks receive a null GC track and an empty vector.
/// </param>
/// <param name="manifest">Only forwarded to <c>ComputeObservedVsExpectedGC</c>.</param>
/// <param name="possibleAlignments">Per-chromosome bit arrays; reference chromosomes missing from this map are skipped.</param>
/// <param name="observedAlignments">Per-chromosome hit arrays; must contain every chromosome that is processed.</param>
/// <param name="fragmentLengths">
/// Per-chromosome fragment length indexed by position. A value of 0 is replaced by the mean
/// fragment size; larger values are capped at mean fragment size times the cutoff (3).
/// Only read in GC-weighted mode.
/// </param>
/// <param name="predefinedBins">
/// May be null. When non-null, chromosomes missing from this map are skipped and the supplied
/// list instance is the one handed to the binning task and included in the result.
/// </param>
/// <param name="outFile">Only forwarded to <c>ComputeObservedVsExpectedGC</c>.</param>
/// <returns>All per-chromosome bin lists concatenated in reference order.</returns>
/// <exception cref="KeyNotFoundException">
/// Thrown by the dictionary indexers when <paramref name="fragmentLengths"/> names a chromosome
/// that is not in the reference, when <paramref name="observedAlignments"/> lacks a processed
/// chromosome, or (GC-weighted mode) when no GC track exists for a processed chromosome.
/// </exception>
static List<GenomicBin> BinCounts(string referenceFile, int binSize, CanvasCoverageMode coverageMode, NexteraManifest manifest,
    Dictionary<string, BitArray> possibleAlignments,
    Dictionary<string, HitArray> observedAlignments,
    Dictionary<string, Int16[]> fragmentLengths,
    Dictionary<string, List<GenomicBin>> predefinedBins,
    string outFile)
{
    bool debugGCCorrection = false; // write value of GC bins and correction factor
    bool gcWeighted = coverageMode == CanvasCoverageMode.GCContentWeighted;
    Dictionary<string, GenericRead> fastaEntries = new Dictionary<string, GenericRead>();
    List<string> chromosomes = new List<string>();
    Int16 meanFragmentSize = 0;
    Int16 meanFragmentCutoff = 3;
    if (gcWeighted)
    {
        meanFragmentSize = MeanFragmentSize(fragmentLengths);
    }

    // Load every chromosome of the reference, remembering the order in which they appear.
    using (FastaReader reader = new FastaReader(referenceFile))
    {
        GenericRead fastaEntry = new GenericRead();
        while (reader.GetNextEntry(ref fastaEntry))
        {
            chromosomes.Add(fastaEntry.Name);
            fastaEntries[fastaEntry.Name] = fastaEntry;
            // A fresh instance per entry so the stored references are not overwritten.
            fastaEntry = new GenericRead();
        }
    }

    // calculate GC content of the forward read at every position along the genome
    Dictionary<string, byte[]> readGCContent = new Dictionary<string, byte[]>();
    if (gcWeighted)
    {
        byte gcCap = (byte)numberOfGCbins;
        // Longest fragment that is ever scanned; also fixes how far from the chromosome end we stop.
        int maxFragmentSize = meanFragmentSize * meanFragmentCutoff;
        List<ThreadStart> normalizationTasks = new List<ThreadStart>();
        foreach (KeyValuePair<string, Int16[]> fragmentLengthsKVP in fragmentLengths)
        {
            // Copy everything the worker needs into per-iteration locals so the lambda never
            // captures the foreach variable itself (which is shared across iterations on
            // pre-C# 5 compilers).
            string chr = fragmentLengthsKVP.Key;
            Int16[] chrFragmentLengths = fragmentLengthsKVP.Value;
            GenericRead fastaEntry = fastaEntries[chr];
            normalizationTasks.Add(new ThreadStart(() =>
            {
                // contains GC content of the forward read at every position for current chr
                byte[] gcContent = new byte[fastaEntry.Bases.Length];
                // Stopping maxFragmentSize + 1 bases early keeps pos + currentFragment in range;
                // the skipped tail positions keep their default value of 0.
                int endPos = fastaEntry.Bases.Length - maxFragmentSize - 1;
                // Iteratively calculate GC content of "reads" using fasta genome reference
                for (int pos = 0; pos < endPos; pos++)
                {
                    Int16 currentFragment = chrFragmentLengths[pos] == 0
                        ? meanFragmentSize
                        : Convert.ToInt16(Math.Min(chrFragmentLengths[pos], maxFragmentSize));

                    // A non-positive fragment has no bases to count. Leaving the entry at 0
                    // matches the previous result for negative lengths and avoids a
                    // DivideByZeroException when the mean fragment size itself is 0.
                    if (currentFragment <= 0)
                    {
                        continue;
                    }

                    int gcCount = 0;
                    int fragmentEnd = pos + currentFragment;
                    for (int i = pos; i < fragmentEnd; i++)
                    {
                        switch (fastaEntry.Bases[i])
                        {
                            case 'C':
                            case 'c':
                            case 'G':
                            case 'g':
                                gcCount++;
                                break;
                            default:
                                break;
                        }
                    }
                    // Integer percentage of G/C bases in the fragment, capped at gcCap.
                    gcContent[pos] = (byte)Math.Min(100 * gcCount / currentFragment, gcCap);
                }
                // The dictionary is shared by all workers, so serialize the write.
                lock (readGCContent)
                {
                    readGCContent[chr] = gcContent;
                }
            }));
        }
        Console.WriteLine("{0} Launching normalization tasks.", DateTime.Now);
        Console.Out.Flush();
        Isas.Shared.Utilities.DoWorkParallelThreads(normalizationTasks);
        Console.WriteLine("{0} Normalization tasks complete.", DateTime.Now);
        Console.Out.Flush();
    }

    // populate observed and expected read GC bin vectors
    float[] observedVsExpectedGC = new float[0];
    if (gcWeighted)
    {
        observedVsExpectedGC = ComputeObservedVsExpectedGC(observedAlignments, readGCContent, manifest, debugGCCorrection, outFile);
    }

    // Build one binning task per chromosome that has possible alignments (and predefined bins, if given).
    Dictionary<string, List<GenomicBin>> perChromosomeBins = new Dictionary<string, List<GenomicBin>>();
    List<ThreadStart> binningTasks = new List<ThreadStart>();
    foreach (KeyValuePair<string, GenericRead> fastaEntryKVP in fastaEntries)
    {
        string chr = fastaEntryKVP.Key;

        BitArray chrPossibleAlignments;
        if (!possibleAlignments.TryGetValue(chr, out chrPossibleAlignments))
        {
            continue;
        }

        List<GenomicBin> chrBins;
        if (predefinedBins == null)
        {
            chrBins = new List<GenomicBin>();
        }
        else if (!predefinedBins.TryGetValue(chr, out chrBins))
        {
            continue;
        }
        perChromosomeBins[chr] = chrBins;

        // args is declared inside the loop body, so each task closes over its own instance.
        BinTaskArguments args = new BinTaskArguments();
        args.FastaEntry = fastaEntryKVP.Value;
        args.Chromosome = chr;
        args.PossibleAlignments = chrPossibleAlignments;
        args.ObservedAlignments = observedAlignments[chr];
        args.CoverageMode = coverageMode;
        args.Bins = chrBins;
        args.BinSize = binSize;
        args.ReadGCContent = gcWeighted ? readGCContent[chr] : null;
        args.ObservedVsExpectedGC = observedVsExpectedGC;
        binningTasks.Add(new ThreadStart(() => { BinCountsForChromosome(args); }));
    }

    Console.WriteLine("{0} Launch BinCountsForChromosome jobs...", DateTime.Now);
    // NOTE(review): this writes an empty line, whereas the normalization section above calls
    // Flush() at the same point - confirm which was intended before changing the output.
    Console.Out.WriteLine();
    Isas.Shared.Utilities.DoWorkParallelThreads(binningTasks);
    Console.WriteLine("{0} Completed BinCountsForChromosome jobs.", DateTime.Now);
    Console.Out.WriteLine();

    // Stitch the per-chromosome results together in reference order.
    List<GenomicBin> finalBins = new List<GenomicBin>();
    foreach (string chr in chromosomes)
    {
        List<GenomicBin> chrBins;
        if (perChromosomeBins.TryGetValue(chr, out chrBins))
        {
            finalBins.AddRange(chrBins);
        }
    }
    return finalBins;
}