/// <summary>
/// Walks the bases of one chromosome and computes, for every bin, its GC percentage and
/// its observed alignment count.
/// </summary>
/// <remarks>
/// If <c>arguments.Bins</c> already contains bins, those predefined bins are used: the scan
/// jumps from bin to bin and writes <c>GC</c> and <c>Count</c> into each existing entry.
/// Otherwise bins are created on the fly, each one closed as soon as it has accumulated
/// <c>arguments.BinSize</c> possible alignment positions, and appended to <c>arguments.Bins</c>.
/// A trailing partial bin (end of chromosome reached before the bin closes) is not emitted.
/// </remarks>
/// <param name="arguments">
/// Inputs for one chromosome (reference bases, possible/observed alignment tracks, coverage
/// mode, bin size); its <c>Bins</c> list is also where the results are written.
/// </param>
static void BinCountsForChromosome(BinTaskArguments arguments)
{
    // Upper bound on what a single position may contribute to a bin in the
    // TruncatedDynamicRange and GCContentWeighted coverage modes.
    const int MaxCountPerPosition = 10;

    List<GenomicBin> bins = arguments.Bins;
    bool usePredefinedBins = bins.Any();
    int predefinedBinIndex = 0;
    GenericRead fastaEntry = arguments.FastaEntry;
    BinState currentBin = new BinState();
    string chr = arguments.Chromosome;
    BitArray possibleAlignments = arguments.PossibleAlignments;
    HitArray observedAlignments = arguments.ObservedAlignments;
    CanvasCoverageMode coverageMode = arguments.CoverageMode;
    int pos = usePredefinedBins ? bins[predefinedBinIndex].Start : 0;

    // Skip past leading Ns. The bounds check keeps an all-N sequence (or a start position
    // at/after the end of the sequence) from indexing past the end of the bases.
    // NOTE(review): only lower-case 'n' is treated as N, as in the original code — confirm
    // that upper-case 'N' cannot occur in the input.
    while (pos < fastaEntry.Bases.Length && fastaEntry.Bases[pos] == 'n')
    {
        pos++;
    }

    // Per-position data for the bin currently being built. gcWeights is only filled in
    // GCContentWeighted mode, where it stays index-aligned with binObservations.
    List<float> gcWeights = new List<float>();
    List<int> binObservations = new List<int>();

    for (; pos < fastaEntry.Bases.Length; pos++)
    {
        // Sets the start of the bin
        if (currentBin.StartPosition == -1)
        {
            currentBin.StartPosition = pos;
        }

        // Count only real nucleotides. This must be a char-to-char comparison: calling
        // Equals with the string "n" on a char is always false, which counted every N too.
        if (fastaEntry.Bases[pos] != 'n')
        {
            currentBin.NucleotideCount++;
        }

        switch (fastaEntry.Bases[pos])
        {
            case 'C':
            case 'c':
            case 'G':
            case 'g':
                currentBin.GCCount++;
                break;
        }

        if (possibleAlignments[pos])
        {
            currentBin.PossibleCount++;
            currentBin.ObservedCount += observedAlignments.Data[pos];
            binObservations.Add(observedAlignments.Data[pos]);
            if (coverageMode == CanvasCoverageMode.GCContentWeighted)
            {
                gcWeights.Add(arguments.ObservedVsExpectedGC[arguments.ReadGCContent[pos]]);
            }
        }

        // A dynamic bin closes once it has seen the desired number of possible alignment
        // positions; a predefined bin closes on its last position (Stop is exclusive).
        bool binComplete = usePredefinedBins
            ? pos == bins[predefinedBinIndex].Stop - 1
            : currentBin.PossibleCount == arguments.BinSize;
        if (!binComplete)
        {
            continue;
        }

        if (coverageMode == CanvasCoverageMode.TruncatedDynamicRange)
        {
            // Recompute the bin total with every position's count capped.
            currentBin.ObservedCount = 0;
            foreach (int observation in binObservations)
            {
                currentBin.ObservedCount += Math.Min(MaxCountPerPosition, observation);
            }
        }

        if (coverageMode == CanvasCoverageMode.GCContentWeighted)
        {
            // Recompute the bin total from weight-normalized, capped per-position counts.
            // NOTE(review): a zero weight gives Infinity (capped) or NaN for 0/0 — confirm
            // ObservedVsExpectedGC never contains 0.
            float weightedCount = 0;
            for (int i = 0; i < binObservations.Count; i++)
            {
                weightedCount += Math.Min(MaxCountPerPosition, (float)binObservations[i] / gcWeights[i]);
            }
            currentBin.ObservedCount = (int)Math.Round(weightedCount);
        }

        // GC percentage over the non-N bases. A bin made only of Ns has no nucleotides;
        // report 0 instead of dividing by zero (0 is what such a bin produced before).
        int gc = currentBin.NucleotideCount == 0
            ? 0
            : (int)(100 * currentBin.GCCount / currentBin.NucleotideCount);

        if (usePredefinedBins)
        {
            bins[predefinedBinIndex].GC = gc;
            bins[predefinedBinIndex].Count = currentBin.ObservedCount;
            predefinedBinIndex++;
            if (predefinedBinIndex >= bins.Count)
            {
                break; // we have processed all the bins
            }
            // Jump to right before the next predefined bin; the loop's pos++ lands on its Start.
            pos = bins[predefinedBinIndex].Start - 1;
        }
        else
        {
            // Note the pos + 1 to make the first three fields conform to the BED specification
            GenomicBin bin = new GenomicBin(chr, currentBin.StartPosition, pos + 1, gc, currentBin.ObservedCount);
            bins.Add(bin);
        }

        // Reset all per-bin state before starting the next bin
        currentBin.Reset();
        binObservations.Clear();
        gcWeights.Clear();
    }
}