/// <summary>
/// Bins alignment data according to <paramref name="parameters"/>.
/// When no intermediate files are supplied, a single genomic interval is processed via
/// BinOneGenomicInterval and the method returns immediately. Otherwise every intermediate
/// file is deserialized into shared per-chromosome tables, the bin size is derived if it was
/// left at -1, and either the bin size alone or the binned counts are written out.
/// </summary>
/// <param name="parameters">
/// Run configuration. Note that <c>parameters.binSize</c> is overwritten in place when it
/// arrives as -1.
/// </param>
/// <returns>0 on every path visible in this method.</returns>
public static int Run(CanvasBinParameters parameters)
{
    // Will hold a bunch of BitArrays, one for each chromosome.
    // Each one's length corresponds to the length of the chromosome it represents.
    // A position will be marked 'true' if the mer starting at that position is unique in the genome.
    Dictionary<string, BitArray> possibleAlignments = new Dictionary<string, BitArray>();

    // Will hold a bunch of HitArrays, one for each chromosome.
    // Each one's length corresponds to the length of the chromosome it represents.
    // A position will be marked with the number of times the mer starting at that position
    // is observed in the SAM file.
    Dictionary<string, HitArray> observedAlignments = new Dictionary<string, HitArray>();

    // Will hold a bunch of Int16 arrays, one for each chromosome.
    // Each one's length corresponds to the length of the chromosome it represents.
    // A value at a given index represents the fragment length of the read starting at that index.
    Dictionary<string, Int16[]> fragmentLengths = new Dictionary<string, Int16[]>();

    // No intermediate files: bin a single genomic interval and stop.
    if (parameters.intermediatePaths.Count == 0)
    {
        BinOneGenomicInterval(parameters, possibleAlignments, observedAlignments, fragmentLengths);
        return 0;
    }

    // Load our intermediate data files.
    List<string> inputFiles = new List<string>(parameters.intermediatePaths);

    // Passed to every worker to control access to possibleAlignments, observedAlignments, fragmentLengths.
    Object semaphore = new object();

    // Environment.ProcessorCount is deliberately not used here:
    // limit # of deserialization threads to avoid (rare) protobuf issue.
    int processorCoreCount = 1;

    List<Thread> threads = new List<Thread>();
    Console.WriteLine("Start deserialization:");
    Console.Out.Flush();
    while (threads.Count > 0 || inputFiles.Count > 0)
    {
        // Remove defunct threads:
        threads.RemoveAll(t => !t.IsAlive);

        // Fill every free worker slot with the next pending file, in input order.
        while (inputFiles.Count > 0 && threads.Count < processorCoreCount)
        {
            // Declared inside the loop so each worker's closure captures its own file name.
            string inputFile = inputFiles[0];
            inputFiles.RemoveAt(0);

            ThreadStart threadDelegate = new ThreadStart(() => DeserializeCanvasData(inputFile, possibleAlignments, observedAlignments, fragmentLengths, semaphore, parameters.coverageMode));
            Thread newThread = new Thread(threadDelegate);
            threads.Add(newThread);
            newThread.Name = "CanvasBin " + inputFile;
            Console.WriteLine(newThread.Name);
            newThread.Start();
        }

        // Block on the oldest worker instead of sleeping a fixed second: Join returns as soon
        // as that worker exits, so the next file starts without delay, and the loop never
        // spins when the queue is empty but workers are still running. The timeout bounds how
        // long a different, already-finished worker can go unnoticed.
        if (threads.Count > 0)
        {
            threads[0].Join(1000);
        }
    }
    Console.WriteLine("{0} Deserialization complete", DateTime.Now);
    Console.Out.Flush();

    // The manifest is optional; it is only built when a manifest file was supplied.
    NexteraManifest manifest = parameters.manifestFile == null ? null : new NexteraManifest(parameters.manifestFile, null, Console.WriteLine);

    if (parameters.binSize == -1)
    {
        // Turn the desired # of alignments per bin into the number of possible alignments expected per bin.
        parameters.binSize = CalculateNumberOfPossibleAlignmentsPerBin(parameters.countsPerBin, possibleAlignments, observedAlignments,
            manifest: manifest);
    }

    if (parameters.binSizeOnly)
    {
        // Write bin size to file and skip the binning itself.
        System.IO.File.WriteAllText(parameters.outFile + ".binsize", "" + parameters.binSize);
        return 0;
    }

    Dictionary<string, List<GenomicBin>> predefinedBins = null;
    if (parameters.predefinedBinsFile != null)
    {
        // Read predefined bins
        predefinedBins = Utilities.LoadBedFile(parameters.predefinedBinsFile);
    }

    // Bin alignments.
    List<GenomicBin> bins = BinCounts(parameters.referenceFile, parameters.binSize, parameters.coverageMode, manifest,
        possibleAlignments, observedAlignments, fragmentLengths, predefinedBins, parameters.outFile);

    // Output!
    Console.WriteLine("{0} Output binned counts:", DateTime.Now);
    CanvasIO.WriteToTextFile(parameters.outFile, bins);
    Console.WriteLine("{0} Output complete", DateTime.Now);
    Console.Out.Flush();
    return 0;
}