CanvasBin.CanvasBin.BinCounts C# (CSharp) Метод

BinCounts() статический приватный Метод

Bin alignments.
static private BinCounts ( string referenceFile, int binSize, CanvasCoverageMode coverageMode, NexteraManifest manifest, Dictionary<string, BitArray> possibleAlignments, Dictionary<string, HitArray> observedAlignments, Dictionary<string, Int16[]> fragmentLengths, Dictionary<string, List<GenomicBin>> predefinedBins, string outFile ) : List<GenomicBin>
referenceFile string Reference fasta file.
binSize int Desired number of alignments per bin.
coverageMode CanvasCoverageMode
manifest NexteraManifest
possibleAlignments Dictionary<string, BitArray> BitArrays of possible alignments.
observedAlignments Dictionary<string, HitArray> HitArrays of observed alignments.
fragmentLengths Dictionary<string, Int16[]>
predefinedBins Dictionary<string, List<GenomicBin>> Pre-defined bins. null if not available.
outFile string
Результат List<GenomicBin>
        /// <summary>
        /// Bin alignments. Loads every entry of the reference fasta, optionally computes a
        /// per-position GC content track (GC-content-weighted mode only), then runs
        /// <c>BinCountsForChromosome</c> for each chromosome and concatenates the resulting bins.
        /// </summary>
        /// <param name="referenceFile">Reference fasta file.</param>
        /// <param name="binSize">Desired number of alignments per bin.</param>
        /// <param name="coverageMode">Coverage mode; the GC track and the observed-vs-expected GC
        /// vector are only computed for <c>CanvasCoverageMode.GCContentWeighted</c>.</param>
        /// <param name="manifest">Forwarded to <c>ComputeObservedVsExpectedGC</c>; not otherwise used here.</param>
        /// <param name="possibleAlignments">BitArrays of possible alignments, keyed by chromosome.
        /// Chromosomes missing from this dictionary are skipped.</param>
        /// <param name="observedAlignments">HitArrays of observed alignments, keyed by chromosome.</param>
        /// <param name="fragmentLengths">Per-position fragment lengths, keyed by chromosome. Only read in
        /// GC-content-weighted mode; a value of 0 at a position is replaced by the mean fragment size.</param>
        /// <param name="predefinedBins">Pre-defined bins. null if not available. When supplied,
        /// chromosomes without an entry are skipped.</param>
        /// <param name="outFile">Forwarded to <c>ComputeObservedVsExpectedGC</c>; not otherwise used here.</param>
        /// <returns>The bins of all processed chromosomes, in the order the chromosomes appear in the reference.</returns>
        static List<GenomicBin> BinCounts(string referenceFile, int binSize, CanvasCoverageMode coverageMode, NexteraManifest manifest,
            Dictionary<string, BitArray> possibleAlignments,
            Dictionary<string, HitArray> observedAlignments,
            Dictionary<string, Int16[]> fragmentLengths,
            Dictionary<string, List<GenomicBin>> predefinedBins,
            string outFile)
        {
            bool debugGCCorrection = false; // write value of GC bins and correction factor
            bool gcWeighted = coverageMode == CanvasCoverageMode.GCContentWeighted;
            Dictionary<string, GenericRead> fastaEntries = new Dictionary<string, GenericRead>();
            List<string> chromosomes = new List<string>();
            Int16 meanFragmentSize = 0;
            Int16 meanFragmentCutoff = 3;
            if (gcWeighted)
                meanFragmentSize = MeanFragmentSize(fragmentLengths);

            using (FastaReader reader = new FastaReader(referenceFile))
            {
                GenericRead fastaEntry = new GenericRead();

                // Loop through each chromosome in the reference. A fresh GenericRead is allocated
                // after every entry so that the stored entries are not overwritten by the next read.
                while (reader.GetNextEntry(ref fastaEntry))
                {
                    chromosomes.Add(fastaEntry.Name);
                    fastaEntries[fastaEntry.Name] = fastaEntry;
                    fastaEntry = new GenericRead();
                }
            }

            // calculate GC content of the forward read at every position along the genome
            Dictionary<string, byte[]> readGCContent = new Dictionary<string, byte[]>();
            if (gcWeighted)
            {
                byte gcCap = (byte)numberOfGCbins;
                // Fragments longer than this many bases are truncated; loop-invariant, so computed once.
                int maxFragmentSize = meanFragmentSize * meanFragmentCutoff;
                List<ThreadStart> normalizationTasks = new List<ThreadStart>();
                foreach (KeyValuePair<string, Int16[]> fragmentLengthsKVP in fragmentLengths)
                {
                    // Copy everything the task needs into per-iteration locals. Capturing the foreach
                    // variable itself would make every task see the last chromosome when compiled
                    // with a pre-C# 5 compiler.
                    string chr = fragmentLengthsKVP.Key;
                    Int16[] chrFragmentLengths = fragmentLengthsKVP.Value;
                    GenericRead fastaEntry = fastaEntries[chr];

                    normalizationTasks.Add(new ThreadStart(() =>
                    {
                        // contains GC content of the forward read at every position for current chr
                        byte[] gcContent = new byte[fastaEntry.Bases.Length];

                        // Stop early enough that the longest allowed fragment never runs past the
                        // end of the sequence; the trailing positions keep GC content 0.
                        int scanLimit = fastaEntry.Bases.Length - maxFragmentSize - 1;

                        // Iteratively calculate GC content of "reads" using fasta genome reference
                        for (int pos = 0; pos < scanLimit; pos++)
                        {
                            Int16 currentFragment;
                            if (chrFragmentLengths[pos] == 0)
                                currentFragment = meanFragmentSize;
                            else
                                currentFragment = Convert.ToInt16(Math.Min(chrFragmentLengths[pos], maxFragmentSize));

                            uint gcCounter = 0;
                            int fragmentEnd = pos + currentFragment;
                            for (int i = pos; i < fragmentEnd; i++)
                            {
                                switch (fastaEntry.Bases[i])
                                {
                                    case 'C':
                                    case 'c':
                                    case 'G':
                                    case 'g':
                                        gcCounter++;
                                        break;
                                    default:
                                        break;
                                }
                            }

                            // GC percentage of the fragment, capped at gcCap. An empty fragment (only
                            // possible when the mean fragment size is 0) has no bases, so it is
                            // recorded as 0 instead of dividing by zero.
                            gcContent[pos] = currentFragment > 0
                                ? (byte)Math.Min(100 * gcCounter / currentFragment, gcCap)
                                : (byte)0;
                        }

                        // Tasks run concurrently and Dictionary is not safe for concurrent writes.
                        lock (readGCContent)
                        {
                            readGCContent[chr] = gcContent;
                        }
                    }));
                }

                Console.WriteLine("{0} Launching normalization tasks.", DateTime.Now);
                Console.Out.Flush();
                Isas.Shared.Utilities.DoWorkParallelThreads(normalizationTasks);
                Console.WriteLine("{0} Normalization tasks complete.", DateTime.Now);
                Console.Out.Flush();
            }

            // populate observed and expected read GC bin vectors
            float[] observedVsExpectedGC = new float[0];
            if (gcWeighted)
                observedVsExpectedGC = ComputeObservedVsExpectedGC(observedAlignments, readGCContent, manifest, debugGCCorrection, outFile);

            Dictionary<string, List<GenomicBin>> perChromosomeBins = new Dictionary<string, List<GenomicBin>>();
            List<ThreadStart> binningTasks = new List<ThreadStart>();
            foreach (KeyValuePair<string, GenericRead> fastaEntryKVP in fastaEntries)
            {
                string chr = fastaEntryKVP.Key;

                BitArray chrPossibleAlignments;
                if (!possibleAlignments.TryGetValue(chr, out chrPossibleAlignments)) continue;

                // Without pre-defined bins every chromosome starts from an empty list; with them,
                // chromosomes that have no pre-defined bins are skipped.
                List<GenomicBin> chrBins;
                if (predefinedBins == null)
                    chrBins = new List<GenomicBin>();
                else if (!predefinedBins.TryGetValue(chr, out chrBins))
                    continue;
                perChromosomeBins[chr] = chrBins;

                // args is declared inside the loop so each task closes over its own instance.
                BinTaskArguments args = new BinTaskArguments();
                args.FastaEntry = fastaEntryKVP.Value;
                args.Chromosome = chr;
                args.PossibleAlignments = chrPossibleAlignments;
                args.ObservedAlignments = observedAlignments[chr];
                args.CoverageMode = coverageMode;
                args.Bins = chrBins;
                args.BinSize = binSize;
                args.ReadGCContent = gcWeighted ? readGCContent[chr] : null;
                args.ObservedVsExpectedGC = observedVsExpectedGC;
                binningTasks.Add(new ThreadStart(() => { BinCountsForChromosome(args); }));
            }
            Console.WriteLine("{0} Launch BinCountsForChromosome jobs...", DateTime.Now);
            // NOTE(review): this emits a blank line, whereas the GC section above calls Flush() — confirm intent.
            Console.Out.WriteLine();
            Isas.Shared.Utilities.DoWorkParallelThreads(binningTasks);
            Console.WriteLine("{0} Completed BinCountsForChromosome jobs.", DateTime.Now);
            Console.Out.WriteLine();

            // Concatenate in reference order (the dictionary's enumeration order is not relied upon).
            List<GenomicBin> finalBins = new List<GenomicBin>();
            foreach (string chr in chromosomes)
            {
                List<GenomicBin> chrBins;
                if (perChromosomeBins.TryGetValue(chr, out chrBins))
                    finalBins.AddRange(chrBins);
            }
            return finalBins;
        }