Coon.Compass.ProteinHoarder.ProteinHoarder.GetAllUniquePeptides C# (CSharp) Метод

GetAllUniquePeptides() приватный Метод

Loads all the unique peptide sequences (L / I ambiguous) from the OMSSA csv files supplied. Keeps track of all the psms for those peptides as well.
private GetAllUniquePeptides ( IEnumerable<CsvFile> csvFiles ) : Dictionary<string, Peptide>
csvFiles IEnumerable<CsvFile> The OMSSA .csv files to read the PSMs from
Результат Dictionary<string, Peptide>
        /// <summary>
        /// Reads every record of the supplied .csv files and groups the peptide spectral
        /// matches (PSMs) by peptide sequence. Sequences are upper-cased and every 'I' is
        /// replaced by 'L' before being used as the key, so leucine / isoleucine variants
        /// of the same sequence collapse into a single <see cref="Peptide"/>.
        /// </summary>
        /// <remarks>
        /// Side effects: replaces <c>Proteases</c> with a fresh set holding the protease of
        /// each file read, and widens <c>_smallestPeptide</c> / <c>_largestPeptide</c> when a
        /// newly seen sequence is shorter / longer than the current bounds.
        /// All numeric fields are parsed with the invariant culture, because the files are
        /// machine-written and must not be interpreted using the current user's locale.
        /// </remarks>
        /// <param name="csvFiles">The .csv files to read the PSMs from.</param>
        /// <returns>
        /// A dictionary keyed by the I/L-collapsed sequence; each value carries all PSMs
        /// found for that sequence across every file.
        /// </returns>
        private Dictionary<string, Peptide> GetAllUniquePeptides(IEnumerable<CsvFile> csvFiles)
        {
            Log("Reading in unique peptides sequences from all .csv files...");
            Dictionary<string, Peptide> peptides = new Dictionary<string, Peptide>(1 << 16);
            Proteases = new HashSet<Protease>();
            int psmCount = 0;

            // Fully qualified so this method does not depend on a using directive
            // that may or may not exist at the top of the file.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Loop over each input file and read its contents
            foreach (CsvFile csvfile in csvFiles)
            {
                // Keep a list of all the proteases used
                Proteases.Add(Protease.GetProtease(csvfile.Protease));

                // Counter for the number of PSMs loaded in this csvfile
                int csvPsmCount = 0;

                // Default column names; swapped below when the header has an "XCorr" column.
                string sequenceString = "Peptide";
                string pvalueString = "P-value";
                bool proteomeDiscover = false;

                // Open up the csvfile and read its contents, skipping the header
                using (CsvReader reader = new CsvReader(new StreamReader(csvfile.FilePath), true))
                {
                    // NOTE(review): an "XCorr" column is taken as the marker for the alternate
                    // layout (presumably a Proteome Discoverer export, going by the flag name).
                    if (reader.GetFieldHeaders().Contains("XCorr"))
                    {
                        sequenceString = "Sequence";
                        pvalueString = "PEP";
                        proteomeDiscover = true;
                    }

                    // Read each line of the csv
                    while (reader.ReadNextRecord())
                    {
                        // Remove leucine / isoleucine ambiguity. Invariant upper-casing keeps
                        // 'i' -> 'I' regardless of locale (e.g. Turkish maps 'i' to dotted 'İ').
                        string leuSeq = reader[sequenceString].ToUpperInvariant().Replace('I', 'L');

                        double rt = 0;
                        int specNum = 0;
                        if (proteomeDiscover)
                        {
                            // This layout has no spectrum number column read here; specNum stays 0.
                            if (ProteinsPerMinute)
                            {
                                rt = double.Parse(reader["RT [min]"], invariant);
                            }
                        }
                        else
                        {
                            specNum = int.Parse(reader["Spectrum number"], invariant);
                            if (ProteinsPerMinute)
                            {
                                // Retention time is taken from the first capture group of omssaRTRegex.
                                rt = double.Parse(omssaRTRegex.Match(reader["Filename/id"]).Groups[1].Value, invariant);
                            }
                        }

                        double pvalue = double.Parse(reader[pvalueString], invariant);

                        // Create a new peptide spectral match
                        PSM psm = new PSM(csvfile, specNum, rt, pvalue);

                        // Add to the list of the all the unique peptides
                        Peptide realPep;
                        if (peptides.TryGetValue(leuSeq, out realPep))  // Single hash lookup instead of ContainsKey + indexer
                        {
                            realPep.PSMs.Add(psm);
                        }
                        else
                        {
                            realPep = new Peptide(leuSeq);
                            realPep.PSMs.Add(psm);

                            peptides.Add(leuSeq, realPep);

                            // Check to see if the peptide was the biggest or smallest
                            if (leuSeq.Length < _smallestPeptide)
                            {
                                _smallestPeptide = leuSeq.Length;
                            }
                            if (leuSeq.Length > _largestPeptide)
                            {
                                _largestPeptide = leuSeq.Length;
                            }
                        }

                        // General psm counters;
                        csvPsmCount++;
                    }
                }

                // Total psms loaded
                psmCount += csvPsmCount;

                Log("{0:N0} PSMs were loaded from {1}", csvPsmCount, csvfile);
            }

            // Avoid 0 / 0 (NaN) in the summary when no PSMs were loaded at all.
            double percentUnique = psmCount > 0 ? 100.0 * ((double)peptides.Count / psmCount) : 0.0;

            Log("{0:N0} unique peptides were found from the {1:N0} PSMs loaded ({2:F1}%). (I/L ambiguity removed)", peptides.Count, psmCount, percentUnique);
            return peptides;
        }