CSMSL.Examples.ProteinGroupingExample.Start C# (CSharp) Method

Start() public static method

public static Start ( IProtease protease, double percentIdentified = 0.05, int maxMissed = 3, int minLength = 5, int maxLength = 35 ) : void
protease IProtease
percentIdentified double
maxMissed int
minLength int
maxLength int
return void
        /// <summary>
        /// Example run of protein grouping: digests every protein read from the yeast FASTA file,
        /// selects a reproducible pseudo-random subset of the resulting peptides to stand in for
        /// identified peptides, groups the proteins against that subset, and writes counts,
        /// elapsed time and working-set size to the console.
        /// </summary>
        /// <param name="protease">Protease passed to both the digestion and the grouping call.</param>
        /// <param name="percentIdentified">
        /// Fraction of all digested peptides to keep as "identified" (0.05 keeps 5 %); the resulting
        /// count is truncated to an integer.
        /// </param>
        /// <param name="maxMissed">
        /// Forwarded to the digestion and grouping calls (presumably the maximum number of missed
        /// cleavages - confirm against <c>Protein.Digest</c>).
        /// </param>
        /// <param name="minLength">Forwarded to the digestion call (presumably the minimum peptide length).</param>
        /// <param name="maxLength">Forwarded to the digestion call (presumably the maximum peptide length).</param>
        public static void Start(IProtease protease, double percentIdentified = 0.05, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            Console.WriteLine("**Start Protein Grouping**");
            Stopwatch timer = Stopwatch.StartNew();
            List<Peptide> digestedPeptides = new List<Peptide>();
            List<Protein> loadedProteins = new List<Protein>();

            // Read the whole database, keeping each protein together with all of its digestion products.
            using (FastaReader fasta = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein entry in fasta.ReadNextProtein())
                {
                    digestedPeptides.AddRange(entry.Digest(protease, maxMissed, minLength, maxLength));
                    loadedProteins.Add(entry);
                }
            }
            Console.WriteLine("Loaded {0:N0} peptides from {1:N0} proteins in {2} ms", digestedPeptides.Count, loadedProteins.Count, timer.ElapsedMilliseconds);

            // A constant seed keeps the selected subset identical from run to run.
            Random rng = new Random(480912341);

            // Shuffle by sorting on random keys, then keep the leading fraction as the identified set.
            IEnumerable<Peptide> shuffled = digestedPeptides.OrderBy(candidate => rng.Next());
            int identifiedCount = (int) (digestedPeptides.Count*percentIdentified);
            List<Peptide> identifiedPeptides = shuffled.Take(identifiedCount).ToList();

            AminoAcidLeucineSequenceComparer sequenceComparer = new AminoAcidLeucineSequenceComparer();
            List<ProteinGroup> groups = ProteinGroup.GroupProteins(loadedProteins, protease, identifiedPeptides, sequenceComparer, maxMissed).ToList();

            timer.Stop();

            // Summary: group statistics, total wall-clock time, and process working set in whole megabytes.
            const int bytesPerMegabyte = 1024*1024;
            Console.WriteLine("{0:N0} proteins produced {1:N0} protein groups from {2:N0} identified sequences", loadedProteins.Count, groups.Count, identifiedPeptides.Count);
            Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
            Console.WriteLine("Memory used: {0:N0} MB", System.Environment.WorkingSet/bytesPerMegabyte);
            Console.WriteLine("**END Protein Grouping**");
        }