private void DoSearch()
{
StreamWriter overall_log = null;
StreamWriter summary = null;
StreamWriter log = null;
FileStream proteome_database = null;
#if !NO_EXCEPTION_HANDLING
try
{
#endif
DateTime overall_start = DateTime.Now;
OnUpdateStatus(new StatusEventArgs("Initializing..."));
OnReportTaskWithoutProgress(EventArgs.Empty);
OnUpdateProgress(new ProgressEventArgs(0));
// convert all paths to absolute for outputs
for(int i = 0; i < dataFilepaths.Count; i++)
{
dataFilepaths[i] = Path.GetFullPath(dataFilepaths[i]);
}
proteomeDatabaseFilepath = Path.GetFullPath(proteomeDatabaseFilepath);
outputFolder = Path.GetFullPath(outputFolder);
PeptideSpectrumMatch.SetPrecursorMassType(precursorMassType);
AminoAcidPolymer.SetProductMassType(productMassType);
proteome_database = new FileStream(proteomeDatabaseFilepath, FileMode.Open, FileAccess.Read, FileShare.Read);
int target_proteins;
int decoy_proteins;
int on_the_fly_decoy_proteins;
int total_proteins = ProteomeDatabaseReader.CountProteins(proteome_database, onTheFlyDecoys, out target_proteins, out decoy_proteins, out on_the_fly_decoy_proteins);
double decoys_over_targets_protein_ratio = (double)(decoy_proteins + on_the_fly_decoy_proteins) / target_proteins;
int num_target_peptides = 0;
int num_decoy_peptides = 0;
double decoys_over_targets_peptide_ratio = double.NaN;
string fixed_modifications = null;
foreach(Modification fixed_modification in fixedModifications)
{
fixed_modifications += fixed_modification.ToString() + ", ";
}
if(fixed_modifications != null)
{
fixed_modifications = fixed_modifications.Substring(0, fixed_modifications.Length - 2);
}
else
{
fixed_modifications = "none";
}
string variable_modifications = null;
foreach(Modification variable_modification in variableModifications)
{
variable_modifications += variable_modification.ToString() + ", ";
}
if(variable_modifications != null)
{
variable_modifications = variable_modifications.Substring(0, variable_modifications.Length - 2);
}
else
{
variable_modifications = "none";
}
int total_spectra = 0;
List<PeptideSpectrumMatch> aggregate_psms = null;
SortedList<string, HashSet<string>> parents = null;
Dictionary<string, int> num_spectra = null;
Dictionary<string, List<PeptideSpectrumMatch>> grouped_aggregate_psms = null;
Dictionary<string, Modification> known_variable_modifications = new Dictionary<string, Modification>();
List<Modification> unknown_variable_modifications = new List<Modification>();
foreach(Modification variable_modification in variableModifications)
{
if(variable_modification.Known)
{
known_variable_modifications.Add(variable_modification.Description, variable_modification);
}
else
{
unknown_variable_modifications.Add(variable_modification);
}
}
if(dataFilepaths.Count > 1)
{
overall_log = new StreamWriter(Path.Combine(outputFolder, "log.txt"));
overall_log.AutoFlush = true;
overall_log.WriteLine(Program.GetProductNameAndVersion() + " LOG");
overall_log.WriteLine();
overall_log.WriteLine("PARAMETERS");
string data_filepaths = null;
foreach(string data_filepath in dataFilepaths)
{
data_filepaths += data_filepath.ToString() + ", ";
}
data_filepaths = data_filepaths.Substring(0, data_filepaths.Length - 2);
overall_log.WriteLine("Input Data Files: " + data_filepaths);
overall_log.WriteLine("Unknown Precursor Charge State Range: " + minimumAssumedPrecursorChargeState.ToString("+0;-0;0") + ".." + maximumAssumedPrecursorChargeState.ToString("+0;-0;0"));
overall_log.WriteLine("Absolute MS/MS Intensity Threshold: " + (absoluteThreshold >= 0.0 ? absoluteThreshold.ToString(CultureInfo.InvariantCulture) : "disabled"));
overall_log.WriteLine("Relative MS/MS Intensity Threshold: " + (relativeThresholdPercent >= 0.0 ? relativeThresholdPercent.ToString(CultureInfo.InvariantCulture) + '%' : "disabled"));
overall_log.WriteLine("Maximum Number of MS/MS Peaks: " + (maximumNumberOfPeaks >= 0 ? maximumNumberOfPeaks.ToString() : "disabled"));
overall_log.WriteLine("Assign Charge States: " + assignChargeStates.ToString().ToLower());
overall_log.WriteLine("De-isotope: " + deisotope.ToString().ToLower());
overall_log.WriteLine("Proteome Database: " + proteomeDatabaseFilepath);
overall_log.WriteLine("Create Target–Decoy Database On The Fly: " + onTheFlyDecoys.ToString().ToLower());
overall_log.WriteLine("Protease: " + protease.ToString());
overall_log.WriteLine("Maximum Missed Cleavages: " + maximumMissedCleavages.ToString());
overall_log.WriteLine("Initiator Methionine Behavior: " + initiatorMethionineBehavior.ToString().ToLower());
overall_log.WriteLine("Fixed Modifications: " + fixed_modifications);
overall_log.WriteLine("Variable Modifications: " + variable_modifications);
overall_log.WriteLine("Maximum Variable Modification Isoforms Per Peptide: " + maximumVariableModificationIsoforms.ToString());
overall_log.WriteLine("Precursor Mass Tolerance: ±" + precursorMassTolerance.Value.ToString(CultureInfo.InvariantCulture) + ' ' + precursorMassTolerance.Units.ToString() + " (" + precursorMassType.ToString().ToLower() + ')');
overall_log.WriteLine("Accepted Precursor Mass Errors: " + string.Join("; ", acceptedPrecursorMassErrors) + " Da");
overall_log.WriteLine("Product Mass Tolerance: ±" + productMassTolerance.Value.ToString(CultureInfo.InvariantCulture) + ' ' + productMassTolerance.Units.ToString() + " (" + productMassType.ToString().ToLower() + ')');
overall_log.WriteLine("Maximum False Discovery Rate: " + (maximumFalseDiscoveryRate * 100).ToString(CultureInfo.InvariantCulture) + '%');
overall_log.WriteLine("Consider Modified Forms as Unique Peptides: " + considerModifiedFormsAsUniquePeptides.ToString().ToLower());
overall_log.WriteLine("Maximum Threads: " + maximumThreads.ToString());
overall_log.WriteLine("Minimize Memory Usage: " + minimizeMemoryUsage.ToString().ToLower());
overall_log.WriteLine("Output Folder: " + outputFolder.ToString());
overall_log.WriteLine();
overall_log.WriteLine("RESULTS");
overall_log.WriteLine(total_proteins.ToString("N0") + " total (" + target_proteins.ToString("N0") + " target + " + decoy_proteins.ToString("N0") + " decoy + " + on_the_fly_decoy_proteins.ToString("N0") + " on-the-fly decoy) proteins");
aggregate_psms = new List<PeptideSpectrumMatch>();
parents = DetermineSemiAggregateParentFolders(dataFilepaths);
if(parents.Count > 0)
{
num_spectra = new Dictionary<string, int>(dataFilepaths.Count);
grouped_aggregate_psms = new Dictionary<string, List<PeptideSpectrumMatch>>(dataFilepaths.Count);
}
}
summary = new StreamWriter(Path.Combine(outputFolder, "summary.tsv"));
summary.AutoFlush = true;
summary.WriteLine("Dataset\tProteins\tMS/MS Spectra\tPSM Morpheus Score Threshold\tTarget PSMs\tDecoy PSMs\tPSM FDR (%)\tUnique Peptide Morpheus Score Threshold\tUnique Target Peptides\tUnique Decoy Peptides\tUnique Peptide FDR (%)\tProtein Group Summed Morpheus Score Threshold\tTarget Protein Groups\tDecoy Protein Groups\tProtein Group FDR (%)");
foreach(string data_filepath in dataFilepaths)
{
DateTime start = DateTime.Now;
OnStartingFile(new FilepathEventArgs(data_filepath));
OnUpdateProgress(new ProgressEventArgs(0));
log = new StreamWriter(Path.Combine(outputFolder, Path.GetFileNameWithoutExtension(data_filepath) + ".log.txt"));
log.AutoFlush = true;
log.WriteLine(Program.GetProductNameAndVersion() + " LOG");
log.WriteLine();
log.WriteLine("PARAMETERS");
log.WriteLine("Input Data File: " + data_filepath);
log.WriteLine("Unknown Precursor Charge State Range: " + minimumAssumedPrecursorChargeState.ToString("+0;-0;0") + ".." + maximumAssumedPrecursorChargeState.ToString("+0;-0;0"));
log.WriteLine("Absolute MS/MS Intensity Threshold: " + (absoluteThreshold >= 0.0 ? absoluteThreshold.ToString(CultureInfo.InvariantCulture) : "disabled"));
log.WriteLine("Relative MS/MS Intensity Threshold: " + (relativeThresholdPercent >= 0.0 ? relativeThresholdPercent.ToString(CultureInfo.InvariantCulture) + '%' : "disabled"));
log.WriteLine("Maximum Number of MS/MS Peaks: " + (maximumNumberOfPeaks >= 0 ? maximumNumberOfPeaks.ToString() : "disabled"));
log.WriteLine("Assign Charge States: " + assignChargeStates.ToString().ToLower());
log.WriteLine("De-isotope: " + deisotope.ToString().ToLower());
log.WriteLine("Proteome Database: " + proteomeDatabaseFilepath);
log.WriteLine("Create Target–Decoy Database On The Fly: " + onTheFlyDecoys.ToString().ToLower());
log.WriteLine("Protease: " + protease.ToString());
log.WriteLine("Maximum Missed Cleavages: " + maximumMissedCleavages.ToString());
log.WriteLine("Initiator Methionine Behavior: " + initiatorMethionineBehavior.ToString().ToLower());
log.WriteLine("Fixed Modifications: " + fixed_modifications);
log.WriteLine("Variable Modifications: " + variable_modifications);
log.WriteLine("Maximum Variable Modification Isoforms Per Peptide: " + maximumVariableModificationIsoforms.ToString());
log.WriteLine("Precursor Mass Tolerance: ±" + precursorMassTolerance.Value.ToString(CultureInfo.InvariantCulture) + ' ' + precursorMassTolerance.Units.ToString() + " (" + precursorMassType.ToString().ToLower() + ')');
log.WriteLine("Accepted Precursor Mass Errors: " + string.Join("; ", acceptedPrecursorMassErrors) + " Da");
log.WriteLine("Product Mass Tolerance: ±" + productMassTolerance.Value.ToString(CultureInfo.InvariantCulture) + ' ' + productMassTolerance.Units.ToString() + " (" + productMassType.ToString().ToLower() + ')');
log.WriteLine("Maximum False Discovery Rate: " + (maximumFalseDiscoveryRate * 100).ToString(CultureInfo.InvariantCulture) + '%');
log.WriteLine("Consider Modified Forms as Unique Peptides: " + considerModifiedFormsAsUniquePeptides.ToString().ToLower());
log.WriteLine("Maximum Threads: " + maximumThreads.ToString());
log.WriteLine("Minimize Memory Usage: " + minimizeMemoryUsage.ToString().ToLower());
log.WriteLine("Output Folder: " + outputFolder.ToString());
log.WriteLine();
log.WriteLine("RESULTS");
log.WriteLine(total_proteins.ToString("N0") + " total (" + target_proteins.ToString("N0") + " target + " + decoy_proteins.ToString("N0") + " decoy + " + on_the_fly_decoy_proteins.ToString("N0") + " on-the-fly decoy) proteins");
OnUpdateStatus(new StatusEventArgs("Extracting and preprocessing MS/MS spectra..."));
OnReportTaskWithProgress(EventArgs.Empty);
OnUpdateProgress(new ProgressEventArgs(0));
TandemMassSpectra spectra = new TandemMassSpectra();
spectra.ReportTaskWithoutProgress += HandleReportTaskWithoutProgress;
spectra.ReportTaskWithProgress += HandleReportTaskWithProgress;
spectra.UpdateProgress += HandleUpdateProgress;
spectra.Load(data_filepath, minimumAssumedPrecursorChargeState, maximumAssumedPrecursorChargeState,
absoluteThreshold, relativeThresholdPercent, maximumNumberOfPeaks,
assignChargeStates, deisotope, productMassTolerance, maximumThreads);
spectra.ReportTaskWithoutProgress -= HandleReportTaskWithoutProgress;
spectra.ReportTaskWithProgress -= HandleReportTaskWithProgress;
spectra.UpdateProgress -= HandleUpdateProgress;
if(dataFilepaths.Count > 1)
{
total_spectra += spectra.Count;
if(parents.Count > 0)
{
num_spectra.Add(data_filepath, spectra.Count);
}
}
OnUpdateStatus(new StatusEventArgs("Searching MS/MS spectra..."));
OnReportTaskWithProgress(EventArgs.Empty);
OnUpdateProgress(new ProgressEventArgs(0));
PeptideSpectrumMatch[] psms = null;
if(spectra.Count > 0)
{
int max_spectrum_number = 0;
foreach(TandemMassSpectrum spectrum in spectra)
{
if(spectrum.SpectrumNumber > max_spectrum_number)
{
max_spectrum_number = spectrum.SpectrumNumber;
}
}
psms = new PeptideSpectrumMatch[max_spectrum_number];
spectra.Sort(TandemMassSpectrum.AscendingPrecursorMassComparison);
}
Dictionary<string, bool> peptides_observed = null;
if(!minimizeMemoryUsage)
{
peptides_observed = new Dictionary<string, bool>();
}
num_target_peptides = 0;
num_decoy_peptides = 0;
#if NON_MULTITHREADED
int proteins = 0;
int old_progress = 0;
foreach(Protein protein in ProteomeDatabaseReader.ReadProteins(proteome_database, onTheFlyDecoys, known_variable_modifications))
{
foreach(Peptide peptide in protein.Digest(protease, maximumMissedCleavages, initiatorMethionineBehavior, null, null))
{
if(peptide.Target)
{
num_target_peptides++;
}
else
{
num_decoy_peptides++;
}
if(!minimizeMemoryUsage)
{
// This block of code is to ensure that (1) we don't re-search the same base leucine peptide sequence more than we need to,
// and (2) that we are maximally conservative by calling PSMs decoy whenever possible.
// If we haven't already seen this base leucine peptide sequence, add it to the dictionary with a value indicating whether it was decoy or not.
// Then perform the search as usual.
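// If we have already seen it, and either it was seen as a decoy or it is a target this time, we don't need to search it again,
// so we skip the peptide (unless it carries known modifications, in which case it is searched anyway).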
// Otherwise, update the dictionary to reflect that we have now seen it as a decoy and perform the search.
bool observed_as_decoy = false;
if(!peptides_observed.TryGetValue(peptide.BaseLeucineSequence, out observed_as_decoy))
{
peptides_observed.Add(peptide.BaseLeucineSequence, peptide.Decoy);
}
else
{
if(observed_as_decoy || peptide.Target)
{
// if the peptide has no known mods we have already searched all its isoforms, skip it
if(peptide.KnownModifications == null || peptide.KnownModifications.Count == 0)
{
continue;
}
}
else
{
peptides_observed[peptide.BaseLeucineSequence] = true;
}
}
}
peptide.SetFixedModifications(fixedModifications);
foreach(Peptide modified_peptide in peptide.GetVariablyModifiedPeptides(unknown_variable_modifications, maximumVariableModificationIsoforms))
{
foreach(TandemMassSpectrum spectrum in precursorMonoisotopicPeakCorrection ?
spectra.GetTandemMassSpectraInMassRange(precursorMassType == MassType.Average ? modified_peptide.AverageMass : modified_peptide.MonoisotopicMass, precursorMassTolerance, minimumPrecursorMonoisotopicPeakOffset, maximumPrecursorMonoisotopicPeakOffset) :
spectra.GetTandemMassSpectraInMassRange(precursorMassType == MassType.Average ? modified_peptide.AverageMass : modified_peptide.MonoisotopicMass, precursorMassTolerance))
{
PeptideSpectrumMatch psm = new PeptideSpectrumMatch(spectrum, modified_peptide, productMassTolerance);
PeptideSpectrumMatch current_best_psm = psms[spectrum.SpectrumNumber - 1];
if(current_best_psm == null || PeptideSpectrumMatch.DescendingMorpheusScoreComparison(psm, current_best_psm) < 0)
{
psms[spectrum.SpectrumNumber - 1] = psm;
}
}
}
}
proteins++;
int new_progress = (int)((double)proteins / total_proteins * 100);
if(new_progress > old_progress)
{
OnUpdateProgress(new ProgressEventArgs(new_progress));
old_progress = new_progress;
}
}
#else
object progress_lock = new object();
int proteins = 0;
int old_progress = 0;
ParallelOptions parallel_options = new ParallelOptions();
parallel_options.MaxDegreeOfParallelism = maximumThreads;
Parallel.ForEach(ProteomeDatabaseReader.ReadProteins(proteome_database, onTheFlyDecoys, known_variable_modifications), parallel_options, protein =>
{
foreach(Peptide peptide in protein.Digest(protease, maximumMissedCleavages, initiatorMethionineBehavior, null, null))
{
if(peptide.Target)
{
Interlocked.Increment(ref num_target_peptides);
}
else
{
Interlocked.Increment(ref num_decoy_peptides);
}
if(!minimizeMemoryUsage)
{
// This block of code is to ensure that (1) we don't re-search the same base leucine peptide sequence more than we need to,
// and (2) that we are maximally conservative by calling PSMs decoy whenever possible.
// If we haven't already seen this base leucine peptide sequence, add it to the dictionary with a value indicating whether it was decoy or not.
// Then perform the search as usual.
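// If we have already seen it, and either it was seen as a decoy or it is a target this time, we don't need to search it again,
// so we skip the peptide (unless it carries known modifications, in which case it is searched anyway).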
// Otherwise, update the dictionary to reflect that we have now seen it as a decoy and perform the search.
lock(peptides_observed)
{
bool observed_as_decoy = false;
if(!peptides_observed.TryGetValue(peptide.BaseLeucineSequence, out observed_as_decoy))
{
peptides_observed.Add(peptide.BaseLeucineSequence, peptide.Decoy);
}
else
{
if(observed_as_decoy || peptide.Target)
{
// if the peptide has no known mods we have already searched all its isoforms, skip it
if(peptide.KnownModifications == null || peptide.KnownModifications.Count == 0)
{
continue;
}
}
else
{
peptides_observed[peptide.BaseLeucineSequence] = true;
}
}
}
}
peptide.SetFixedModifications(fixedModifications);
foreach(Peptide modified_peptide in peptide.GetVariablyModifiedPeptides(unknown_variable_modifications, maximumVariableModificationIsoforms))
{
foreach (TandemMassSpectrum spectrum in spectra.GetTandemMassSpectraInMassRanges(precursorMassType == MassType.Average ? modified_peptide.AverageMass : modified_peptide.MonoisotopicMass, acceptedPrecursorMassErrors, precursorMassTolerance))
{
PeptideSpectrumMatch psm = new PeptideSpectrumMatch(spectrum, modified_peptide, productMassTolerance);
lock(psms)
{
PeptideSpectrumMatch current_best_psm = psms[spectrum.SpectrumNumber - 1];
if(current_best_psm == null || PeptideSpectrumMatch.DescendingMorpheusScoreComparison(psm, current_best_psm) < 0)
{
psms[spectrum.SpectrumNumber - 1] = psm;
}
}
}
}
}
lock(progress_lock)
{
proteins++;
int new_progress = (int)((double)proteins / total_proteins * 100);
if(new_progress > old_progress)
{
OnUpdateProgress(new ProgressEventArgs(new_progress));
old_progress = new_progress;
}
}
});
#endif
OnUpdateStatus(new StatusEventArgs("Performing post-search analyses..."));
OnReportTaskWithoutProgress(EventArgs.Empty);
OnUpdateProgress(new ProgressEventArgs(0));
log.WriteLine((num_target_peptides + num_decoy_peptides).ToString("N0") + " total (" + num_target_peptides.ToString("N0") + " target + " + num_decoy_peptides.ToString("N0") + " decoy) non-unique peptides");
decoys_over_targets_peptide_ratio = (double)num_decoy_peptides / num_target_peptides;
log.WriteLine(spectra.Count.ToString("N0") + " MS/MS spectra");
List<PeptideSpectrumMatch> psms_no_nulls;
if(psms != null)
{
psms_no_nulls = new List<PeptideSpectrumMatch>(psms.Length);
foreach(PeptideSpectrumMatch psm in psms)
{
if(psm != null)
{
psms_no_nulls.Add(psm);
}
}
if(dataFilepaths.Count > 1)
{
aggregate_psms.AddRange(psms_no_nulls);
if(parents.Count > 0)
{
grouped_aggregate_psms.Add(data_filepath, psms_no_nulls);
}
}
}
else
{
psms_no_nulls = new List<PeptideSpectrumMatch>(0);
}
List<PeptideSpectrumMatch> sorted_psms = new List<PeptideSpectrumMatch>(psms_no_nulls);
sorted_psms.Sort(PeptideSpectrumMatch.DescendingMorpheusScoreComparison);
IEnumerable<IdentificationWithFalseDiscoveryRate<PeptideSpectrumMatch>> psms_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(sorted_psms, decoys_over_targets_peptide_ratio);
Exporters.WriteToTabDelimitedTextFile(psms_with_fdr, Path.Combine(outputFolder, Path.GetFileNameWithoutExtension(data_filepath) + ".PSMs.tsv"));
double psm_score_threshold = double.NegativeInfinity;
int target_psms = sorted_psms.Count;
int decoy_psms = 0;
double psm_fdr = double.NaN;
if(decoys_over_targets_peptide_ratio == 0.0)
{
log.WriteLine(sorted_psms.Count.ToString("N0") + " PSMs (unknown FDR)");
}
else
{
FalseDiscoveryRate.DetermineMaximumIdentifications(psms_with_fdr, false, maximumFalseDiscoveryRate, out psm_score_threshold, out target_psms, out decoy_psms, out psm_fdr);
log.WriteLine(target_psms.ToString("N0") + " target (" + decoy_psms.ToString("N0") + " decoy) PSMs at " + psm_fdr.ToString("0.000%") + " PSM FDR (" + psm_score_threshold.ToString("0.000") + " Morpheus score threshold)");
}
Exporters.WritePsmsToPepXmlFile(Path.Combine(outputFolder, Path.GetFileNameWithoutExtension(data_filepath) + ".pep.xml"),
data_filepath,
minimumAssumedPrecursorChargeState, maximumAssumedPrecursorChargeState,
absoluteThreshold, relativeThresholdPercent, maximumNumberOfPeaks,
assignChargeStates, deisotope,
proteomeDatabaseFilepath, onTheFlyDecoys, target_proteins,
protease, maximumMissedCleavages, initiatorMethionineBehavior,
fixedModifications, fixed_modifications, variableModifications, variable_modifications, maximumVariableModificationIsoforms,
precursorMassTolerance, precursorMassType,
acceptedPrecursorMassErrors,
productMassTolerance, productMassType,
maximumFalseDiscoveryRate, considerModifiedFormsAsUniquePeptides,
maximumThreads, minimizeMemoryUsage,
outputFolder,
psms_with_fdr);
Dictionary<string, PeptideSpectrumMatch> peptides = new Dictionary<string, PeptideSpectrumMatch>();
foreach(PeptideSpectrumMatch psm in sorted_psms)
{
if(!peptides.ContainsKey(considerModifiedFormsAsUniquePeptides ? psm.Peptide.LeucineSequence : psm.Peptide.BaseLeucineSequence))
{
peptides.Add(considerModifiedFormsAsUniquePeptides ? psm.Peptide.LeucineSequence : psm.Peptide.BaseLeucineSequence, psm);
}
}
List<PeptideSpectrumMatch> sorted_peptides = new List<PeptideSpectrumMatch>(peptides.Values);
sorted_peptides.Sort(PeptideSpectrumMatch.DescendingMorpheusScoreComparison);
IEnumerable<IdentificationWithFalseDiscoveryRate<PeptideSpectrumMatch>> peptides_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(sorted_peptides, decoys_over_targets_peptide_ratio);
Exporters.WriteToTabDelimitedTextFile(peptides_with_fdr, Path.Combine(outputFolder, Path.GetFileNameWithoutExtension(data_filepath) + ".unique_peptides.tsv"));
double peptide_score_threshold = double.NegativeInfinity;
int target_peptides = sorted_peptides.Count;
int decoy_peptides = 0;
double peptide_fdr = double.NaN;
if(decoys_over_targets_peptide_ratio == 0.0)
{
log.WriteLine(sorted_peptides.Count.ToString("N0") + " unique peptides (unknown FDR)");
}
else
{
FalseDiscoveryRate.DetermineMaximumIdentifications(peptides_with_fdr, false, maximumFalseDiscoveryRate, out peptide_score_threshold, out target_peptides, out decoy_peptides, out peptide_fdr);
log.WriteLine(target_peptides.ToString("N0") + " unique target (" + decoy_peptides.ToString("N0") + " decoy) peptides at " + peptide_fdr.ToString("0.000%") + " unique peptide FDR (" + peptide_score_threshold.ToString("0.000") + " Morpheus score threshold)");
}
List<ProteinGroup> protein_groups = ProteinGroup.ApplyProteinParsimony(sorted_psms, peptide_score_threshold, proteome_database, onTheFlyDecoys, known_variable_modifications, protease, maximumMissedCleavages, initiatorMethionineBehavior, maximumThreads);
IEnumerable<IdentificationWithFalseDiscoveryRate<ProteinGroup>> protein_groups_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(protein_groups, decoys_over_targets_protein_ratio);
Exporters.WriteToTabDelimitedTextFile(protein_groups_with_fdr, Path.Combine(outputFolder, Path.GetFileNameWithoutExtension(data_filepath) + ".protein_groups.tsv"));
double protein_group_score_threshold = double.NegativeInfinity;
int target_protein_groups = protein_groups.Count;
int decoy_protein_groups = 0;
double protein_group_fdr = double.NaN;
if(decoys_over_targets_protein_ratio == 0.0)
{
log.WriteLine(protein_groups.Count.ToString("N0") + " protein groups (unknown FDR)");
}
else
{
FalseDiscoveryRate.DetermineMaximumIdentifications(protein_groups_with_fdr, false, maximumFalseDiscoveryRate, out protein_group_score_threshold, out target_protein_groups, out decoy_protein_groups, out protein_group_fdr);
log.WriteLine(target_protein_groups.ToString("N0") + " target (" + decoy_protein_groups.ToString("N0") + " decoy) protein groups at " + protein_group_fdr.ToString("0.000%") + " protein group FDR (" + protein_group_score_threshold.ToString("0.000") + " summed Morpheus score threshold)");
}
Exporters.WriteMZIdentMLFile(Path.Combine(outputFolder, Path.GetFileNameWithoutExtension(data_filepath) + ".mzid"),
new string[] { data_filepath },
minimumAssumedPrecursorChargeState, maximumAssumedPrecursorChargeState,
absoluteThreshold, relativeThresholdPercent, maximumNumberOfPeaks,
assignChargeStates, deisotope,
proteomeDatabaseFilepath, proteome_database, onTheFlyDecoys, target_proteins,
protease, maximumMissedCleavages, initiatorMethionineBehavior,
fixedModifications, variableModifications, maximumVariableModificationIsoforms,
precursorMassTolerance, precursorMassType,
acceptedPrecursorMassErrors,
productMassTolerance, productMassType,
maximumFalseDiscoveryRate, considerModifiedFormsAsUniquePeptides,
maximumThreads, minimizeMemoryUsage,
outputFolder,
psms_with_fdr,
protein_groups_with_fdr);
DateTime stop = DateTime.Now;
log.WriteLine((stop - start).TotalMinutes.ToString("0.00") + " minutes to analyze");
log.Close();
summary.Write(data_filepath + '\t');
summary.Write(proteins.ToString() + '\t');
summary.Write(spectra.Count.ToString() + '\t');
summary.Write(psm_score_threshold.ToString("0.000") + '\t');
summary.Write(target_psms.ToString() + '\t');
summary.Write(decoy_psms.ToString() + '\t');
summary.Write(psm_fdr.ToString("0.000%") + '\t');
summary.Write(peptide_score_threshold.ToString("0.000") + '\t');
summary.Write(target_peptides.ToString() + '\t');
summary.Write(decoy_peptides.ToString() + '\t');
summary.Write(peptide_fdr.ToString("0.000%") + '\t');
summary.Write(protein_group_score_threshold.ToString("0.000") + '\t');
summary.Write(target_protein_groups.ToString() + '\t');
summary.Write(decoy_protein_groups.ToString() + '\t');
summary.Write(protein_group_fdr.ToString("0.000%") + '\t');
summary.WriteLine();
OnFinishedFile(new FilepathEventArgs(data_filepath));
}
if(dataFilepaths.Count > 1)
{
OnUpdateStatus(new StatusEventArgs("Performing aggregate post-search analyses..."));
OnReportTaskWithoutProgress(EventArgs.Empty);
OnUpdateProgress(new ProgressEventArgs(0));
overall_log.WriteLine((num_target_peptides + num_decoy_peptides).ToString("N0") + " total (" + num_target_peptides.ToString("N0") + " target + " + num_decoy_peptides.ToString("N0") + " decoy) non-unique peptides");
HashSet<string> prefixes = new HashSet<string>();
prefixes.Add("aggregate");
foreach(KeyValuePair<string, HashSet<string>> kvp in parents)
{
DirectoryInfo directory_info = new DirectoryInfo(kvp.Key.Replace("*", null));
string prefix = directory_info.Name.Replace(@":\", null);
int id = 1;
while(prefixes.Contains(prefix))
{
prefix = directory_info.Name + '#' + id.ToString();
id++;
}
int semi_aggregate_spectra = 0;
List<PeptideSpectrumMatch> semi_aggregate_psms = new List<PeptideSpectrumMatch>();
foreach(string data_filepath in kvp.Value)
{
semi_aggregate_spectra += num_spectra[data_filepath];
semi_aggregate_psms.AddRange(grouped_aggregate_psms[data_filepath]);
}
overall_log.WriteLine(semi_aggregate_spectra.ToString("N0") + " MS/MS spectra in " + kvp.Key);
semi_aggregate_psms.Sort(PeptideSpectrumMatch.DescendingMorpheusScoreComparison);
IEnumerable<IdentificationWithFalseDiscoveryRate<PeptideSpectrumMatch>> semi_aggregate_psms_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(semi_aggregate_psms, decoys_over_targets_peptide_ratio);
Exporters.WriteToTabDelimitedTextFile(semi_aggregate_psms_with_fdr, Path.Combine(outputFolder, prefix + ".PSMs.tsv"));
double semi_aggregate_psm_score_threshold;
int semi_aggregate_target_psms;
int semi_aggregate_decoy_psms;
double semi_aggregate_psm_fdr;
FalseDiscoveryRate.DetermineMaximumIdentifications(semi_aggregate_psms_with_fdr, false, maximumFalseDiscoveryRate, out semi_aggregate_psm_score_threshold, out semi_aggregate_target_psms, out semi_aggregate_decoy_psms, out semi_aggregate_psm_fdr);
overall_log.WriteLine(semi_aggregate_target_psms.ToString("N0") + " target (" + semi_aggregate_decoy_psms.ToString("N0") + " decoy) PSMs at " + semi_aggregate_psm_fdr.ToString("0.000%") + " PSM FDR (" + semi_aggregate_psm_score_threshold.ToString("0.000") + " Morpheus score threshold) in " + kvp.Key);
Dictionary<string, PeptideSpectrumMatch> semi_aggregate_peptides = new Dictionary<string, PeptideSpectrumMatch>();
foreach(PeptideSpectrumMatch psm in semi_aggregate_psms)
{
if(!semi_aggregate_peptides.ContainsKey(considerModifiedFormsAsUniquePeptides ? psm.Peptide.LeucineSequence : psm.Peptide.BaseLeucineSequence))
{
semi_aggregate_peptides.Add(considerModifiedFormsAsUniquePeptides ? psm.Peptide.LeucineSequence : psm.Peptide.BaseLeucineSequence, psm);
}
}
List<PeptideSpectrumMatch> semi_aggregate_sorted_peptides = new List<PeptideSpectrumMatch>(semi_aggregate_peptides.Values);
semi_aggregate_sorted_peptides.Sort(PeptideSpectrumMatch.DescendingMorpheusScoreComparison);
IEnumerable<IdentificationWithFalseDiscoveryRate<PeptideSpectrumMatch>> semi_aggregate_peptides_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(semi_aggregate_sorted_peptides, decoys_over_targets_peptide_ratio);
Exporters.WriteToTabDelimitedTextFile(semi_aggregate_peptides_with_fdr, Path.Combine(outputFolder, prefix + ".unique_peptides.tsv"));
double semi_aggregate_peptide_score_threshold;
int semi_aggregate_target_peptides;
int semi_aggregate_decoy_peptides;
double semi_aggregate_peptide_fdr;
FalseDiscoveryRate.DetermineMaximumIdentifications(semi_aggregate_peptides_with_fdr, false, maximumFalseDiscoveryRate, out semi_aggregate_peptide_score_threshold, out semi_aggregate_target_peptides, out semi_aggregate_decoy_peptides, out semi_aggregate_peptide_fdr);
overall_log.WriteLine(semi_aggregate_target_peptides.ToString("N0") + " unique target (" + semi_aggregate_decoy_peptides.ToString("N0") + " decoy) peptides at " + semi_aggregate_peptide_fdr.ToString("0.000%") + " unique peptide FDR (" + semi_aggregate_peptide_score_threshold.ToString("0.000") + " Morpheus score threshold) in " + kvp.Key);
List<ProteinGroup> semi_aggregate_protein_groups = ProteinGroup.ApplyProteinParsimony(semi_aggregate_psms, semi_aggregate_peptide_score_threshold, proteome_database, onTheFlyDecoys, known_variable_modifications, protease, maximumMissedCleavages, initiatorMethionineBehavior, maximumThreads);
IEnumerable<IdentificationWithFalseDiscoveryRate<ProteinGroup>> semi_aggregate_protein_groups_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(semi_aggregate_protein_groups, decoys_over_targets_protein_ratio);
Exporters.WriteToTabDelimitedTextFile(semi_aggregate_protein_groups_with_fdr, Path.Combine(outputFolder, prefix + ".protein_groups.tsv"));
double semi_aggregate_protein_group_score_threshold;
int semi_aggregate_target_protein_groups;
int semi_aggregate_decoy_protein_groups;
double semi_aggregate_protein_group_fdr;
FalseDiscoveryRate.DetermineMaximumIdentifications(semi_aggregate_protein_groups_with_fdr, false, maximumFalseDiscoveryRate, out semi_aggregate_protein_group_score_threshold, out semi_aggregate_target_protein_groups, out semi_aggregate_decoy_protein_groups, out semi_aggregate_protein_group_fdr);
overall_log.WriteLine(semi_aggregate_target_protein_groups.ToString("N0") + " target (" + semi_aggregate_decoy_protein_groups.ToString("N0") + " decoy) protein groups at " + semi_aggregate_protein_group_fdr.ToString("0.000%") + " protein group FDR (" + semi_aggregate_protein_group_score_threshold.ToString("0.000") + " summed Morpheus score threshold) in " + kvp.Key);
Exporters.WriteMZIdentMLFile(Path.Combine(outputFolder, prefix + ".mzid"),
kvp.Value,
minimumAssumedPrecursorChargeState, maximumAssumedPrecursorChargeState,
absoluteThreshold, relativeThresholdPercent, maximumNumberOfPeaks,
assignChargeStates, deisotope,
proteomeDatabaseFilepath, proteome_database, onTheFlyDecoys, target_proteins,
protease, maximumMissedCleavages, initiatorMethionineBehavior,
fixedModifications, variableModifications, maximumVariableModificationIsoforms,
precursorMassTolerance, precursorMassType,
acceptedPrecursorMassErrors,
productMassTolerance, productMassType,
maximumFalseDiscoveryRate, considerModifiedFormsAsUniquePeptides,
maximumThreads, minimizeMemoryUsage,
outputFolder,
semi_aggregate_psms_with_fdr,
semi_aggregate_protein_groups_with_fdr);
summary.Write(kvp.Key + '\t');
summary.Write(total_proteins.ToString() + '\t');
summary.Write(semi_aggregate_spectra.ToString() + '\t');
summary.Write(semi_aggregate_psm_score_threshold.ToString("0.000") + '\t');
summary.Write(semi_aggregate_target_psms.ToString() + '\t');
summary.Write(semi_aggregate_decoy_psms.ToString() + '\t');
summary.Write(semi_aggregate_psm_fdr.ToString("0.000%") + '\t');
summary.Write(semi_aggregate_peptide_score_threshold.ToString("0.000") + '\t');
summary.Write(semi_aggregate_target_peptides.ToString() + '\t');
summary.Write(semi_aggregate_decoy_peptides.ToString() + '\t');
summary.Write(semi_aggregate_peptide_fdr.ToString("0.000%") + '\t');
summary.Write(semi_aggregate_protein_group_score_threshold.ToString("0.000") + '\t');
summary.Write(semi_aggregate_target_protein_groups.ToString() + '\t');
summary.Write(semi_aggregate_decoy_protein_groups.ToString() + '\t');
summary.Write(semi_aggregate_protein_group_fdr.ToString("0.000%") + '\t');
summary.WriteLine();
}
overall_log.WriteLine(total_spectra.ToString("N0") + " MS/MS spectra");
aggregate_psms.Sort(PeptideSpectrumMatch.DescendingMorpheusScoreComparison);
IEnumerable<IdentificationWithFalseDiscoveryRate<PeptideSpectrumMatch>> aggregate_psms_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(aggregate_psms, decoys_over_targets_peptide_ratio);
Exporters.WriteToTabDelimitedTextFile(aggregate_psms_with_fdr, Path.Combine(outputFolder, "aggregate.PSMs.tsv"));
double aggregate_psm_score_threshold;
int aggregate_target_psms;
int aggregate_decoy_psms;
double aggregate_psm_fdr;
FalseDiscoveryRate.DetermineMaximumIdentifications(aggregate_psms_with_fdr, false, maximumFalseDiscoveryRate, out aggregate_psm_score_threshold, out aggregate_target_psms, out aggregate_decoy_psms, out aggregate_psm_fdr);
overall_log.WriteLine(aggregate_target_psms.ToString("N0") + " target (" + aggregate_decoy_psms.ToString("N0") + " decoy) aggregate PSMs at " + aggregate_psm_fdr.ToString("0.000%") + " PSM FDR (" + aggregate_psm_score_threshold.ToString("0.000") + " Morpheus score threshold)");
Dictionary<string, PeptideSpectrumMatch> aggregate_peptides = new Dictionary<string, PeptideSpectrumMatch>();
foreach(PeptideSpectrumMatch psm in aggregate_psms)
{
if(!aggregate_peptides.ContainsKey(considerModifiedFormsAsUniquePeptides ? psm.Peptide.LeucineSequence : psm.Peptide.BaseLeucineSequence))
{
aggregate_peptides.Add(considerModifiedFormsAsUniquePeptides ? psm.Peptide.LeucineSequence : psm.Peptide.BaseLeucineSequence, psm);
}
}
List<PeptideSpectrumMatch> aggregate_sorted_peptides = new List<PeptideSpectrumMatch>(aggregate_peptides.Values);
aggregate_sorted_peptides.Sort(PeptideSpectrumMatch.DescendingMorpheusScoreComparison);
IEnumerable<IdentificationWithFalseDiscoveryRate<PeptideSpectrumMatch>> aggregate_peptides_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(aggregate_sorted_peptides, decoys_over_targets_peptide_ratio);
Exporters.WriteToTabDelimitedTextFile(aggregate_peptides_with_fdr, Path.Combine(outputFolder, "aggregate.unique_peptides.tsv"));
double aggregate_peptide_score_threshold;
int aggregate_target_peptides;
int aggregate_decoy_peptides;
double aggregate_peptide_fdr;
FalseDiscoveryRate.DetermineMaximumIdentifications(aggregate_peptides_with_fdr, false, maximumFalseDiscoveryRate, out aggregate_peptide_score_threshold, out aggregate_target_peptides, out aggregate_decoy_peptides, out aggregate_peptide_fdr);
overall_log.WriteLine(aggregate_target_peptides.ToString("N0") + " unique target (" + aggregate_decoy_peptides.ToString("N0") + " decoy) aggregate peptides at " + aggregate_peptide_fdr.ToString("0.000%") + " unique peptide FDR (" + aggregate_peptide_score_threshold.ToString("0.000") + " Morpheus score threshold)");
List<ProteinGroup> aggregate_protein_groups = ProteinGroup.ApplyProteinParsimony(aggregate_psms, aggregate_peptide_score_threshold, proteome_database, onTheFlyDecoys, known_variable_modifications, protease, maximumMissedCleavages, initiatorMethionineBehavior, maximumThreads);
IEnumerable<IdentificationWithFalseDiscoveryRate<ProteinGroup>> aggregate_protein_groups_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(aggregate_protein_groups, decoys_over_targets_protein_ratio);
Exporters.WriteToTabDelimitedTextFile(aggregate_protein_groups_with_fdr, Path.Combine(outputFolder, "aggregate.protein_groups.tsv"));
double aggregate_protein_group_score_threshold;
int aggregate_target_protein_groups;
int aggregate_decoy_protein_groups;
double aggregate_protein_group_fdr;
FalseDiscoveryRate.DetermineMaximumIdentifications(aggregate_protein_groups_with_fdr, false, maximumFalseDiscoveryRate, out aggregate_protein_group_score_threshold, out aggregate_target_protein_groups, out aggregate_decoy_protein_groups, out aggregate_protein_group_fdr);
overall_log.WriteLine(aggregate_target_protein_groups.ToString("N0") + " target (" + aggregate_decoy_protein_groups.ToString("N0") + " decoy) aggregate protein groups at " + aggregate_protein_group_fdr.ToString("0.000%") + " protein group FDR (" + aggregate_protein_group_score_threshold.ToString("0.000") + " summed Morpheus score threshold)");
Exporters.WriteMZIdentMLFile(Path.Combine(outputFolder, "aggregate.mzid"),
dataFilepaths,
minimumAssumedPrecursorChargeState, maximumAssumedPrecursorChargeState,
absoluteThreshold, relativeThresholdPercent, maximumNumberOfPeaks,
assignChargeStates, deisotope,
proteomeDatabaseFilepath, proteome_database, onTheFlyDecoys, target_proteins,
protease, maximumMissedCleavages, initiatorMethionineBehavior,
fixedModifications, variableModifications, maximumVariableModificationIsoforms,
precursorMassTolerance, precursorMassType,
acceptedPrecursorMassErrors,
productMassTolerance, productMassType,
maximumFalseDiscoveryRate, considerModifiedFormsAsUniquePeptides,
maximumThreads, minimizeMemoryUsage,
outputFolder,
aggregate_psms_with_fdr,
aggregate_protein_groups_with_fdr);
DateTime overall_stop = DateTime.Now;
overall_log.WriteLine((overall_stop - overall_start).TotalMinutes.ToString("0.00") + " minutes to analyze");
overall_log.Close();
summary.Write("AGGREGATE" + '\t');
summary.Write(total_proteins.ToString() + '\t');
summary.Write(total_spectra.ToString() + '\t');
summary.Write(aggregate_psm_score_threshold.ToString("0.000") + '\t');
summary.Write(aggregate_target_psms.ToString() + '\t');
summary.Write(aggregate_decoy_psms.ToString() + '\t');
summary.Write(aggregate_psm_fdr.ToString("0.000%") + '\t');
summary.Write(aggregate_peptide_score_threshold.ToString("0.000") + '\t');
summary.Write(aggregate_target_peptides.ToString() + '\t');
summary.Write(aggregate_decoy_peptides.ToString() + '\t');
summary.Write(aggregate_peptide_fdr.ToString("0.000%") + '\t');
summary.Write(aggregate_protein_group_score_threshold.ToString("0.000") + '\t');
summary.Write(aggregate_target_protein_groups.ToString() + '\t');
summary.Write(aggregate_decoy_protein_groups.ToString() + '\t');
summary.Write(aggregate_protein_group_fdr.ToString("0.000%") + '\t');
summary.WriteLine();
}
proteome_database.Close();
summary.Close();
#if !NO_EXCEPTION_HANDLING
}
catch(Exception ex)
{
if(overall_log != null && overall_log.BaseStream != null && overall_log.BaseStream.CanWrite)
{
overall_log.WriteLine(ex.ToString());
}
if(log != null && log.BaseStream != null && log.BaseStream.CanWrite)
{
log.WriteLine(ex.ToString());
}
OnThrowException(new ExceptionEventArgs(ex));
}
finally
{
#endif
if(overall_log != null)
{
overall_log.Close();
}
if(summary != null)
{
summary.Close();
}
if(log != null)
{
log.Close();
}
if(proteome_database != null)
{
proteome_database.Close();
}
#if !NO_EXCEPTION_HANDLING
}
#endif
}