Skip to content

Commit c54be7f

Browse files
Rename updateProteinReferences to removeDanglingProteinReferences (OpenMS#8500)
The function removes PeptideEvidence entries that reference proteins that no longer exist in the protein hits. The new name more clearly describes this removal behavior, aligning with other IDFilter methods like removeUnreferencedProteins and removeUngroupedProteins.

Changes:
- Renamed all three overloads of the function
- Improved documentation with detailed @param[in]/[out] annotations
- Updated all usages in TOPP tools and library code
- Updated Python bindings with expanded docstring
- Updated unit tests

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude <noreply@anthropic.com>
1 parent 9038496 commit c54be7f

9 files changed

Lines changed: 63 additions & 30 deletions

File tree

src/openms/include/OpenMS/PROCESSING/ID/IDFilter.h

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -782,31 +782,53 @@ namespace OpenMS
782782
static void removeUnreferencedProteins(ProteinIdentification& proteins, const PeptideIdentificationList& peptides);
783783

784784
/**
785-
@brief Removes references to missing proteins
785+
@brief Removes dangling protein references from peptide hits
786786
787-
Only PeptideEvidence entries that reference protein hits in @p proteins are kept in the peptide hits.
787+
Cleans up PeptideEvidence entries by removing references to proteins that no longer exist
788+
in the provided protein identifications. This is typically called after filtering protein hits
789+
to maintain consistency between peptide-to-protein mappings.
788790
789-
If @p remove_peptides_without_reference is set, peptide hits without any remaining protein reference are removed.
791+
@param[in,out] peptides The peptide identifications to process
792+
@param[in] proteins The protein identifications containing valid protein hits
793+
@param[in] remove_peptides_without_reference If true, peptide hits that have no remaining
794+
protein references after cleanup are also removed (default: false)
795+
796+
@note Only PeptideEvidence entries referencing protein hits in @p proteins are kept.
797+
The matching is done per identification run using the run identifier.
790798
*/
791-
static void updateProteinReferences(PeptideIdentificationList& peptides, const std::vector<ProteinIdentification>& proteins, bool remove_peptides_without_reference = false);
799+
static void removeDanglingProteinReferences(PeptideIdentificationList& peptides, const std::vector<ProteinIdentification>& proteins, bool remove_peptides_without_reference = false);
792800

793801
/**
794-
@brief Removes references to missing proteins
802+
@brief Removes dangling protein references from peptide hits in a ConsensusMap
803+
804+
Cleans up PeptideEvidence entries by removing references to proteins that no longer exist
805+
in the ConsensusMap's protein identifications. This is typically called after filtering
806+
protein hits to maintain consistency between peptide-to-protein mappings.
795807
796-
Only PeptideEvidence entries that reference protein hits in their corresponding protein run of @p cmap are kept in the peptide hits.
808+
@param[in,out] cmap The ConsensusMap containing peptide and protein identifications
809+
@param[in] remove_peptides_without_reference If true, peptide hits that have no remaining
810+
protein references after cleanup are also removed (default: false)
797811
798-
If @p remove_peptides_without_reference is set, peptide hits without any remaining protein reference are removed.
812+
@note Only PeptideEvidence entries referencing protein hits in the corresponding
813+
protein run of @p cmap are kept. The matching is done per identification run.
799814
*/
800-
static void updateProteinReferences(ConsensusMap& cmap, bool remove_peptides_without_reference = false);
815+
static void removeDanglingProteinReferences(ConsensusMap& cmap, bool remove_peptides_without_reference = false);
801816

802817
/**
803-
@brief Removes references to missing proteins
818+
@brief Removes dangling protein references from peptide hits using a reference protein run
819+
820+
Cleans up PeptideEvidence entries by removing references to proteins that do not exist
821+
in the specified reference protein run. This is typically called after filtering protein
822+
hits to maintain consistency between peptide-to-protein mappings.
804823
805-
Only PeptideEvidence entries that reference protein hits in @p ref_run are kept in the peptide hits.
824+
@param[in,out] cmap The ConsensusMap containing peptide identifications to process
825+
@param[in] ref_run The reference ProteinIdentification containing valid protein hits
826+
@param[in] remove_peptides_without_reference If true, peptide hits that have no remaining
827+
protein references after cleanup are also removed (default: false)
806828
807-
If @p remove_peptides_without_reference is set, peptide hits without any remaining protein reference are removed.
829+
@note Only PeptideEvidence entries referencing protein hits in @p ref_run are kept.
808830
*/
809-
static void updateProteinReferences(ConsensusMap& cmap, const ProteinIdentification& ref_run, bool remove_peptides_without_reference = false);
831+
static void removeDanglingProteinReferences(ConsensusMap& cmap, const ProteinIdentification& ref_run, bool remove_peptides_without_reference = false);
810832

811833
/**
812834
@brief Update protein groups after protein hits were filtered
@@ -1195,7 +1217,7 @@ namespace OpenMS
11951217
{
11961218
filterHitsByScore(peptide_id, peptide_threshold_score);
11971219
}
1198-
updateProteinReferences(annotated_data.getPeptideIdentifications(), annotated_data.getProteinIdentifications());
1220+
removeDanglingProteinReferences(annotated_data.getPeptideIdentifications(), annotated_data.getProteinIdentifications());
11991221
}
12001222

12011223
/// Filters AnnotatedMSRun by keeping the N best peptide hits for every spectrum
@@ -1223,7 +1245,7 @@ namespace OpenMS
12231245
// Since we're working with individual PeptideIdentifications, we don't need to remove empty ones
12241246
// but we still need to update protein references
12251247
temp_vec = {peptide_id};
1226-
updateProteinReferences(temp_vec, annotated_data.getProteinIdentifications());
1248+
removeDanglingProteinReferences(temp_vec, annotated_data.getProteinIdentifications());
12271249
all_peptides.push_back(peptide_id);
12281250
}
12291251
// update protein hits:

src/openms/source/ANALYSIS/ID/BasicProteinInferenceAlgorithm.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ namespace OpenMS
8686
{
8787
std::vector<ProteinIdentification> tmp(1);
8888
std::swap(tmp[0], prot_id);
89-
IDFilter::updateProteinReferences(pep_ids, tmp, true); //TODO allow keeping PSMs without evidence?
89+
IDFilter::removeDanglingProteinReferences(pep_ids, tmp, true); //TODO allow keeping PSMs without evidence?
9090
std::swap(tmp[0], prot_id);
9191
}
9292

@@ -186,7 +186,7 @@ namespace OpenMS
186186
IDFilter::removeMatchingItems<std::vector<ProteinHit>>(prot_run.getHits(),
187187
IDFilter::HasMaxMetaValue<ProteinHit>("nr_found_peptides", static_cast<int>(min_peptides_per_protein) - 1));
188188

189-
IDFilter::updateProteinReferences(cmap, prot_run, true);
189+
IDFilter::removeDanglingProteinReferences(cmap, prot_run, true);
190190
}
191191

192192
if (group)
@@ -259,7 +259,7 @@ namespace OpenMS
259259

260260
if (min_peptides_per_protein > 0) //potentially sth was filtered
261261
{
262-
IDFilter::updateProteinReferences(pep_ids, prot_ids, true); //TODO allow keeping PSMs without evidence?
262+
IDFilter::removeDanglingProteinReferences(pep_ids, prot_ids, true); //TODO allow keeping PSMs without evidence?
263263
}
264264

265265
IDScoreSwitcherAlgorithm::switchBackScoreType(pep_ids, isr); // NOP if no switch was performed

src/openms/source/PROCESSING/ID/IDFilter.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ namespace OpenMS
299299
}
300300

301301
// TODO write version where you look up in a specific run (e.g. first inference run)
302-
void IDFilter::updateProteinReferences(ConsensusMap& cmap, bool remove_peptides_without_reference)
302+
void IDFilter::removeDanglingProteinReferences(ConsensusMap& cmap, bool remove_peptides_without_reference)
303303
{
304304
vector<ProteinIdentification>& proteins = cmap.getProteinIdentifications();
305305
// collect valid protein accessions for each ID run:
@@ -336,7 +336,7 @@ namespace OpenMS
336336
cmap.applyFunctionOnPeptideIDs(check_prots_avail);
337337
}
338338

339-
void IDFilter::updateProteinReferences(ConsensusMap& cmap, const ProteinIdentification& ref_run, bool remove_peptides_without_reference)
339+
void IDFilter::removeDanglingProteinReferences(ConsensusMap& cmap, const ProteinIdentification& ref_run, bool remove_peptides_without_reference)
340340
{
341341
// collect valid protein accessions of the reference run:
342342
unordered_set<String> accessions_avail;
@@ -369,7 +369,7 @@ namespace OpenMS
369369
cmap.applyFunctionOnPeptideIDs(check_prots_avail);
370370
}
371371

372-
void IDFilter::updateProteinReferences(PeptideIdentificationList& peptides, const vector<ProteinIdentification>& proteins, bool remove_peptides_without_reference)
372+
void IDFilter::removeDanglingProteinReferences(PeptideIdentificationList& peptides, const vector<ProteinIdentification>& proteins, bool remove_peptides_without_reference)
373373
{
374374
// collect valid protein accessions for each ID run:
375375
map<String, unordered_set<String>> run_to_accessions;

src/pyOpenMS/pxds/IDFilter.pxd

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,18 @@ cdef extern from "<OpenMS/PROCESSING/ID/IDFilter.h>" namespace "OpenMS":
8181

8282
void removeUnreferencedProteins(libcpp_vector[ProteinIdentification]& proteins, PeptideIdentificationList& peptides) except + nogil # wrap-doc:Removes protein hits from the protein IDs in a 'cmap' that are not referenced by a peptide in the features or if requested in the unassigned peptide list
8383

84-
void updateProteinReferences(PeptideIdentificationList& peptides, libcpp_vector[ProteinIdentification]& proteins, bool remove_peptides_without_reference) except + nogil # wrap-doc:Removes references to missing proteins. Only PeptideEvidence entries that reference protein hits in 'proteins' are kept in the peptide hits
84+
void removeDanglingProteinReferences(PeptideIdentificationList& peptides, libcpp_vector[ProteinIdentification]& proteins, bool remove_peptides_without_reference) except + nogil
85+
# wrap-doc:
86+
# Removes dangling protein references from peptide hits
87+
#
88+
# Cleans up PeptideEvidence entries by removing references to proteins that no longer exist
89+
# in the provided protein identifications. This is typically called after filtering protein hits
90+
# to maintain consistency between peptide-to-protein mappings.
91+
#
92+
# :param peptides: The peptide identifications to process (in/out)
93+
# :param proteins: The protein identifications containing valid protein hits (in)
94+
# :param remove_peptides_without_reference: If true, peptide hits that have no remaining
95+
# protein references after cleanup are also removed (default: false) (in)
8596

8697
bool updateProteinGroups(libcpp_vector[ProteinGroup]& groups, libcpp_vector[ProteinHit]& hits) except + nogil
8798
# wrap-doc:

src/tests/class_tests/openms/source/IDFilter_test.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ START_SECTION((static void removeUnreferencedProteins(vector<ProteinIdentificati
239239
}
240240
END_SECTION
241241

242-
START_SECTION((static void updateProteinReferences(PeptideIdentificationList& peptides, const vector<ProteinIdentification>& proteins, bool remove_peptides_without_reference = false)))
242+
START_SECTION((static void removeDanglingProteinReferences(PeptideIdentificationList& peptides, const vector<ProteinIdentification>& proteins, bool remove_peptides_without_reference = false)))
243243
{
244244
vector<ProteinIdentification> proteins = global_proteins;
245245
PeptideIdentificationList peptides = global_peptides;
@@ -250,7 +250,7 @@ START_SECTION((static void updateProteinReferences(PeptideIdentificationList& pe
250250
TEST_EQUAL(peptide_hits[4].getPeptideEvidences().size(), 1);
251251
proteins[0].getHits().resize(2);
252252

253-
IDFilter::updateProteinReferences(peptides, proteins);
253+
IDFilter::removeDanglingProteinReferences(peptides, proteins);
254254
TEST_EQUAL(peptide_hits.size(), 11);
255255
for (Size i = 0; i < peptide_hits.size(); ++i)
256256
{
@@ -267,7 +267,7 @@ START_SECTION((static void updateProteinReferences(PeptideIdentificationList& pe
267267
}
268268

269269
// remove peptide hits without any reference to an existing protein:
270-
IDFilter::updateProteinReferences(peptides, proteins, true);
270+
IDFilter::removeDanglingProteinReferences(peptides, proteins, true);
271271
TEST_EQUAL(peptide_hits.size(), 2);
272272
}
273273
END_SECTION

src/topp/FalseDiscoveryRate.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ class TOPPFalseDiscoveryRate :
265265
IDFilter::removeUnreferencedProteins(prot_ids, pep_ids);
266266
}
267267
//remove_psms_without_proteins
268-
IDFilter::updateProteinReferences(pep_ids,
268+
IDFilter::removeDanglingProteinReferences(pep_ids,
269269
prot_ids,
270270
getStringOption_("FDR:cleanup:remove_psms_without_proteins") == "true");
271271
//remove_spectra_without_psms

src/topp/IDFilter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -807,7 +807,7 @@ class TOPPIDFilter :
807807
{
808808
OPENMS_LOG_INFO << "Removing peptide hits without protein references..." << endl;
809809
}
810-
IDFilter::updateProteinReferences(peptides, proteins, rm_pep);
810+
IDFilter::removeDanglingProteinReferences(peptides, proteins, rm_pep);
811811

812812
IDFilter::removeEmptyIdentifications(peptides);
813813
// we want to keep "empty" protein IDs because they contain search meta data

src/topp/IsobaricWorkflow.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -769,7 +769,7 @@ class TOPPIsobaricWorkflow :
769769
OPENMS_LOG_INFO << "Removing peptide hits without protein references..." << endl;
770770
}
771771

772-
IDFilter::updateProteinReferences(cmap, rm_pep);
772+
IDFilter::removeDanglingProteinReferences(cmap, rm_pep);
773773
IDFilter::removeUnreferencedProteins(cmap, true);
774774
IDFilter::updateProteinGroups(proteins.getIndistinguishableProteins(), proteins.getHits());
775775
IDFilter::updateProteinGroups(proteins.getProteinGroups(), proteins.getHits());
@@ -790,7 +790,7 @@ class TOPPIsobaricWorkflow :
790790
if (max_pro_fdr < 1.0)
791791
{
792792
IDFilter::filterHitsByScore(proteins, max_pro_fdr);
793-
IDFilter::updateProteinReferences(cmap, rm_pep);
793+
IDFilter::removeDanglingProteinReferences(cmap, rm_pep);
794794
}
795795

796796
if (max_psm_fdr < 1.0)

src/topp/ProteomicsLFQ.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,7 +1313,7 @@ class ProteomicsLFQ :
13131313

13141314
if (!getFlag_("PeptideQuantification:quantify_decoys"))
13151315
{ // FDR filtering removed all decoy proteins -> update references and remove all unreferenced (decoy) PSMs
1316-
IDFilter::updateProteinReferences(consensus, true);
1316+
IDFilter::removeDanglingProteinReferences(consensus, true);
13171317
IDFilter::removeUnreferencedProteins(consensus, true); // if we don't filter peptides for now, we don't need this
13181318
IDFilter::updateProteinGroups(overall_proteins.getIndistinguishableProteins(),
13191319
overall_proteins.getHits());
@@ -1338,7 +1338,7 @@ class ProteomicsLFQ :
13381338

13391339
if (max_fdr < 1. || !getFlag_("PeptideQuantification:quantify_decoys"))
13401340
{
1341-
IDFilter::updateProteinReferences(consensus, true);
1341+
IDFilter::removeDanglingProteinReferences(consensus, true);
13421342
}
13431343

13441344
if (max_psm_fdr < 1.)

0 commit comments

Comments
 (0)