From d159c7277127294c316c894b915168981d49f429 Mon Sep 17 00:00:00 2001 From: singjc Date: Tue, 5 May 2026 21:18:43 -0400 Subject: [PATCH 1/2] Add exclude_decoys option for TSV/Matrix exports and implement filtering in BaseWriter --- pyprophet/_config.py | 5 ++++- pyprophet/cli/export.py | 18 ++++++++++++++++++ pyprophet/io/_base.py | 16 ++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/pyprophet/_config.py b/pyprophet/_config.py index 4814fe7d..318c2edf 100644 --- a/pyprophet/_config.py +++ b/pyprophet/_config.py @@ -716,4 +716,7 @@ class ExportIOConfig(BaseIOConfig): intensity_calibration: bool = True min_fragments: int = 4 keep_decoys: bool = False # Whether to keep decoy entries in the library - rt_unit: Literal["iRT", "RT"] = "iRT" \ No newline at end of file + rt_unit: Literal["iRT", "RT"] = "iRT" + + # TSV/Matrix export options + exclude_decoys: bool = True # Whether to exclude decoy entries from TSV/matrix export (default: True, exclude decoys) \ No newline at end of file diff --git a/pyprophet/cli/export.py b/pyprophet/cli/export.py index e80c3001..3a1ba83e 100644 --- a/pyprophet/cli/export.py +++ b/pyprophet/cli/export.py @@ -159,6 +159,13 @@ def export(): type=float, help="[format: matrix/legacy] Maximum PEP to consider for good alignments when use_alignment is enabled.", ) +@click.option( + "--exclude-decoys/--no-exclude-decoys", + "exclude_decoys", + default=True, + show_default=True, + help="Exclude decoy entries from the exported results. Use --no-exclude-decoys to retain decoys.", +) @measure_memory_usage_and_time def export_tsv( infile, @@ -176,6 +183,7 @@ def export_tsv( max_global_protein_qvalue, use_alignment, max_alignment_pep, + exclude_decoys, ): """ Export Proteomics/Peptidoform TSV/CSV tables @@ -207,6 +215,7 @@ def export_tsv( max_global_protein_qvalue=max_global_protein_qvalue, use_alignment=use_alignment, max_alignment_pep=max_alignment_pep, + exclude_decoys=exclude_decoys, ) reader = ReaderDispatcher.get_reader(config) @@ -329,6 +338,13 @@ def export_tsv( type=click.Choice(["none", "median", "medianmedian", "quantile"]), help="[format: matrix/legacy] Normalization method to apply to the quantification matrix.", ) +@click.option( + "--exclude-decoys/--no-exclude-decoys", + "exclude_decoys", + default=True, + show_default=True, + help="Exclude decoy entries from the exported matrix. Use --no-exclude-decoys to retain decoys.", +) @measure_memory_usage_and_time def export_matrix( infile, @@ -347,6 +363,7 @@ def export_matrix( top_n, consistent_top, normalization, + exclude_decoys, ): """ Export Proteomics/Peptidoform Quantification Matrix @@ -381,6 +398,7 @@ def export_matrix( top_n=top_n, consistent_top=consistent_top, normalization=normalization, + exclude_decoys=exclude_decoys, ) reader = ReaderDispatcher.get_reader(config) diff --git a/pyprophet/io/_base.py b/pyprophet/io/_base.py index c3c1e7a8..2a62f938 100644 --- a/pyprophet/io/_base.py +++ b/pyprophet/io/_base.py @@ -589,6 +589,14 @@ def export_results(self, data: pd.DataFrame): """ cfg = self.config + # Filter out decoys if exclude_decoys is True + if cfg.exclude_decoys and "decoy" in data.columns: + initial_count = len(data) + data = data[data["decoy"] == 0] + decoy_count = initial_count - len(data) + if decoy_count > 0: + logger.info(f"Excluded {decoy_count} decoy entries from export.") + sep = "," if cfg.out_type == "csv" else "\t" if cfg.export_format == "legacy_split": @@ -737,6 +745,14 @@ def export_quant_matrix(self, data: pd.DataFrame) -> pd.DataFrame: """ cfg = self.config + # Filter out decoys if exclude_decoys is True + if cfg.exclude_decoys and "decoy" in data.columns: + initial_count = len(data) + data = data[data["decoy"] == 0] + decoy_count = initial_count - len(data) + if decoy_count > 0: + logger.info(f"Excluded {decoy_count} decoy entries from quantification matrix.") + # Check if data is empty if data.empty: raise ValueError( From 72f97c97924954be5c798f6aa46ed91be50f36c8 Mon Sep 17 00:00:00 2001 From: Justin Sing <32938975+singjc@users.noreply.github.com> Date: Tue, 5 May 2026 21:24:31 -0400 Subject: [PATCH 2/2] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pyprophet/io/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyprophet/io/_base.py b/pyprophet/io/_base.py index 2a62f938..fddd0818 100644 --- a/pyprophet/io/_base.py +++ b/pyprophet/io/_base.py @@ -592,7 +592,7 @@ def export_results(self, data: pd.DataFrame): # Filter out decoys if exclude_decoys is True if cfg.exclude_decoys and "decoy" in data.columns: initial_count = len(data) - data = data[data["decoy"] == 0] + data = data.loc[data["decoy"].eq(0)].copy() decoy_count = initial_count - len(data) if decoy_count > 0: logger.info(f"Excluded {decoy_count} decoy entries from export.")