diff --git a/pyprophet/_config.py b/pyprophet/_config.py index 4814fe7..318c2ed 100644 --- a/pyprophet/_config.py +++ b/pyprophet/_config.py @@ -716,4 +716,7 @@ class ExportIOConfig(BaseIOConfig): intensity_calibration: bool = True min_fragments: int = 4 keep_decoys: bool = False # Whether to keep decoy entries in the library - rt_unit: Literal["iRT", "RT"] = "iRT" \ No newline at end of file + rt_unit: Literal["iRT", "RT"] = "iRT" + + # TSV/Matrix export options + exclude_decoys: bool = True # Whether to exclude decoy entries from TSV/matrix export (default: True, exclude decoys) \ No newline at end of file diff --git a/pyprophet/cli/export.py b/pyprophet/cli/export.py index e80c300..3a1ba83 100644 --- a/pyprophet/cli/export.py +++ b/pyprophet/cli/export.py @@ -159,6 +159,13 @@ def export(): type=float, help="[format: matrix/legacy] Maximum PEP to consider for good alignments when use_alignment is enabled.", ) +@click.option( + "--exclude-decoys/--no-exclude-decoys", + "exclude_decoys", + default=True, + show_default=True, + help="Exclude decoy entries from the exported results. Use --no-exclude-decoys to retain decoys.", +) @measure_memory_usage_and_time def export_tsv( infile, @@ -176,6 +183,7 @@ def export_tsv( max_global_protein_qvalue, use_alignment, max_alignment_pep, + exclude_decoys, ): """ Export Proteomics/Peptidoform TSV/CSV tables @@ -207,6 +215,7 @@ def export_tsv( max_global_protein_qvalue=max_global_protein_qvalue, use_alignment=use_alignment, max_alignment_pep=max_alignment_pep, + exclude_decoys=exclude_decoys, ) reader = ReaderDispatcher.get_reader(config) @@ -329,6 +338,13 @@ def export_tsv( type=click.Choice(["none", "median", "medianmedian", "quantile"]), help="[format: matrix/legacy] Normalization method to apply to the quantification matrix.", ) +@click.option( + "--exclude-decoys/--no-exclude-decoys", + "exclude_decoys", + default=True, + show_default=True, + help="Exclude decoy entries from the exported matrix. Use --no-exclude-decoys to retain decoys.", +) @measure_memory_usage_and_time def export_matrix( infile, @@ -347,6 +363,7 @@ def export_matrix( top_n, consistent_top, normalization, + exclude_decoys, ): """ Export Proteomics/Peptidoform Quantification Matrix @@ -381,6 +398,7 @@ def export_matrix( top_n=top_n, consistent_top=consistent_top, normalization=normalization, + exclude_decoys=exclude_decoys, ) reader = ReaderDispatcher.get_reader(config) diff --git a/pyprophet/io/_base.py b/pyprophet/io/_base.py index c3c1e7a..fddd081 100644 --- a/pyprophet/io/_base.py +++ b/pyprophet/io/_base.py @@ -589,6 +589,14 @@ def export_results(self, data: pd.DataFrame): """ cfg = self.config + # Filter out decoys if exclude_decoys is True + if cfg.exclude_decoys and "decoy" in data.columns: + initial_count = len(data) + data = data.loc[data["decoy"].eq(0)].copy() + decoy_count = initial_count - len(data) + if decoy_count > 0: + logger.info(f"Excluded {decoy_count} decoy entries from export.") + sep = "," if cfg.out_type == "csv" else "\t" if cfg.export_format == "legacy_split": @@ -737,6 +745,14 @@ def export_quant_matrix(self, data: pd.DataFrame) -> pd.DataFrame: """ cfg = self.config + # Filter out decoys if exclude_decoys is True + if cfg.exclude_decoys and "decoy" in data.columns: + initial_count = len(data) + data = data[data["decoy"] == 0] + decoy_count = initial_count - len(data) + if decoy_count > 0: + logger.info(f"Excluded {decoy_count} decoy entries from quantification matrix.") + # Check if data is empty if data.empty: raise ValueError(