Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pyprophet/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,4 +716,7 @@ class ExportIOConfig(BaseIOConfig):
intensity_calibration: bool = True
min_fragments: int = 4
keep_decoys: bool = False # Whether to keep decoy entries in the library
rt_unit: Literal["iRT", "RT"] = "iRT"
rt_unit: Literal["iRT", "RT"] = "iRT"

# TSV/Matrix export options
exclude_decoys: bool = True  # Drop decoy entries from TSV/matrix exports (default); set False to retain them. Separate from keep_decoys, which applies to library export.
Comment thread
singjc marked this conversation as resolved.
18 changes: 18 additions & 0 deletions pyprophet/cli/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,13 @@ def export():
type=float,
help="[format: matrix/legacy] Maximum PEP to consider for good alignments when use_alignment is enabled.",
)
@click.option(
"--exclude-decoys/--no-exclude-decoys",
"exclude_decoys",
default=True,
show_default=True,
help="Exclude decoy entries from the exported results. Use --no-exclude-decoys to retain decoys.",
Comment thread
singjc marked this conversation as resolved.
)
Comment thread
singjc marked this conversation as resolved.
@measure_memory_usage_and_time
def export_tsv(
infile,
Expand All @@ -176,6 +183,7 @@ def export_tsv(
max_global_protein_qvalue,
use_alignment,
max_alignment_pep,
exclude_decoys,
):
"""
Export Proteomics/Peptidoform TSV/CSV tables
Expand Down Expand Up @@ -207,6 +215,7 @@ def export_tsv(
max_global_protein_qvalue=max_global_protein_qvalue,
use_alignment=use_alignment,
max_alignment_pep=max_alignment_pep,
exclude_decoys=exclude_decoys,
)

reader = ReaderDispatcher.get_reader(config)
Expand Down Expand Up @@ -329,6 +338,13 @@ def export_tsv(
type=click.Choice(["none", "median", "medianmedian", "quantile"]),
help="[format: matrix/legacy] Normalization method to apply to the quantification matrix.",
)
@click.option(
"--exclude-decoys/--no-exclude-decoys",
"exclude_decoys",
default=True,
show_default=True,
help="Exclude decoy entries from the exported matrix. Use --no-exclude-decoys to retain decoys.",
)
@measure_memory_usage_and_time
def export_matrix(
infile,
Expand All @@ -347,6 +363,7 @@ def export_matrix(
top_n,
consistent_top,
normalization,
exclude_decoys,
):
"""
Export Proteomics/Peptidoform Quantification Matrix
Expand Down Expand Up @@ -381,6 +398,7 @@ def export_matrix(
top_n=top_n,
consistent_top=consistent_top,
normalization=normalization,
exclude_decoys=exclude_decoys,
)

reader = ReaderDispatcher.get_reader(config)
Expand Down
16 changes: 16 additions & 0 deletions pyprophet/io/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,14 @@ def export_results(self, data: pd.DataFrame):
"""
cfg = self.config

# Filter out decoys if exclude_decoys is True
if cfg.exclude_decoys and "decoy" in data.columns:
initial_count = len(data)
data = data.loc[data["decoy"].eq(0)].copy()
decoy_count = initial_count - len(data)
if decoy_count > 0:
logger.info(f"Excluded {decoy_count} decoy entries from export.")

sep = "," if cfg.out_type == "csv" else "\t"

if cfg.export_format == "legacy_split":
Expand Down Expand Up @@ -737,6 +745,14 @@ def export_quant_matrix(self, data: pd.DataFrame) -> pd.DataFrame:
"""
cfg = self.config

# Filter out decoys if exclude_decoys is True
if cfg.exclude_decoys and "decoy" in data.columns:
initial_count = len(data)
data = data[data["decoy"] == 0]
Comment thread
singjc marked this conversation as resolved.
decoy_count = initial_count - len(data)
if decoy_count > 0:
logger.info(f"Excluded {decoy_count} decoy entries from quantification matrix.")

# Check if data is empty
if data.empty:
raise ValueError(
Expand Down
Loading