Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion pyprophet/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,8 @@ class IPFIOConfig(BaseIOConfig):
propagate_signal_across_runs (bool): Propagate signal across runs (requires alignment step).
ipf_max_alignment_pep (float): Maximum PEP to consider for good alignments.
across_run_confidence_threshold (float): Maximum PEP threshold for propagating signal across runs for aligned features.
use_alignment_candidates (bool): Use FEATURE_MS2_ALIGNMENT_CANDIDATE instead of FEATURE_MS2_ALIGNMENT when available.
min_alignment_mapping_confidence (float): Minimum MAPPING_CONFIDENCE required when using FEATURE_MS2_ALIGNMENT_CANDIDATE.
"""

ipf_ms1_scoring: bool = True
Expand All @@ -476,6 +478,8 @@ class IPFIOConfig(BaseIOConfig):
propagate_signal_across_runs: bool = False
ipf_max_alignment_pep: float = 0.7
across_run_confidence_threshold: float = 0.5
use_alignment_candidates: bool = False
min_alignment_mapping_confidence: float = 0.5

@classmethod
def from_cli_args(
Expand All @@ -496,6 +500,8 @@ def from_cli_args(
propagate_signal_across_runs,
ipf_max_alignment_pep,
across_run_confidence_threshold,
use_alignment_candidates=False,
min_alignment_mapping_confidence=0.5,
):
"""
Creates a configuration object from command-line arguments.
Expand All @@ -517,6 +523,8 @@ def from_cli_args(
propagate_signal_across_runs=propagate_signal_across_runs,
ipf_max_alignment_pep=ipf_max_alignment_pep,
across_run_confidence_threshold=across_run_confidence_threshold,
use_alignment_candidates=use_alignment_candidates,
min_alignment_mapping_confidence=min_alignment_mapping_confidence,
)


Expand Down Expand Up @@ -719,4 +727,4 @@ class ExportIOConfig(BaseIOConfig):
rt_unit: Literal["iRT", "RT"] = "iRT"

# TSV/Matrix export options
exclude_decoys: bool = True # Whether to exclude decoy entries from TSV/matrix export (default: True, exclude decoys)
exclude_decoys: bool = True # Whether to exclude decoy entries from TSV/matrix export (default: True, exclude decoys)
17 changes: 17 additions & 0 deletions pyprophet/cli/ipf.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,19 @@
type=float,
help="Maximum PEP to consider for propagating signal across runs for aligned features.",
)
@click.option(
"--use_alignment_candidates/--no-use_alignment_candidates",
default=False,
show_default=True,
help="Use FEATURE_MS2_ALIGNMENT_CANDIDATE for across-run alignment groups when available.",
)
@click.option(
"--min_alignment_mapping_confidence",
default=0.5,
show_default=True,
type=float,
help="Minimum MAPPING_CONFIDENCE to keep selected candidate alignments when using FEATURE_MS2_ALIGNMENT_CANDIDATE.",
)
@click.pass_context
@measure_memory_usage_and_time
@logger.catch(reraise=True)
Expand All @@ -114,6 +127,8 @@ def ipf(
propagate_signal_across_runs,
ipf_max_alignment_pep,
across_run_confidence_threshold,
use_alignment_candidates,
min_alignment_mapping_confidence,
):
"""
Infer peptidoforms after scoring of MS1, MS2 and transition-level data.
Expand Down Expand Up @@ -147,6 +162,8 @@ def ipf(
propagate_signal_across_runs,
ipf_max_alignment_pep,
across_run_confidence_threshold,
use_alignment_candidates,
min_alignment_mapping_confidence,
)
write_logfile(
ctx.obj["LOG_LEVEL"], f"{config.prefix}_pyp_ipf.log", ctx.obj["LOG_HEADER"]
Expand Down
114 changes: 114 additions & 0 deletions pyprophet/io/ipf/osw.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,62 @@ def _read_pyp_transition_duckdb(self, con):

def _fetch_alignment_features_duckdb(self, con):
pep_threshold = self.config.ipf_max_alignment_pep
use_alignment_candidates = self.config.use_alignment_candidates
min_confidence = self.config.min_alignment_mapping_confidence

if use_alignment_candidates:
if check_duckdb_table(con, "main", "FEATURE_MS2_ALIGNMENT_CANDIDATE"):
logger.info(
Comment on lines +305 to +307
"Using FEATURE_MS2_ALIGNMENT_CANDIDATE for across-run alignment groups "
f"with MAPPING_CONFIDENCE >= {min_confidence}."
)
query = f"""
SELECT
DENSE_RANK() OVER (ORDER BY merged.PRECURSOR_ID, merged.ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID,
merged.ALIGNMENT_ID,
merged.FEATURE_ID,
merged.PRECURSOR_ID,
merged.FEATURE_TYPE
FROM (
SELECT DISTINCT
fmac.ALIGNMENT_ID,
fmac.REFERENCE_FEATURE_ID AS FEATURE_ID,
fmac.PRECURSOR_ID,
'REFERENCE' AS FEATURE_TYPE
FROM osw.FEATURE_MS2_ALIGNMENT_CANDIDATE AS fmac
WHERE fmac.SELECTED = 1
AND fmac.MAPPING_CONFIDENCE >= {min_confidence}
AND fmac.REFERENCE_FEATURE_ID != fmac.ALIGNED_FEATURE_ID
AND fmac.ALIGNED_FEATURE_ID != -1

UNION

SELECT DISTINCT
fmac.ALIGNMENT_ID,
fmac.ALIGNED_FEATURE_ID AS FEATURE_ID,
fmac.PRECURSOR_ID,
'QUERY' AS FEATURE_TYPE
FROM osw.FEATURE_MS2_ALIGNMENT_CANDIDATE AS fmac
WHERE fmac.SELECTED = 1
AND fmac.MAPPING_CONFIDENCE >= {min_confidence}
AND fmac.REFERENCE_FEATURE_ID != fmac.ALIGNED_FEATURE_ID
AND fmac.ALIGNED_FEATURE_ID != -1
) AS merged
ORDER BY
ALIGNMENT_GROUP_ID,
CASE merged.FEATURE_TYPE
WHEN 'REFERENCE' THEN 0
WHEN 'QUERY' THEN 1
END;
"""

df = con.execute(query).fetchdf()
return df.rename(columns=str.lower)

logger.warning(
"Requested FEATURE_MS2_ALIGNMENT_CANDIDATE for IPF propagation, "
"but the table was not found. Falling back to FEATURE_MS2_ALIGNMENT."
)
Comment on lines +307 to +357

if not check_duckdb_table(
con, "main", "FEATURE_MS2_ALIGNMENT"
Expand Down Expand Up @@ -534,6 +590,64 @@ def _read_pyp_transition_sqlite(self, con):

def _fetch_alignment_features_sqlite(self, con):
pep_threshold = self.config.ipf_max_alignment_pep
use_alignment_candidates = self.config.use_alignment_candidates
min_confidence = self.config.min_alignment_mapping_confidence

if use_alignment_candidates:
if check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT_CANDIDATE"):
logger.info(
"Using FEATURE_MS2_ALIGNMENT_CANDIDATE for across-run alignment groups "
f"with MAPPING_CONFIDENCE >= {min_confidence}."
)
query = """
SELECT
DENSE_RANK() OVER (ORDER BY PRECURSOR_ID, ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID,
ALIGNMENT_ID,
FEATURE_ID,
PRECURSOR_ID,
FEATURE_TYPE
FROM (
SELECT DISTINCT
ALIGNMENT_ID,
PRECURSOR_ID,
REFERENCE_FEATURE_ID AS FEATURE_ID,
'REFERENCE' AS FEATURE_TYPE
FROM FEATURE_MS2_ALIGNMENT_CANDIDATE
WHERE SELECTED = 1
AND MAPPING_CONFIDENCE >= ?
AND REFERENCE_FEATURE_ID != ALIGNED_FEATURE_ID
AND ALIGNED_FEATURE_ID != -1

Comment on lines +596 to +620
UNION

SELECT DISTINCT
ALIGNMENT_ID,
PRECURSOR_ID,
ALIGNED_FEATURE_ID AS FEATURE_ID,
'QUERY' AS FEATURE_TYPE
FROM FEATURE_MS2_ALIGNMENT_CANDIDATE
WHERE SELECTED = 1
AND MAPPING_CONFIDENCE >= ?
AND REFERENCE_FEATURE_ID != ALIGNED_FEATURE_ID
AND ALIGNED_FEATURE_ID != -1
) AS feature_list
ORDER BY
ALIGNMENT_GROUP_ID,
CASE FEATURE_TYPE
WHEN 'REFERENCE' THEN 0
WHEN 'QUERY' THEN 1
END
"""

df = pd.read_sql_query(
query, con, params=[min_confidence, min_confidence]
)
return df.rename(columns=str.lower)

logger.warning(
"Requested FEATURE_MS2_ALIGNMENT_CANDIDATE for IPF propagation, "
"but the table was not found. Falling back to FEATURE_MS2_ALIGNMENT."
)

if not check_sqlite_table(
con, "FEATURE_MS2_ALIGNMENT"
Expand Down
Loading