bigbio · ypriverol · May 26, 2026 · May 25, 2026 · May 25, 2026
diff --git a/.gitignore b/.gitignore
@@ -65,10 +65,16 @@ benchmark/*
 !benchmark/README.md
 !benchmark/ci/
 !benchmark/capture-references.sh
+!benchmark/vm/
 
 # Parity-analysis docs (iter-by-iter notes + diff CSVs) are local-only
 # development context, not part of the shipped repo.
-docs/parity-analysis/
+docs/parity-analysis/*
+!docs/parity-analysis/notes/
+!docs/parity-analysis/notes/2026-05-25-precursor-cal-ship-gates.md
+!docs/parity-analysis/notes/2026-05-25-spece-tail-exploration.md
+!docs/parity-analysis/snapshots/
+!docs/parity-analysis/snapshots/cal-shifts-2026-05-25.json
 
 # Java reference outputs from `mvn -Pcapture-references` — large; not committed.
 references/

diff --git a/DOCS.md b/DOCS.md
@@ -37,15 +37,15 @@ All flags use kebab-case long options (`--flag-name`). Several flags also accept
 | Flag | Type | Default | Description | Legacy form |
 |---|---|---|---|---|
 | `--precursor-tol-ppm` | f64 | `20.0` | Symmetric precursor mass tolerance in parts per million. | Java `-t 20ppm` |
-| `--charge-min` | u8 | `2` | Minimum precursor charge to try when the spectrum record does not specify charge. | *(no direct Java flag; set via param file in Java)* |
-| `--charge-max` | u8 | `3` | Maximum precursor charge to try when charge is missing from the spectrum. | *(same)* |
+| `--charge-min` | u8 | `2` | Minimum precursor charge to try when the spectrum record does not specify charge. Must be ≤ `--charge-max` (inverted ranges are rejected at startup). | *(no direct Java flag; set via param file in Java)* |
+| `--charge-max` | u8 | `3` | Maximum precursor charge to try when charge is missing from the spectrum. Must be ≥ `--charge-min`. | *(same)* |
 | `--enzyme-specificity` | enum | `fully` | Enzymatic cleavage enforcement at peptide termini (Number of Tolerable Termini). `fully`: both termini must be cleavage sites (Java `-ntt 2`). `semi`: at least one terminus (Java `-ntt 1`). `non-specific`: neither required (Java `-ntt 0`). | `--ntt` alias; numeric `0`/`1`/`2` |
 | `--max-missed-cleavages` | u32 | `1` | Maximum missed enzymatic cleavages allowed per candidate peptide. | Java `-maxMissedCleavages 1` |
 | `--min-length` | u32 | `6` | Minimum peptide length in residues (excluding flanking context). | Java `-minLength 6` |
 | `--max-length` | u32 | `40` | Maximum peptide length in residues. | Java `-maxLength 40` |
 | `--top-n` | u32 | `10` | Maximum PSMs retained per spectrum (ranked by SpecEValue). | Java `-n 10` |
-| `--isotope-error-min` | i8 | `-1` | Minimum isotope error offset to evaluate during precursor matching. | Java `-ti -1,2` (first value) |
-| `--isotope-error-max` | i8 | `2` | Maximum isotope error offset to evaluate. | Java `-ti -1,2` (second value) |
+| `--isotope-error-min` | i8 | `-1` | Minimum isotope error offset to evaluate during precursor matching. Must be ≤ `--isotope-error-max`. | Java `-ti -1,2` (first value) |
+| `--isotope-error-max` | i8 | `2` | Maximum isotope error offset to evaluate. Must be ≥ `--isotope-error-min`. | Java `-ti -1,2` (second value) |
 | `--min-peaks` | u32 | `10` | Minimum number of MS2 peaks required to score a spectrum; spectra below this threshold are skipped. | Java `-minNumPeaks 10` |
 
 ### Modifications
@@ -165,7 +165,7 @@ msgf-rust writes Percolator `.pin` (always) and optionally `.tsv`. Implementatio
 
 ### 3a. PIN columns
 
-Tab-separated, one header row, one row per PSM. Rows are sorted best-first within each spectrum (lowest SpecEValue first). Charge one-hot columns are emitted for every integer charge in `[--charge-min, --charge-max]`; the table below uses the default range 2–3 (`charge2`, `charge3`).
+Tab-separated, one header row, one row per PSM. Rows are sorted best-first within each spectrum (lowest SpecEValue first). With default `--charge-min 2 --charge-max 3`, the header has **36 columns**: 35 Java-parity fields plus Rust-only `EdgeScore` (before `Peptide`). Additional charge states add one `chargeN` column each.
 
 | Column | Type | Description |
 |---|---|---|
@@ -220,7 +220,7 @@ Tab-separated human-readable report. The `Title` column appears **only for MGF**
 | `Title` | string | MGF `TITLE=` field. |
 | `FragMethod` | string | Activation method name (`HCD`, `CID`, …) or `UNKNOWN`. |
 | `Precursor` | float | Precursor m/z (4 decimal places). |
-| `IsotopeError` | int | Always `0` in current release (winning offset not threaded to TSV). |
+| `IsotopeError` | int | Winning isotope offset (same value as PIN `isotope_error`). |
 | `PrecursorError(ppm)` | float | Mass error in ppm when tolerance is ppm mode; column named `PrecursorError(Da)` in Da mode. |
 | `Charge` | int | Assigned precursor charge. |
 | `Peptide` | string | Annotated peptide sequence with modifications. |
@@ -242,14 +242,16 @@ Use **PIN** when the goal is FDR calibration or rescoring: Percolator, MS²Resco
 
 ## 4. Auto-detection
 
-For mzML inputs, when `--fragmentation auto`, `--instrument low-res`, and `--protocol auto` (the CLI defaults), msgf-rust peeks the input file before loading the full dataset:
+For **mzML** inputs when `--fragmentation auto` (the default), msgf-rust peeks the input file before loading the full dataset:
 
 1. **Activation method** — histogram of `<activation>` cvParams across the first 64 MS2 spectra; dominant method wins. Mixed methods trigger an stderr warning but the dominant method is still used file-wide.
 2. **Instrument class** — scans `<instrumentConfiguration>` / analyzer cvParams via `input::detect_instrument_type`; dominant analyzer among MS2 spectra wins. `None` → `low-res` (Java `LOW_RESOLUTION_LTQ` default).
 
-MGF files carry no activation or instrument metadata → auto-detect returns `None` → bundled default `HCD_QExactive_Tryp.param` unless explicit `--fragmentation` / `--instrument` flags override.
+The CLI `--instrument` flag does **not** gate this path — only `--fragmentation auto` + mzML extension does. When peek succeeds, instrument is taken from the file; `--protocol` from the CLI is still used to pick protocol-suffixed `.param` files (e.g. `_TMT`).
 
-Explicit `--fragmentation` (non-auto) or non-default `--instrument` disables the activation peek and uses flag-based resolution directly (§1).
+MGF files carry no activation or instrument metadata → auto-detect returns `None` → bundled default `HCD_QExactive_Tryp.param` unless explicit `--fragmentation` / `--instrument` flags override via `resolve_bundled_param`.
+
+Non-auto `--fragmentation` (e.g. `HCD`, `3`) disables the activation peek and uses flag-based resolution directly (§1), including `--instrument` and `--protocol` from the CLI.
 
 ### Activation CV mapping (mzML `<activation>` cvParam accession → method)
 
@@ -452,6 +454,7 @@ msgf-rust accepts **both** canonical kebab-case flags with named enum values **a
 | `-maxMissedCleavages 1` | `--max-missed-cleavages 1` | — |
 | `-minNumPeaks 10` | `--min-peaks 10` | — |
 | `-n 10` | `--top-n 10` | — |
+| `-precursorCal auto\|on\|off` | `--precursor-cal auto\|on\|off` | — |
 | model path / `-conf` | `--param-file <FILE>` | — |
 
 ### 8b. Numeric-legacy values
@@ -476,11 +479,35 @@ On PSMs where Java and Rust agree on scan + top-1 peptide (the "agreement bucket
 
 | Feature | Divergence | Status |
 |---|---|---|
-| `lnEValue` | ~4 orders of magnitude mean shift (Rust more confident) | Deferred — `num_distinct` semantics differ (`known-divergences.md` #2) |
+| `lnEValue` | ~4 orders of magnitude mean shift (Rust more confident) | Deferred — `num_distinct` semantics differ (see item #2 below) |
 | `MeanRelErrorTop7` / `MeanErrorTop7` / `StdevRelErrorTop7` | >1% relative difference on ~99% of agreement-bucket PSMs | Deferred — error-stat normalization differs |
-| BSA charge-3 SpecEValue (BSA.fasta + test.mgf fixture) | 1–4 OOM gap depending on deconvolution iteration | Known — deconvolution implementation divergence (`known-divergences.md` #3); kept as dev-branch smoke gate |
+| BSA charge-3 SpecEValue (BSA.fasta + test.mgf fixture) | 1–4 OOM gap depending on deconvolution iteration | Known — deconvolution implementation divergence; kept as dev-branch smoke gate (`gf_java_parity` tests) |
+
+Percolator's learned weights absorb these distribution shifts; rescored FDR results remain competitive or better than Java on `--precursor-cal off` benchmarks.
+
+### 8e. precursorCal ship gates (`--precursor-cal auto`)
+
+Java `-precursorCal auto` runs a file-wide pre-pass (sampled mini-search → median ppm
+shift → tightened precursor tolerance) before the main search. msgf-rust ports this
+in `mass_calibrator.rs` / `precursor_cal.rs`.
 
-Percolator's learned weights absorb these distribution shifts; rescored FDR results remain competitive or better than Java.
+**G1 gate:** Rust @1% FDR within ±1% of Java on all three sign-off datasets (LFQ,
+Astral, TMT) with matching cal mode. Fair comparison requires explicit Rust routing
+flags — especially TMT (`--fragmentation CID --instrument high-res --protocol TMT`).
+Harness: [`benchmark/vm/run_bench_calauto_3ds.sh`](benchmark/vm/run_bench_calauto_3ds.sh).
+
+As of 2026-05-25 (fair v5 gate, `bench-calauto-v5.log`) the calibrator is
+validated (shift + tightening parity), but G1 **fails** on all three datasets:
+LFQ −2.2% (cal skipped), Astral +1.2%, TMT −5.9%. Remaining
+gaps trace to **SpecE tail / Percolator feature parity** (same class as historical
+Astral GF work), not calibrator logic. Full analysis:
+[`docs/parity-analysis/notes/2026-05-25-precursor-cal-ship-gates.md`](docs/parity-analysis/notes/2026-05-25-precursor-cal-ship-gates.md).
+
+| `--precursor-cal` | Ship recommendation |
+|---|---|
+| `off` | Yes — baseline unchanged (**CLI default** until G1) |
+| `auto` | Opt-in only — no until G1 passes |
+| `on` | Opt-in only — no until G1 passes |
 
 ---
 

diff --git a/benchmark/README.md b/benchmark/README.md
@@ -4,6 +4,11 @@ Only the **CI benchmark scaffold** is committed under this directory; heavy
 local-only harnesses and artifacts (`data/`, `results/`, prebuilt JARs, etc.)
 are intentionally gitignored and not distributed with the fork.
 
+The PXD001819 CI scripts under `ci/` were written for the Java MS-GF+ build
+(`mvn`, mzIdentML output). The Rust binary uses PIN/TSV and a separate CI
+workflow (`.github/workflows/ci.yml`); Rust benchmark automation is not yet
+wired to this scaffold.
+
 ## Datasets
 
 | Dataset | PXD | Instrument | Type | Spectra Source | FASTA / SDRF |
@@ -37,3 +42,10 @@ self-hosted runner profile (`self-hosted,linux,msgf-benchmark`) to keep
 CPU/memory comparable between runs.
 
 See [`benchmark/ci/README.md`](ci/README.md) for commands.
+
+## VM calauto gate (precursorCal)
+
+The three-dataset Java-vs-Rust sign-off harness lives under [`benchmark/vm/`](vm/).
+Use it on the self-hosted bench VM with `--precursor-cal auto` / `-precursorCal auto`.
+See [`benchmark/vm/README.md`](vm/README.md) and
+[`docs/parity-analysis/notes/2026-05-25-precursor-cal-ship-gates.md`](../docs/parity-analysis/notes/2026-05-25-precursor-cal-ship-gates.md).
diff --git a/benchmark/ci/README.md b/benchmark/ci/README.md
@@ -15,3 +15,4 @@ GitHub Actions: workflow **Benchmark PXD001819** (`workflow_dispatch`) on `self-
 | `extract_metrics.py` | Streams the mzIdentML (ElementTree `iterparse`) to count SII and PSMs at 1% FDR; also extracts RSS/CPU from `time -v` |
 | `compare_metrics.py` | Compares key=value metrics to the baseline TSV |
 | `test_compare_metrics.py` | Unit tests for the comparator |
+| `run_bench_calauto_3ds.sh` | Three-dataset precursorCal harness (LFQ/Astral/TMT). Runs `--precursor-cal auto` and writes per-dataset PINs. Pair with Percolator for the G1 ship gate. Dataset paths default to the bigbio bench VM layout — override via env vars (see script header). Status documented in `docs/parity-analysis/notes/2026-05-25-precursor-cal-ship-gates.md`. |
diff --git a/benchmark/ci/run_bench_calauto_3ds.sh b/benchmark/ci/run_bench_calauto_3ds.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+#
+# Three-dataset precursor-cal bench harness (G1 ship gate).
+#
+# Runs msgf-rust with `--precursor-cal auto` against LFQ (PXD001819), Astral,
+# and TMT fixtures. Pair with `compare_*_3arm_percolator.sh` to compute
+# Percolator @1% FDR vs the Java baseline.
+#
+# G1 ship gate: Rust @1% FDR within ±1% of Java on all three datasets.
+# See `docs/parity-analysis/notes/2026-05-25-precursor-cal-ship-gates.md`
+# for the current gate status (G1 NOT yet closed in PR-A).
+#
+# Override dataset paths via env vars; defaults match the bigbio bench VM
+# layout.
+
+set -euo pipefail
+
+LFQ_MGF="${LFQ_MGF:-/srv/data/msgf-bench/PXD001819/sample.mgf}"
+LFQ_FASTA="${LFQ_FASTA:-/srv/data/msgf-bench/PXD001819/human-uniprot-contaminants.fasta}"
+LFQ_PARAM="${LFQ_PARAM:-HCD_QExactive_Tryp.param}"
+
+ASTRAL_MZML="${ASTRAL_MZML:-/srv/data/msgf-bench/astral/sample.mzML}"
+ASTRAL_FASTA="${ASTRAL_FASTA:-/srv/data/msgf-bench/astral/human.fasta}"
+ASTRAL_PARAM="${ASTRAL_PARAM:-HCD_HighRes_Tryp.param}"
+
+TMT_MGF="${TMT_MGF:-/srv/data/msgf-bench/tmt/sample.mgf}"
+TMT_FASTA="${TMT_FASTA:-/srv/data/msgf-bench/tmt/uniprot.fasta}"
+TMT_PARAM="${TMT_PARAM:-HCD_HighRes_Tryp_TMT.param}"
+
+OUT_DIR="${OUT_DIR:-./bench-results/calauto-$(date +%Y%m%d-%H%M)}"
+MSGF_RUST="${MSGF_RUST:-./target/release/msgf-rust}"
+MODE="${MODE:-auto}"
+
+mkdir -p "${OUT_DIR}"
+
+if [ ! -x "${MSGF_RUST}" ]; then
+  echo "ERROR: msgf-rust binary not found at ${MSGF_RUST}" >&2
+  echo "Run: cargo build --release -p msgf-rust" >&2
+  exit 1
+fi
+
+run_one() {
+  local label="$1" spectra="$2" fasta="$3" param="$4" extra="${5:-}"
+  if [ ! -f "${spectra}" ]; then
+    echo "WARN: skipping ${label} (spectra missing: ${spectra})" >&2
+    return 0
+  fi
+  if [ ! -f "${fasta}" ]; then
+    echo "WARN: skipping ${label} (fasta missing: ${fasta})" >&2
+    return 0
+  fi
+  echo "=== ${label} (--precursor-cal ${MODE}) ==="
+  local pin_path="${OUT_DIR}/${label}.pin"
+  local log_path="${OUT_DIR}/${label}.log"
+  /usr/bin/time -v "${MSGF_RUST}" \
+    --spectrum "${spectra}" \
+    --database "${fasta}" \
+    --param-file "${param}" \
+    --precursor-cal "${MODE}" \
+    --output-pin "${pin_path}" \
+    ${extra} \
+    > "${log_path}" 2>&1
+  echo "wrote ${pin_path} (log: ${log_path})"
+}
+
+run_one "lfq"    "${LFQ_MGF}"    "${LFQ_FASTA}"    "${LFQ_PARAM}"
+run_one "astral" "${ASTRAL_MZML}" "${ASTRAL_FASTA}" "${ASTRAL_PARAM}"
+run_one "tmt"    "${TMT_MGF}"    "${TMT_FASTA}"    "${TMT_PARAM}"
+
+echo
+echo "Bench complete. PINs in ${OUT_DIR}/"
+echo "Next: feed each PIN to Percolator and compare 1% FDR target counts vs Java."
diff --git a/crates/model/src/peptide.rs b/crates/model/src/peptide.rs
@@ -53,6 +53,14 @@ impl Peptide {
         self.neutral_mass
     }
 
+    /// Residue-only neutral mass (no terminal `H2O`).
+    ///
+    /// Matches Java `DatabaseMatch.getPeptideMass()` and the calibrator's
+    /// `(precursor_mz - proton) * charge - H2O` observed mass convention.
+    pub fn residue_mass(&self) -> f64 {
+        self.neutral_mass - H2O
+    }
+
     pub fn nominal_mass(&self) -> i32 {
         self.nominal_mass
     }
@@ -218,6 +226,13 @@ mod tests {
         assert_eq!(p.mass().to_bits(), expected.to_bits());
     }
 
+    #[test]
+    fn residue_mass_excludes_h2o() {
+        let p = unmod_pep(b"GA");
+        assert_eq!(p.residue_mass().to_bits(), (p.mass() - H2O).to_bits());
+        assert_eq!(p.nominal_residue_mass(), nominal_from(p.residue_mass()));
+    }
+
     #[test]
     fn mass_includes_mod_deltas() {
         let oxidation = Modification {