OpenFreeEnergy · hannahbaumann · Dec 18, 2025 · Dec 18, 2025 · Dec 18, 2025 · Dec 19, 2025
diff --git a/src/openfe_analysis/__init__.py b/src/openfe_analysis/__init__.py
@@ -3,6 +3,6 @@
 from .reader import FEReader
 from .transformations import (
     Aligner,
-    Minimiser,
+    ClosestImageShift,
     NoJump,
 )
diff --git a/src/openfe_analysis/reader.py b/src/openfe_analysis/reader.py
@@ -1,4 +1,5 @@
-from typing import Optional
+import pathlib
+from typing import Literal, Optional
 
 import netCDF4 as nc
 import numpy as np
@@ -52,16 +53,20 @@ def _determine_iteration_dt(dataset) -> float:
 
 
 class FEReader(ReaderBase):
-    """A MDAnalysis Reader for NetCDF files created by
+    """
+    MDAnalysis Reader for NetCDF files created by
     `openmmtools.multistate.MultiStateReporter`
 
-    Looks along a multistate NetCDF file along one of two axes:
-      - constant state/lambda (varying replica)
-      - constant replica (varying lambda)
+    Provides a 1D trajectory along either:
+
+    - constant Hamiltonian state (`index_method="state"`)
+    - constant replica (`index_method="replica"`)
+
+    The state or replica to follow is chosen with the `index` argument.
     """
 
-    _state_id: Optional[int]
-    _replica_id: Optional[int]
+    _multistate_index: int
+    _index_method: Literal["state", "replica"]
     _frame_index: int
     _dataset: nc.Dataset
     _dataset_owner: bool
@@ -70,35 +75,27 @@ class FEReader(ReaderBase):
 
     units = {"time": "ps", "length": "nanometer"}
 
-    def __init__(self, filename, convert_units=True, state_id=None, replica_id=None, **kwargs):
+    def __init__(
+        self,
+        filename: str | pathlib.Path | nc.Dataset,
+        *,
+        index: int,
+        index_method: Literal["state", "replica"] = "state",
+        convert_units: bool = True,
+        **kwargs,
+    ):
         """
         Parameters
         ----------
         filename : pathlike or nc.Dataset
-          path to the .nc file
+            Path to the .nc file or an open Dataset.
+        index : int
+            Index of the state or replica to extract. Negative values count from the end.
+        index_method : {"state", "replica"}, default "state"
+            Whether `index` refers to a Hamiltonian state or a replica.
         convert_units : bool
-          convert positions to Angstrom
-        state_id : Optional[int]
-          The Hamiltonian state index to extract. Must be defined if
-          ``replica_id`` is not defined. May be negative (see notes below).
-        replica_id : Optional[int]
-          The replica index to extract. Must be defined if ``state_id``
-          is not defined. May be negative (see notes below).
-
-        Notes
-        -----
-        A negative index may be passed to either ``state_id`` or
-        ``replica_id``. This will be interpreted as indexing in reverse
-        starting from the last state/replica. For example, passing a
-        value of -2 for ``replica_id`` will select the before last replica.
+            Convert positions to Angstrom.
         """
-        if not ((state_id is None) ^ (replica_id is None)):
-            raise ValueError(
-                "Specify one and only one of state or replica, "
-                f"got state id={state_id} "
-                f"replica_id={replica_id}"
-            )
-
         super().__init__(filename, convert_units, **kwargs)
 
         if isinstance(filename, nc.Dataset):
@@ -108,15 +105,18 @@ def __init__(self, filename, convert_units=True, state_id=None, replica_id=None,
             self._dataset = nc.Dataset(filename)
             self._dataset_owner = True
 
-        # Handle the negative ID case
-        if state_id is not None and state_id < 0:
-            state_id = range(self._dataset.dimensions["state"].size)[state_id]
+        if index_method not in {"state", "replica"}:
+            raise ValueError(f"index_method must be 'state' or 'replica', got {index_method}")
+
+        self._index_method = index_method
 
-        if replica_id is not None and replica_id < 0:
-            replica_id = range(self._dataset.dimensions["replica"].size)[replica_id]
+        # Handle the negative ID case
+        if index_method == "state":
+            size = self._dataset.dimensions["state"].size
+        else:
+            size = self._dataset.dimensions["replica"].size
 
-        self._state_id = state_id
-        self._replica_id = replica_id
+        self._multistate_index = range(size)[index]  # negative OK; IndexError if out of range
 
         self._n_atoms = self._dataset.dimensions["atom"].size
         self.ts = Timestep(self._n_atoms)
@@ -131,6 +131,10 @@ def _format_hint(thing) -> bool:
         # can pass raw nc datasets through to reduce open/close operations
         return isinstance(thing, nc.Dataset)
 
+    @property
+    def multistate_index(self) -> int:
+        return self._multistate_index
+
     @property
     def n_atoms(self) -> int:
         return self._n_atoms
@@ -139,6 +143,10 @@ def n_atoms(self) -> int:
     def n_frames(self) -> int:
         return len(self._frames)
 
+    @property
+    def index_method(self) -> str:
+        return self._index_method
+
     @staticmethod
     def parse_n_atoms(filename, **kwargs) -> int:
         with nc.Dataset(filename) as ds:
@@ -153,17 +161,19 @@ def _read_next_timestep(self, ts=None) -> Timestep:
     def _read_frame(self, frame: int) -> Timestep:
         self._frame_index = frame
 
-        if self._state_id is not None:
+        frame = self._frames[self._frame_index]
+
+        if self._index_method == "state":
             rep = multistate._state_to_replica(
-                self._dataset, self._state_id, self._frames[self._frame_index]
+                self._dataset,
+                self._multistate_index,
+                frame,
             )
         else:
-            rep = self._replica_id
+            rep = self._multistate_index
 
-        pos = multistate._replica_positions_at_frame(
-            self._dataset, rep, self._frames[self._frame_index]
-        )
-        dim = multistate._get_unitcell(self._dataset, rep, self._frames[self._frame_index])
+        pos = multistate._replica_positions_at_frame(self._dataset, rep, frame)
+        dim = multistate._get_unitcell(self._dataset, rep, frame)
 
         if pos is None:
             errmsg = (

diff --git a/src/openfe_analysis/rmsd.py b/src/openfe_analysis/rmsd.py
@@ -7,10 +7,12 @@
 import numpy as np
 import tqdm
 from MDAnalysis.analysis import rms
+from MDAnalysis.lib.mdamath import make_whole
+from MDAnalysis.transformations import unwrap
 from numpy import typing as npt
 
 from .reader import FEReader
-from .transformations import Aligner, Minimiser, NoJump
+from .transformations import Aligner, ClosestImageShift, NoJump
 
 
 def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Universe:
@@ -34,24 +36,29 @@ def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Univers
     u = mda.Universe(
         top,
         trj,
-        state_id=state,
+        index=state,
+        index_method="state",
         format=FEReader,
     )
     prot = u.select_atoms("protein and name CA")
     ligand = u.select_atoms("resname UNK")
 
     if prot:
-        # if there's a protein in the system:
-        # - make the protein not jump periodic images between frames
-        # - put the ligand in the closest periodic image as the protein
-        # - align everything to minimise protein RMSD
-        nope = NoJump(prot)
-        minnie = Minimiser(prot, ligand)
+        # Unwrap all atoms
+        unwrap_tr = unwrap(prot)
+
+        # Shift chains + ligand
+        chains = [seg.atoms for seg in prot.segments]
+        shift = ClosestImageShift(chains[0], [*chains[1:], ligand])
+        # Make each protein chain whole
+        for frag in prot.fragments:
+            make_whole(frag, reference_atom=frag[0])
+
         align = Aligner(prot)
 
         u.trajectory.add_transformations(
-            nope,
-            minnie,
+            unwrap_tr,
+            shift,
             align,
         )
     else:
@@ -129,9 +136,9 @@ def gather_rms_data(
             # TODO: Some smart guard to avoid allocating a silly amount of memory?
             prot2d = np.empty((len(u.trajectory[::skip]), len(prot), 3), dtype=np.float32)
 
-            prot_start = prot.positions
-            # prot_weights = prot.masses / np.mean(prot.masses)
-            ligand_start = ligand.positions
+            # Copy so the reference positions are independent of the trajectory's current frame.
+            prot_start = prot.positions.copy()
+            ligand_start = ligand.positions.copy()
             ligand_initial_com = ligand.center_of_mass()
             ligand_weights = ligand.masses / np.mean(ligand.masses)
 

diff --git a/src/openfe_analysis/tests/test_reader.py b/src/openfe_analysis/tests/test_reader.py
@@ -44,7 +44,7 @@ def test_determine_position_indices_warns_for_old_nc(tmp_path):
 
 
 def test_universe_creation(simulation_nc, hybrid_system_pdb):
-    u = mda.Universe(hybrid_system_pdb, simulation_nc, format=FEReader, state_id=0)
+    u = mda.Universe(hybrid_system_pdb, simulation_nc, format=FEReader, index=0)
 
     # Check that a Universe exists
     assert u
@@ -92,7 +92,7 @@ def test_universe_creation(simulation_nc, hybrid_system_pdb):
 
 def test_universe_from_nc_file(simulation_skipped_nc, hybrid_system_skipped_pdb):
     with nc.Dataset(simulation_skipped_nc) as ds:
-        u = mda.Universe(hybrid_system_skipped_pdb, ds, format="MultiStateReporter", state_id=0)
+        u = mda.Universe(hybrid_system_skipped_pdb, ds, format="MultiStateReporter", index=0)
 
         assert u
         assert len(u.atoms) == 9178
@@ -105,7 +105,7 @@ def test_universe_creation_noconversion(simulation_skipped_nc, hybrid_system_ski
         hybrid_system_skipped_pdb,
         simulation_skipped_nc,
         format=FEReader,
-        state_id=0,
+        index=0,
         convert_units=False,
     )
     assert u.trajectory.ts.frame == 0
@@ -124,51 +124,59 @@ def test_universe_creation_noconversion(simulation_skipped_nc, hybrid_system_ski
 
 
 def test_fereader_negative_state(simulation_skipped_nc, hybrid_system_skipped_pdb):
-    u = mda.Universe(hybrid_system_skipped_pdb, simulation_skipped_nc, format=FEReader, state_id=-1)
+    u = mda.Universe(hybrid_system_skipped_pdb, simulation_skipped_nc, format=FEReader, index=-1)
 
-    assert u.trajectory._state_id == 10
-    assert u.trajectory._replica_id is None
+    assert u.trajectory._multistate_index == 10
     u.trajectory.close()
 
 
 def test_fereader_negative_replica(simulation_skipped_nc, hybrid_system_skipped_pdb):
     u = mda.Universe(
-        hybrid_system_skipped_pdb, simulation_skipped_nc, format=FEReader, replica_id=-2
+        hybrid_system_skipped_pdb,
+        simulation_skipped_nc,
+        format=FEReader,
+        index=-2,
+        index_method="replica",
     )
 
-    assert u.trajectory._state_id is None
-    assert u.trajectory._replica_id == 9
+    assert u.trajectory._multistate_index == 9
+    assert u.trajectory._index_method == "replica"
     u.trajectory.close()
 
 
 @pytest.mark.parametrize("rep_id, state_id", [[None, None], [1, 1]])
 def test_fereader_replica_state_id_error(
     simulation_skipped_nc, hybrid_system_skipped_pdb, rep_id, state_id
 ):
-    with pytest.raises(ValueError, match="Specify one and only one"):
+    with pytest.raises(ValueError, match="index_method must be 'state'"):
         _ = mda.Universe(
             hybrid_system_skipped_pdb,
             simulation_skipped_nc,
             format=FEReader,
-            state_id=state_id,
-            replica_id=rep_id,
+            index=0,
+            index_method="wrong",
         )
 
 
 def test_simulation_skipped_nc(simulation_skipped_nc, hybrid_system_skipped_pdb):
+    from MDAnalysis.transformations import wrap
+
     u = mda.Universe(
         hybrid_system_skipped_pdb,
         simulation_skipped_nc,
         format=FEReader,
-        replica_id=0,
+        index=0,
+        index_method="replica",
     )
+    # Wrap all atoms inside the simulation box
+    u.trajectory.add_transformations(wrap(u.atoms))
     assert len(u.trajectory) == 51
     assert u.trajectory.n_frames == 51
     assert u.trajectory.dt == 100
     times = np.arange(0, 5001, 100)
     for inx, ts in enumerate(u.trajectory):
         assert ts.time == times[inx]
-        # Positions are not all zero since PBC is not removed
+        assert np.all(u.atoms.positions >= 0)
         assert np.any(u.atoms.positions != 0)
     with pytest.raises(mda.exceptions.NoDataError, match="This Timestep has no velocities"):
         u.atoms.velocities