From dc028c062eeb7077c469ec0055e30280fc24a5be Mon Sep 17 00:00:00 2001 From: Jackson Burns <33505528+JacksonBurns@users.noreply.github.com> Date: Thu, 9 Apr 2026 14:07:54 -0400 Subject: [PATCH 1/4] RDKit `GetSSSR` Non-determinism Fix Goal of this PR is to resolve: https://github.com/ReactionMechanismGenerator/RMG-Py/issues/2912 The RDKit `GetSSSR` algorithm _is_ deterministic for the same input (unlike the previous RMG version) __but__ the process of converting the RMG molecule into RDKit does _not_ guarantee the same ordering of atoms, which results in non-determinism. This PR forces the RDKit molecule to have a consistent ordering, and should (hopefully) give the same results each time. To test this, we just need to run the regression tests multiple times on the same reference results (so, on the same day, without any changes to `main`). The results should be different from the `main` results, but in the same way. --- rmgpy/molecule/molecule.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index a5d54a11d3..d3604ecad9 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -2685,7 +2685,24 @@ def get_smallest_set_of_smallest_rings(self, symmetrized=False): if symmetrized: ring_info = Chem.GetSymmSSSR(rdkit_mol) else: - ring_info = Chem.GetSSSR(rdkit_mol) + # Force deterministic SSSR by canonicalizing the atom numbering first. + # This prevents RDKit's arbitrary graph traversal from selecting + # different sets of equivalent rings across different runs/platforms. + ranks = list(Chem.CanonicalRankAtoms(rdkit_mol, breakTies=True)) + rank_to_idx = {rank: idx for idx, rank in enumerate(ranks)} + + # new_order maps the new atom index to the original atom index + new_order = [rank_to_idx[i] for i in range(rdkit_mol.GetNumAtoms())] + + canonical_mol = Chem.RenumberAtoms(rdkit_mol, new_order) + canonical_rings = Chem.GetSSSR(canonical_mol) + + # Map the resulting ring indices back to the original rdkit_mol numbering + ring_info = [] + for ring in canonical_rings: + orig_ring = tuple([new_order[idx] for idx in ring]) + ring_info.append(orig_ring) + for ring in ring_info: atom_ring = [self.atoms[idx] for idx in ring] sorted_ring = self.sort_cyclic_vertices(atom_ring) From 35e2986d459b35b4ab99c583174690d80cf1e4a5 Mon Sep 17 00:00:00 2001 From: Jackson Burns <33505528+JacksonBurns@users.noreply.github.com> Date: Thu, 9 Apr 2026 14:26:09 -0400 Subject: [PATCH 2/4] fix test collection failure --- rmgpy/molecule/molecule.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index d3604ecad9..67ef943a41 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -2695,6 +2695,7 @@ def get_smallest_set_of_smallest_rings(self, symmetrized=False): new_order = [rank_to_idx[i] for i in range(rdkit_mol.GetNumAtoms())] canonical_mol = Chem.RenumberAtoms(rdkit_mol, new_order) + canonical_mol.UpdatePropertyCache(strict=False) canonical_rings = Chem.GetSSSR(canonical_mol) # Map the resulting ring indices back to the original rdkit_mol numbering From 609e867e46c045e5b44ee812d635c8d28e329e1e Mon Sep 17 00:00:00 2001 From: Jackson Burns <33505528+JacksonBurns@users.noreply.github.com> Date: Thu, 9 Apr 2026 16:25:50 -0400 Subject: [PATCH 3/4] reorder property cache update --- rmgpy/molecule/molecule.py | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index 67ef943a41..1d4f4752e4 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -2681,31 +2681,21 @@ def get_smallest_set_of_smallest_rings(self, symmetrized=False): return_mapping=False, save_order=True, ignore_bond_orders=True) - + rdkit_mol.UpdatePropertyCache(strict=False) + ranks = list(Chem.CanonicalRankAtoms(rdkit_mol, breakTies=True)) + rank_to_idx = {rank: idx for idx, rank in enumerate(ranks)} + new_order = [rank_to_idx[i] for i in range(rdkit_mol.GetNumAtoms())] + canonical_mol = Chem.RenumberAtoms(rdkit_mol, new_order) if symmetrized: - ring_info = Chem.GetSymmSSSR(rdkit_mol) + ring_info = Chem.GetSymmSSSR(canonical_mol) else: - # Force deterministic SSSR by canonicalizing the atom numbering first. - # This prevents RDKit's arbitrary graph traversal from selecting - # different sets of equivalent rings across different runs/platforms. - ranks = list(Chem.CanonicalRankAtoms(rdkit_mol, breakTies=True)) - rank_to_idx = {rank: idx for idx, rank in enumerate(ranks)} - - # new_order maps the new atom index to the original atom index - new_order = [rank_to_idx[i] for i in range(rdkit_mol.GetNumAtoms())] - - canonical_mol = Chem.RenumberAtoms(rdkit_mol, new_order) - canonical_mol.UpdatePropertyCache(strict=False) - canonical_rings = Chem.GetSSSR(canonical_mol) - - # Map the resulting ring indices back to the original rdkit_mol numbering - ring_info = [] - for ring in canonical_rings: - orig_ring = tuple([new_order[idx] for idx in ring]) - ring_info.append(orig_ring) + ring_info = Chem.GetSSSR(canonical_mol) for ring in ring_info: - atom_ring = [self.atoms[idx] for idx in ring] + # Map the new canonical indices back to the original RMG atom indices + original_idx_ring = [new_order[idx] for idx in ring] + atom_ring = [self.atoms[idx] for idx in original_idx_ring] + sorted_ring = self.sort_cyclic_vertices(atom_ring) sssr.append(sorted_ring) if symmetrized: From a0dbb44ecb741b392ecffb00ea98d26e6e3e2693 Mon Sep 17 00:00:00 2001 From: JacksonBurns Date: Mon, 13 Apr 2026 18:07:53 -0400 Subject: [PATCH 4/4] update expected atom ordering in test --- test/rmgpy/molecule/moleculeTest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/rmgpy/molecule/moleculeTest.py b/test/rmgpy/molecule/moleculeTest.py index 43c4b4e33a..2c3896148d 100644 --- a/test/rmgpy/molecule/moleculeTest.py +++ b/test/rmgpy/molecule/moleculeTest.py @@ -3172,8 +3172,8 @@ def test_get_all_polycyclic_vertices(self): """) polycyclic_vertices = mol.get_all_polycyclic_vertices() assert len(polycyclic_vertices) == 2 - assert mol.atoms[0] is mol.get_all_polycyclic_vertices()[0] - assert mol.atoms[1] is mol.get_all_polycyclic_vertices()[1] + assert mol.atoms[0] is mol.get_all_polycyclic_vertices()[1] + assert mol.atoms[1] is mol.get_all_polycyclic_vertices()[0] # Spirocyclic molecule mol = Molecule().from_adjacency_list("""