From b7be66d9049626acb29d2d580dd3412b4f26b125 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Fri, 27 Mar 2026 16:05:37 -0700 Subject: [PATCH 01/12] Add pysidt-rmg to environment --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 39a5da8a66..39f4666f0f 100644 --- a/environment.yml +++ b/environment.yml @@ -47,6 +47,7 @@ dependencies: - conda-forge::cclib >=1.6.3,<1.9 - conda-forge::openbabel >= 3 - conda-forge::rdkit >=2022.09.1 + - rmg::pysidt-rmg >=1.2 # Python tools - conda-forge::python >=3.9,<3.12 # leave as GEQ so that GitHub actions can add EQ w/o breaking (contradictory deps) From 9a8768cc7306813f8b0bde638770c688d5def06d Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Fri, 27 Mar 2026 16:09:32 -0700 Subject: [PATCH 02/12] add pysidt imports in data/thermo.py --- rmgpy/data/thermo.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index 2ab242de81..fa93ee2f40 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -40,6 +40,9 @@ import numpy as np +from pysidt import read_nodes, MultiTargetSingleEvalSubgraphIsomorphicDecisionTree +from pysidt.utils import find_shortest_paths + import rmgpy.constants as constants import rmgpy.molecule import rmgpy.quantity From 5ca40a9b85be82f92704f4c32c3c1249345bfc6a Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Fri, 27 Mar 2026 22:28:59 -0700 Subject: [PATCH 03/12] add pysidt to conda build dependencies --- .conda/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.conda/meta.yaml b/.conda/meta.yaml index c9a456c713..9961ed1664 100644 --- a/.conda/meta.yaml +++ b/.conda/meta.yaml @@ -63,6 +63,7 @@ requirements: - conda-forge::gprof2dot - conda-forge::numdifftools - conda-forge::quantities !=0.16.0,!=0.16.1 + - rmg::pysidt-rmg >=1.2 - rmg::pydas >=1.0.3 - rmg::pydqed >=1.0.3 - rmg::symmetry From 1c4a98f93c115d5ff4d63a3c5cf15ac476e19e01 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Wed, 29 Apr 2026 20:36:31 -0700 Subject: [PATCH 04/12] handle use of adsorption corrections for training reactions --- rmgpy/rmg/main.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index a5854c26cc..4ed8e8024c 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -400,6 +400,10 @@ def save_input(self, path=None): save_input_file(path, self) def load_database(self): + if "SIDT" in self.adsorption_groups: + Pt111_adsorption = "adsorptionSIDTPt111" + else: + Pt111_adsorption = "adsorptionPt111" self.database = RMGDatabase() self.database.load( path=self.database_directory, @@ -410,7 +414,7 @@ def load_database(self): kinetics_families=self.kinetics_families, kinetics_depositories=self.kinetics_depositories, statmech_libraries = self.statmech_libraries, - adsorption_groups='adsorptionPt111', # use Pt111 groups for training reactions + adsorption_groups=Pt111_adsorption, # use Pt111 groups for training reactions # frequenciesLibraries = self.statmech_libraries, depository=False, # Don't bother loading the depository information, as we don't use it ) From 0feb6acc7cdae6305320be93e18bad9c436f6667 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Wed, 29 Apr 2026 20:37:18 -0700 Subject: [PATCH 05/12] handle setup and loading of thermo SIDTs --- rmgpy/data/thermo.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index fa93ee2f40..e9c5d70e07 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -860,6 +860,8 @@ def __init__(self): self.libraries = {} self.surface = {} self.groups = {} + self.sidts = {} + self.sidt_taggings_and_decompositions = {} self.adsorption_groups = "adsorptionPt111" self.library_order = [] self.local_context = { @@ -886,6 +888,7 @@ def __reduce__(self): 'depository': self.depository, 'libraries': self.libraries, 'groups': self.groups, + 'sidts': self.sidts, 'library_order': self.library_order, 'surface' : self.surface, } @@ -898,6 +901,7 @@ def __setstate__(self, d): self.depository = d['depository'] self.libraries = d['libraries'] self.groups = d['groups'] + self.sidts = d['sidts'] self.library_order = d['library_order'] self.surface = d['surface'] @@ -912,6 +916,7 @@ def load(self, path, libraries=None, depository=True, surface=False): self.depository = {} self.load_libraries(os.path.join(path, 'libraries'), libraries) self.load_groups(os.path.join(path, 'groups')) + self.load_sidts(os.path.join(path,'sidt')) if surface: self.load_surface() From 46d16a0d5e517ca69321d102da9ea48f77ab0aa0 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Wed, 29 Apr 2026 20:37:40 -0700 Subject: [PATCH 06/12] load adsorption correction SIDTs --- rmgpy/data/thermo.py | 65 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index e9c5d70e07..1d9157d865 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -1012,6 +1012,71 @@ def load_groups(self, path): self.record_ring_generic_nodes() self.record_polycylic_generic_nodes() + + def load_sidts(self, path): + """ + Load thermo SIDTs from the given `path` on disk, where `path` + points to the top-level folder of the thermo database. + """ + logging.info('Loading thermodynamics SIDTs from {0}...'.format(path)) + categories = [ + "Pt111_monodentate_adsorption_corrections", + "Pt111_bidentate_adsorption_corrections", + "Pt111_vdw_adsorption_corrections", + ] + + def add_monodentate_tag(m): + for a in m.atoms: + if a.is_surface_site(): + adatom = list(a.bonds.keys())[0] + adatom.label = "*" + break + + def add_bidentate_tag(m): + structs = [] + sites = m.get_surface_sites() + s1 = sites[0] + s2 = sites[1] + paths = find_shortest_paths(s1,s2) + path = paths[0] + for a in path: + a.label = "*" + + def add_vdw_tag(m): + for a in m.atoms: + if a.is_surface_site(): + a.label = "*" + break + + def multidentate_decomposition(m): + structs = [] + sites = m.get_surface_sites() + for tup in itertools.combinations(sites,2): + s1 = tup[0] + s2 = tup[1] + paths = find_shortest_paths(s1,s2) + path = paths[0] + inds = [] + for p in path: + inds.append(m.atoms.index(p)) + mol = m.copy(deep=True) + for ind in inds: + mol.atoms[ind].label = "*" + structs.append(mol) + + return structs + + self.sidt_taggings_and_decompositions = { + "Pt111_monodentate_adsorption_corrections": add_monodentate_tag, + "Pt111_bidentate_adsorption_corrections": add_bidentate_tag, + "Pt111_vdw_adsorption_corrections": add_vdw_tag, + "Pt111_multidentate_adsorption_corrections": multidentate_decomposition, #special + } + + for category in categories: + nodes = read_nodes(os.path.join(path,category+".json")) + tree = MultiTargetSingleEvalSubgraphIsomorphicDecisionTree(nodes=nodes) + self.sidts[category] = tree def save(self, path): """ From 5c0314ad9397ceaa4f37b549f9afe1eaeb7f553b Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Wed, 29 Apr 2026 20:38:10 -0700 Subject: [PATCH 07/12] enable use of SIDT adsorption correction predictors --- rmgpy/data/thermo.py | 187 ++++++++++++++++++++++++++----------------- 1 file changed, 115 insertions(+), 72 deletions(-) diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index 1d9157d865..e7ed9919f3 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -1756,81 +1756,124 @@ def _add_adsorption_correction(self, adsorption_thermo, adsorption_groups, molec molecule ([Molecule]): the molecule to apply the thermo correction surface_sites ([list([Atom])]): a list of the surface site atoms in the molecule """ - number_of_surface_sites = len(surface_sites) - - matches = [] - for atom in surface_sites: - labeled_atoms = {'*': atom} - node = adsorption_groups.descend_tree(molecule, labeled_atoms) - if node is None: - # no match, so try the next surface site - continue - while node is not None and node.data is None: - node = node.parent - if node is None: - # no data, so try the next surface site - continue - data = node.data - comment = node.label - loop_count = 0 - while isinstance(data, str): - loop_count += 1 - if loop_count > 100: - raise DatabaseError("Maximum iterations reached while following thermo group data pointers. A circular" - f" reference may exist. Last node was {node.label} pointing to group called {data} in " - f"database {adsorption_groups.label}") - - for entry in adsorption_groups.entries.values(): - if entry.label == data: - data = entry.data - comment = entry.label - break - else: - raise DatabaseError(f"Node {node.label} points to a non-existing group called {data} " - f"in database {adsorption_groups.label}") - data.comment = f'{adsorption_groups.label}({comment})' - group_surface_sites = node.item.get_surface_sites() - if len(group_surface_sites) == number_of_surface_sites: - # all the surface sites are accounted for so add the adsorption group and return - add_thermo_data(adsorption_thermo, data, group_additivity=True) - return True - else: - # we have not found a full match yet, so append and keep looking - matches.append((len(group_surface_sites),data)) - - if len(matches) == 0: - raise DatabaseError(f"Could not find an adsorption correction in {adsorption_groups.label} for {molecule}") - matches.sort(key = lambda x: -x[0]) - # sort the matches by descending number of surface sites - corrections_applied = 0 - # start a counter for the number of corrections applied - for number_of_group_sites, data in matches: - if number_of_surface_sites - number_of_group_sites < 0: - # too many sites in this group, skip to the next one - continue - if not corrections_applied: - # this is the first correction, so add H298, S298, and Cp - add_thermo_data(adsorption_thermo, data, group_additivity=True) - else: - # We have already corrected S298 and Cp, so we only want to correct H298 - adsorption_thermo.H298.value_si += data.H298.value_si - adsorption_thermo.comment += ' + H298({0})'.format(data.comment) - corrections_applied += 1 - number_of_surface_sites -= number_of_group_sites - if number_of_surface_sites <= 0: - # we have corrected for all the sites - if number_of_surface_sites < 0: - adsorption_thermo.comment += ' WARNING(Too many adsorption corrections were added to the thermo!' - adsorption_thermo.comment += 'The H298 is very likely understimated as a result!)' - break - if number_of_surface_sites > 0: - adsorption_thermo.comment += ' WARNING({} surface sites were unaccounted for with adsorption corrections!'.format(number_of_surface_sites) - adsorption_thermo.comment += 'The H298 is very likely overestimated as a result!)' - - return True + if "SIDT" not in self.adsorption_groups: + matches = [] + for atom in surface_sites: + labeled_atoms = {'*': atom} + node = adsorption_groups.descend_tree(molecule, labeled_atoms) + if node is None: + # no match, so try the next surface site + continue + while node is not None and node.data is None: + node = node.parent + if node is None: + # no data, so try the next surface site + continue + data = node.data + comment = node.label + loop_count = 0 + while isinstance(data, str): + loop_count += 1 + if loop_count > 100: + raise DatabaseError("Maximum iterations reached while following thermo group data pointers. A circular" + f" reference may exist. Last node was {node.label} pointing to group called {data} in " + f"database {adsorption_groups.label}") + + for entry in adsorption_groups.entries.values(): + if entry.label == data: + data = entry.data + comment = entry.label + break + else: + raise DatabaseError(f"Node {node.label} points to a non-existing group called {data} " + f"in database {adsorption_groups.label}") + data.comment = f'{adsorption_groups.label}({comment})' + group_surface_sites = node.item.get_surface_sites() + if len(group_surface_sites) == number_of_surface_sites: + # all the surface sites are accounted for so add the adsorption group and return + add_thermo_data(adsorption_thermo, data, group_additivity=True) + return True + else: + # we have not found a full match yet, so append and keep looking + matches.append((len(group_surface_sites),data)) + + if len(matches) == 0: + raise DatabaseError(f"Could not find an adsorption correction in {adsorption_groups.label} for {molecule}") + matches.sort(key = lambda x: -x[0]) + # sort the matches by descending number of surface sites + corrections_applied = 0 + # start a counter for the number of corrections applied + for number_of_group_sites, data in matches: + if number_of_surface_sites - number_of_group_sites < 0: + # too many sites in this group, skip to the next one + continue + if not corrections_applied: + # this is the first correction, so add H298, S298, and Cp + add_thermo_data(adsorption_thermo, data, group_additivity=True) + else: + # We have already corrected S298 and Cp, so we only want to correct H298 + adsorption_thermo.H298.value_si += data.H298.value_si + adsorption_thermo.comment += ' + H298({0})'.format(data.comment) + corrections_applied += 1 + number_of_surface_sites -= number_of_group_sites + if number_of_surface_sites <= 0: + # we have corrected for all the sites + if number_of_surface_sites < 0: + adsorption_thermo.comment += ' WARNING(Too many adsorption corrections were added to the thermo!' + adsorption_thermo.comment += 'The H298 is very likely understimated as a result!)' + break + + if number_of_surface_sites > 0: + adsorption_thermo.comment += ' WARNING({} surface sites were unaccounted for with adsorption corrections!'.format(number_of_surface_sites) + adsorption_thermo.comment += 'The H298 is very likely overestimated as a result!)' + return True + else: + if number_of_surface_sites == 1: + if len(molecule.split()) == 1: + self.sidt_taggings_and_decompositions["Pt111_monodentate_adsorption_corrections"](molecule) + root = self.sidts["Pt111_monodentate_adsorption_corrections"].nodes["Root"] + data, unc, tr = self.sidts["Pt111_monodentate_adsorption_corrections"].evaluate(molecule,trace=True,estimate_uncertainty=True) + else: + self.sidt_taggings_and_decompositions["Pt111_vdw_adsorption_corrections"](molecule) + data, unc, tr = self.sidts["Pt111_vdw_adsorption_corrections"].evaluate(molecule,trace=True,estimate_uncertainty=True) + elif number_of_surface_sites == 2: + self.sidt_taggings_and_decompositions["Pt111_bidentate_adsorption_corrections"](molecule) + data, unc, tr = self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(molecule,trace=True,estimate_uncertainty=True) + else: # >2 surface sites + data = None + sts = self.sidt_taggings_and_decompositions["Pt111_multidentate_adsorption_corrections"](molecule) + for st in sts: + if data is None: + data = self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(st) + else: + data += self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(st) + data /= len(sts) + unc = [275.3710444584647, + 35.06003505844255, + 9.443946756077908, + 12.96897559887163, + 16.813134867837555, + 20.353756412672936, + 26.25846562429047, + 30.888440606215646, + 38.49498107527693] #based on tridentate data + tr = "estimators not designed to handle >2 adsorption sites corrections errors may be very high" + + molecule.clear_labeled_atoms() + + adsorption_thermo.H298.value_si += data[0]*1000.0 #kJ/mol + adsorption_thermo.H298.uncertainty_si += unc[0]*1000.0 + adsorption_thermo.S298.value_si += data[1] #J/(mol*K) + adsorption_thermo.S298.uncertainty_si += unc[1] + adsorption_thermo.Cpdata.value_si += data[2:] #J/(mol*K) + adsorption_thermo.Cpdata.uncertainty_si += unc[2:] + adsorption_thermo.comment += f'{self.adsorption_groups}({tr})' + + return True + def get_thermo_data_from_libraries(self, species, training_set=None): """ Return the thermodynamic parameters for a given :class:`Species` From d9d10fa52f47aba13a8aec3079cf704f261cb284 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Thu, 30 Apr 2026 16:49:26 -0700 Subject: [PATCH 08/12] use the sidt_adsorption_corrections RMG-database branch --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 3f5944a5b4..2507bf35ef 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -37,7 +37,7 @@ concurrency: env: # if running on RMG-Py but requiring changes on an un-merged branch of RMG-database, replace # main with the name of the branch - RMG_DATABASE_BRANCH: main + RMG_DATABASE_BRANCH: sidt_adsorption_corrections # RMS branch to use for ReactionMechanismSimulator installation RMS_BRANCH: for_rmg # RMS mode used for install_rms.sh From 4813618d41e95c17be0bb342bfc59c46486f8da6 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Thu, 30 Apr 2026 17:57:15 -0700 Subject: [PATCH 09/12] load only files that are present this avoids issues with the testing database --- rmgpy/data/thermo.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index e7ed9919f3..0b0833b466 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -1074,9 +1074,10 @@ def multidentate_decomposition(m): } for category in categories: - nodes = read_nodes(os.path.join(path,category+".json")) - tree = MultiTargetSingleEvalSubgraphIsomorphicDecisionTree(nodes=nodes) - self.sidts[category] = tree + if os.path.exists(os.path.join(path,category+".json")): + nodes = read_nodes(os.path.join(path,category+".json")) + tree = MultiTargetSingleEvalSubgraphIsomorphicDecisionTree(nodes=nodes) + self.sidts[category] = tree def save(self, path): """ From c6b19c5b77b4123a654dc9ae1a2ae16654530f70 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Mon, 4 May 2026 20:12:14 -0700 Subject: [PATCH 10/12] add database tests that the SIDTs load properly and work --- test/database/databaseTest.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/test/database/databaseTest.py b/test/database/databaseTest.py index 3af53f5ac3..ff3e83f70e 100644 --- a/test/database/databaseTest.py +++ b/test/database/databaseTest.py @@ -46,7 +46,7 @@ from rmgpy.data.base import LogicOr from rmgpy.data.rmg import RMGDatabase from rmgpy.exceptions import ImplicitBenzeneError, UnexpectedChargeError -from rmgpy.molecule import Group +from rmgpy.molecule import Group, Molecule from rmgpy.molecule.atomtype import ATOMTYPES from rmgpy.molecule.pathfinder import find_shortest_path from rmgpy.quantity import ScalarQuantity @@ -241,7 +241,27 @@ def test_thermo(self): assert self.check_surface_thermo_libraries_have_surface_attributes( library_name, library ), "Thermo surface libraries {0}: Entry has metal attributes?".format(library_name) - + + with check: + assert len(self.database.thermo.sidts) > 0, "SIDT thermochemistry models did not load?" + + for name,tree in self.database.thermo.sidts.items(): + tagging = self.database.thermo.sidt_taggings_and_decompositions[name] + if "monodentate" in name: + tmol = Molecule(smiles="C*") + tagging(tmol) + tree.evaluate(tmol) + elif "bidentate" in name: + tmol = Molecule(smiles="*CC*") + tagging(tmol) + tree.evaluate(tmol) + elif "vdw" in name: + tmol = Molecule(smiles="C.*") + tagging(tmol) + tree.evaluate(tmol) + else: + pass + def test_solvation(self): for group_name, group in self.database.solvation.groups.items(): with check: From 3b2836428b26fbcbd1beba72bf28b6d265ef5f39 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Wed, 6 May 2026 12:53:09 -0700 Subject: [PATCH 11/12] properly parse metal out from SIDT adsorption tree specification --- rmgpy/data/thermo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index 0b0833b466..587862fc99 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -1367,7 +1367,7 @@ def get_thermo_data(self, species, metal_to_scale_to=None, training_set=None): if species.contains_surface_site(): try: thermo0 = self.get_thermo_data_for_surface_species(species) - metal_to_scale_from = self.adsorption_groups.split('adsorption')[-1] + metal_to_scale_from = self.adsorption_groups.split('adsorption')[-1].replace("SIDT","",1) if metal_to_scale_from != metal_to_scale_to: thermo0 = self.correct_binding_energy(thermo0, species, metal_to_scale_from=metal_to_scale_from, metal_to_scale_to=metal_to_scale_to) # group adsorption values come from Pt111 return thermo0 From c12c2525942e5a14385ca8e0c6a4959e324fb688 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Wed, 6 May 2026 12:54:33 -0700 Subject: [PATCH 12/12] handle missing tridentate substructures --- rmgpy/data/thermo.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index 587862fc99..c84acd16f1 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -1846,12 +1846,21 @@ def _add_adsorption_correction(self, adsorption_thermo, adsorption_groups, molec else: # >2 surface sites data = None sts = self.sidt_taggings_and_decompositions["Pt111_multidentate_adsorption_corrections"](molecule) + Nsts = 0 for st in sts: if data is None: - data = self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(st) + dE = self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(st) else: - data += self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(st) - data /= len(sts) + dE = self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(st) + + if dE is not None: + if data is None: + data = dE + else: + data += dE + Nsts += 1 + + data /= Nsts unc = [275.3710444584647, 35.06003505844255, 9.443946756077908,