diff --git a/.conda/meta.yaml b/.conda/meta.yaml index c9a456c713..9961ed1664 100644 --- a/.conda/meta.yaml +++ b/.conda/meta.yaml @@ -63,6 +63,7 @@ requirements: - conda-forge::gprof2dot - conda-forge::numdifftools - conda-forge::quantities !=0.16.0,!=0.16.1 + - rmg::pysidt-rmg >=1.2 - rmg::pydas >=1.0.3 - rmg::pydqed >=1.0.3 - rmg::symmetry diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 3f5944a5b4..2507bf35ef 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -37,7 +37,7 @@ concurrency: env: # if running on RMG-Py but requiring changes on an un-merged branch of RMG-database, replace # main with the name of the branch - RMG_DATABASE_BRANCH: main + RMG_DATABASE_BRANCH: sidt_adsorption_corrections # RMS branch to use for ReactionMechanismSimulator installation RMS_BRANCH: for_rmg # RMS mode used for install_rms.sh diff --git a/environment.yml b/environment.yml index 39a5da8a66..39f4666f0f 100644 --- a/environment.yml +++ b/environment.yml @@ -47,6 +47,7 @@ dependencies: - conda-forge::cclib >=1.6.3,<1.9 - conda-forge::openbabel >= 3 - conda-forge::rdkit >=2022.09.1 + - rmg::pysidt-rmg >=1.2 # Python tools - conda-forge::python >=3.9,<3.12 # leave as GEQ so that GitHub actions can add EQ w/o breaking (contradictory deps) diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index 2ab242de81..c84acd16f1 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -40,6 +40,9 @@ import numpy as np +from pysidt import read_nodes, MultiTargetSingleEvalSubgraphIsomorphicDecisionTree +from pysidt.utils import find_shortest_paths + import rmgpy.constants as constants import rmgpy.molecule import rmgpy.quantity @@ -857,6 +860,8 @@ def __init__(self): self.libraries = {} self.surface = {} self.groups = {} + self.sidts = {} + self.sidt_taggings_and_decompositions = {} self.adsorption_groups = "adsorptionPt111" self.library_order = [] self.local_context = { @@ -883,6 +888,7 @@ def __reduce__(self): 'depository': self.depository, 'libraries': self.libraries, 'groups': self.groups, + 'sidts': self.sidts, 'library_order': self.library_order, 'surface' : self.surface, } @@ -895,6 +901,7 @@ def __setstate__(self, d): self.depository = d['depository'] self.libraries = d['libraries'] self.groups = d['groups'] + self.sidts = d['sidts'] self.library_order = d['library_order'] self.surface = d['surface'] @@ -909,6 +916,7 @@ def load(self, path, libraries=None, depository=True, surface=False): self.depository = {} self.load_libraries(os.path.join(path, 'libraries'), libraries) self.load_groups(os.path.join(path, 'groups')) + self.load_sidts(os.path.join(path,'sidt')) if surface: self.load_surface() @@ -1004,6 +1012,72 @@ def load_groups(self, path): self.record_ring_generic_nodes() self.record_polycylic_generic_nodes() + + def load_sidts(self, path): + """ + Load thermo SIDTs from the given `path` on disk, where `path` + points to the top-level folder of the thermo database. + """ + logging.info('Loading thermodynamics SIDTs from {0}...'.format(path)) + categories = [ + "Pt111_monodentate_adsorption_corrections", + "Pt111_bidentate_adsorption_corrections", + "Pt111_vdw_adsorption_corrections", + ] + + def add_monodentate_tag(m): + for a in m.atoms: + if a.is_surface_site(): + adatom = list(a.bonds.keys())[0] + adatom.label = "*" + break + + def add_bidentate_tag(m): + structs = [] + sites = m.get_surface_sites() + s1 = sites[0] + s2 = sites[1] + paths = find_shortest_paths(s1,s2) + path = paths[0] + for a in path: + a.label = "*" + + def add_vdw_tag(m): + for a in m.atoms: + if a.is_surface_site(): + a.label = "*" + break + + def multidentate_decomposition(m): + structs = [] + sites = m.get_surface_sites() + for tup in itertools.combinations(sites,2): + s1 = tup[0] + s2 = tup[1] + paths = find_shortest_paths(s1,s2) + path = paths[0] + inds = [] + for p in path: + inds.append(m.atoms.index(p)) + mol = m.copy(deep=True) + for ind in inds: + mol.atoms[ind].label = "*" + structs.append(mol) + + return structs + + self.sidt_taggings_and_decompositions = { + "Pt111_monodentate_adsorption_corrections": add_monodentate_tag, + "Pt111_bidentate_adsorption_corrections": add_bidentate_tag, + "Pt111_vdw_adsorption_corrections": add_vdw_tag, + "Pt111_multidentate_adsorption_corrections": multidentate_decomposition, #special + } + + for category in categories: + if os.path.exists(os.path.join(path,category+".json")): + nodes = read_nodes(os.path.join(path,category+".json")) + tree = MultiTargetSingleEvalSubgraphIsomorphicDecisionTree(nodes=nodes) + self.sidts[category] = tree def save(self, path): """ @@ -1293,7 +1367,7 @@ def get_thermo_data(self, species, metal_to_scale_to=None, training_set=None): if species.contains_surface_site(): try: thermo0 = self.get_thermo_data_for_surface_species(species) - metal_to_scale_from = self.adsorption_groups.split('adsorption')[-1] + metal_to_scale_from = self.adsorption_groups.split('adsorption')[-1].replace("SIDT","",1) if metal_to_scale_from != metal_to_scale_to: thermo0 = self.correct_binding_energy(thermo0, species, metal_to_scale_from=metal_to_scale_from, metal_to_scale_to=metal_to_scale_to) # group adsorption values come from Pt111 return thermo0 @@ -1683,81 +1757,133 @@ def _add_adsorption_correction(self, adsorption_thermo, adsorption_groups, molec molecule ([Molecule]): the molecule to apply the thermo correction surface_sites ([list([Atom])]): a list of the surface site atoms in the molecule """ - number_of_surface_sites = len(surface_sites) - - matches = [] - for atom in surface_sites: - labeled_atoms = {'*': atom} - node = adsorption_groups.descend_tree(molecule, labeled_atoms) - if node is None: - # no match, so try the next surface site - continue - while node is not None and node.data is None: - node = node.parent - if node is None: - # no data, so try the next surface site - continue - data = node.data - comment = node.label - loop_count = 0 - while isinstance(data, str): - loop_count += 1 - if loop_count > 100: - raise DatabaseError("Maximum iterations reached while following thermo group data pointers. A circular" - f" reference may exist. Last node was {node.label} pointing to group called {data} in " - f"database {adsorption_groups.label}") - - for entry in adsorption_groups.entries.values(): - if entry.label == data: - data = entry.data - comment = entry.label - break - else: - raise DatabaseError(f"Node {node.label} points to a non-existing group called {data} " - f"in database {adsorption_groups.label}") - data.comment = f'{adsorption_groups.label}({comment})' - group_surface_sites = node.item.get_surface_sites() - if len(group_surface_sites) == number_of_surface_sites: - # all the surface sites are accounted for so add the adsorption group and return - add_thermo_data(adsorption_thermo, data, group_additivity=True) - return True - else: - # we have not found a full match yet, so append and keep looking - matches.append((len(group_surface_sites),data)) - - if len(matches) == 0: - raise DatabaseError(f"Could not find an adsorption correction in {adsorption_groups.label} for {molecule}") - matches.sort(key = lambda x: -x[0]) - # sort the matches by descending number of surface sites - corrections_applied = 0 - # start a counter for the number of corrections applied - for number_of_group_sites, data in matches: - if number_of_surface_sites - number_of_group_sites < 0: - # too many sites in this group, skip to the next one - continue - if not corrections_applied: - # this is the first correction, so add H298, S298, and Cp - add_thermo_data(adsorption_thermo, data, group_additivity=True) - else: - # We have already corrected S298 and Cp, so we only want to correct H298 - adsorption_thermo.H298.value_si += data.H298.value_si - adsorption_thermo.comment += ' + H298({0})'.format(data.comment) - corrections_applied += 1 - number_of_surface_sites -= number_of_group_sites - if number_of_surface_sites <= 0: - # we have corrected for all the sites - if number_of_surface_sites < 0: - adsorption_thermo.comment += ' WARNING(Too many adsorption corrections were added to the thermo!' - adsorption_thermo.comment += 'The H298 is very likely understimated as a result!)' - break - if number_of_surface_sites > 0: - adsorption_thermo.comment += ' WARNING({} surface sites were unaccounted for with adsorption corrections!'.format(number_of_surface_sites) - adsorption_thermo.comment += 'The H298 is very likely overestimated as a result!)' - - return True + if "SIDT" not in self.adsorption_groups: + matches = [] + for atom in surface_sites: + labeled_atoms = {'*': atom} + node = adsorption_groups.descend_tree(molecule, labeled_atoms) + if node is None: + # no match, so try the next surface site + continue + while node is not None and node.data is None: + node = node.parent + if node is None: + # no data, so try the next surface site + continue + data = node.data + comment = node.label + loop_count = 0 + while isinstance(data, str): + loop_count += 1 + if loop_count > 100: + raise DatabaseError("Maximum iterations reached while following thermo group data pointers. A circular" + f" reference may exist. Last node was {node.label} pointing to group called {data} in " + f"database {adsorption_groups.label}") + + for entry in adsorption_groups.entries.values(): + if entry.label == data: + data = entry.data + comment = entry.label + break + else: + raise DatabaseError(f"Node {node.label} points to a non-existing group called {data} " + f"in database {adsorption_groups.label}") + data.comment = f'{adsorption_groups.label}({comment})' + group_surface_sites = node.item.get_surface_sites() + if len(group_surface_sites) == number_of_surface_sites: + # all the surface sites are accounted for so add the adsorption group and return + add_thermo_data(adsorption_thermo, data, group_additivity=True) + return True + else: + # we have not found a full match yet, so append and keep looking + matches.append((len(group_surface_sites),data)) + + if len(matches) == 0: + raise DatabaseError(f"Could not find an adsorption correction in {adsorption_groups.label} for {molecule}") + matches.sort(key = lambda x: -x[0]) + # sort the matches by descending number of surface sites + corrections_applied = 0 + # start a counter for the number of corrections applied + for number_of_group_sites, data in matches: + if number_of_surface_sites - number_of_group_sites < 0: + # too many sites in this group, skip to the next one + continue + if not corrections_applied: + # this is the first correction, so add H298, S298, and Cp + add_thermo_data(adsorption_thermo, data, group_additivity=True) + else: + # We have already corrected S298 and Cp, so we only want to correct H298 + adsorption_thermo.H298.value_si += data.H298.value_si + adsorption_thermo.comment += ' + H298({0})'.format(data.comment) + corrections_applied += 1 + number_of_surface_sites -= number_of_group_sites + if number_of_surface_sites <= 0: + # we have corrected for all the sites + if number_of_surface_sites < 0: + adsorption_thermo.comment += ' WARNING(Too many adsorption corrections were added to the thermo!' + adsorption_thermo.comment += 'The H298 is very likely understimated as a result!)' + break + + if number_of_surface_sites > 0: + adsorption_thermo.comment += ' WARNING({} surface sites were unaccounted for with adsorption corrections!'.format(number_of_surface_sites) + adsorption_thermo.comment += 'The H298 is very likely overestimated as a result!)' + return True + else: + if number_of_surface_sites == 1: + if len(molecule.split()) == 1: + self.sidt_taggings_and_decompositions["Pt111_monodentate_adsorption_corrections"](molecule) + root = self.sidts["Pt111_monodentate_adsorption_corrections"].nodes["Root"] + data, unc, tr = self.sidts["Pt111_monodentate_adsorption_corrections"].evaluate(molecule,trace=True,estimate_uncertainty=True) + else: + self.sidt_taggings_and_decompositions["Pt111_vdw_adsorption_corrections"](molecule) + data, unc, tr = self.sidts["Pt111_vdw_adsorption_corrections"].evaluate(molecule,trace=True,estimate_uncertainty=True) + elif number_of_surface_sites == 2: + self.sidt_taggings_and_decompositions["Pt111_bidentate_adsorption_corrections"](molecule) + data, unc, tr = self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(molecule,trace=True,estimate_uncertainty=True) + else: # >2 surface sites + data = None + sts = self.sidt_taggings_and_decompositions["Pt111_multidentate_adsorption_corrections"](molecule) + Nsts = 0 + for st in sts: + if data is None: + dE = self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(st) + else: + dE = self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(st) + + if dE is not None: + if data is None: + data = dE + else: + data += dE + Nsts += 1 + + data /= Nsts + unc = [275.3710444584647, + 35.06003505844255, + 9.443946756077908, + 12.96897559887163, + 16.813134867837555, + 20.353756412672936, + 26.25846562429047, + 30.888440606215646, + 38.49498107527693] #based on tridentate data + tr = "estimators not designed to handle >2 adsorption sites corrections errors may be very high" + + molecule.clear_labeled_atoms() + + adsorption_thermo.H298.value_si += data[0]*1000.0 #kJ/mol + adsorption_thermo.H298.uncertainty_si += unc[0]*1000.0 + adsorption_thermo.S298.value_si += data[1] #J/(mol*K) + adsorption_thermo.S298.uncertainty_si += unc[1] + adsorption_thermo.Cpdata.value_si += data[2:] #J/(mol*K) + adsorption_thermo.Cpdata.uncertainty_si += unc[2:] + adsorption_thermo.comment += f'{self.adsorption_groups}({tr})' + + return True + def get_thermo_data_from_libraries(self, species, training_set=None): """ Return the thermodynamic parameters for a given :class:`Species` diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index a5854c26cc..4ed8e8024c 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -400,6 +400,10 @@ def save_input(self, path=None): save_input_file(path, self) def load_database(self): + if "SIDT" in self.adsorption_groups: + Pt111_adsorption = "adsorptionSIDTPt111" + else: + Pt111_adsorption = "adsorptionPt111" self.database = RMGDatabase() self.database.load( path=self.database_directory, @@ -410,7 +414,7 @@ def load_database(self): kinetics_families=self.kinetics_families, kinetics_depositories=self.kinetics_depositories, statmech_libraries = self.statmech_libraries, - adsorption_groups='adsorptionPt111', # use Pt111 groups for training reactions + adsorption_groups=Pt111_adsorption, # use Pt111 groups for training reactions # frequenciesLibraries = self.statmech_libraries, depository=False, # Don't bother loading the depository information, as we don't use it ) diff --git a/test/database/databaseTest.py b/test/database/databaseTest.py index 3af53f5ac3..ff3e83f70e 100644 --- a/test/database/databaseTest.py +++ b/test/database/databaseTest.py @@ -46,7 +46,7 @@ from rmgpy.data.base import LogicOr from rmgpy.data.rmg import RMGDatabase from rmgpy.exceptions import ImplicitBenzeneError, UnexpectedChargeError -from rmgpy.molecule import Group +from rmgpy.molecule import Group, Molecule from rmgpy.molecule.atomtype import ATOMTYPES from rmgpy.molecule.pathfinder import find_shortest_path from rmgpy.quantity import ScalarQuantity @@ -241,7 +241,27 @@ def test_thermo(self): assert self.check_surface_thermo_libraries_have_surface_attributes( library_name, library ), "Thermo surface libraries {0}: Entry has metal attributes?".format(library_name) - + + with check: + assert len(self.database.thermo.sidts) > 0, "SIDT thermochemistry models did not load?" + + for name,tree in self.database.thermo.sidts.items(): + tagging = self.database.thermo.sidt_taggings_and_decompositions[name] + if "monodentate" in name: + tmol = Molecule(smiles="C*") + tagging(tmol) + tree.evaluate(tmol) + elif "bidentate" in name: + tmol = Molecule(smiles="*CC*") + tagging(tmol) + tree.evaluate(tmol) + elif "vdw" in name: + tmol = Molecule(smiles="C.*") + tagging(tmol) + tree.evaluate(tmol) + else: + pass + def test_solvation(self): for group_name, group in self.database.solvation.groups.items(): with check: