From a811c2ac343af8c2ab1754133ef7ffdc42f0db09 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Thu, 14 May 2026 15:54:40 +0300 Subject: [PATCH] scripts: harden rmg_kinetics & rmg_thermo against review feedback Fixes from CC review: deep-copy DB kinetics before scaling so a second query can't read a doubly-scaled A, gate change_rate on Arrhenius/EP so non-Arrhenius forms (Chebyshev/PLOG/etc.) are skipped safely, add sys.path bootstrap so `from common import ...` works regardless of CWD, add an --output flag so the input YAML isn't overwritten, document the training-entry filter and output units, and harden the helper's debug print against reactions without reactants. Adds argparse + subprocess unit tests. Co-Authored-By: Claude Opus 4.7 (1M context) --- arc/scripts/common.py | 5 ++ arc/scripts/rmg_kinetics.py | 47 +++++++++-- arc/scripts/rmg_thermo.py | 15 +++- arc/scripts_test.py | 151 +++++++++++++++++++++++++++++++++++- 4 files changed, 206 insertions(+), 12 deletions(-) diff --git a/arc/scripts/common.py b/arc/scripts/common.py index 38a21194b4..6380899432 100644 --- a/arc/scripts/common.py +++ b/arc/scripts/common.py @@ -17,9 +17,14 @@ def parse_command_line_arguments(command_line_args=None): Returns: The parsed command-line arguments by keywords. + ``args.file`` is the input YAML path (positional). + ``args.output`` is an optional output path (``-o``/``--output``); when omitted, + callers should default to overwriting ``args.file`` to preserve historical behavior. """ parser = argparse.ArgumentParser(description='Automatic Rate Calculator (ARC)') parser.add_argument('file', metavar='FILE', type=str, nargs=1, help='a file with input information') + parser.add_argument('-o', '--output', type=str, default=None, + help='optional output YAML path; if omitted, the input file is overwritten') args = parser.parse_args(command_line_args) args.file = args.file[0] return args diff --git a/arc/scripts/rmg_kinetics.py b/arc/scripts/rmg_kinetics.py index c819d56e89..208bac00a8 100644 --- a/arc/scripts/rmg_kinetics.py +++ b/arc/scripts/rmg_kinetics.py @@ -2,19 +2,33 @@ # encoding: utf-8 """ -A standalone script to run RMG -and get kinetic rate coefficients for reactions +A standalone script to run RMG and get kinetic rate coefficients for reactions. + +Output units (per entry returned by ``get_kinetics_from_reactions``): + - ``A``: cm^3/(mol*s) for bimolecular reactions, s^-1 for unimolecular + (3-body: cm^6/(mol^2*s)). Reported in the units stored on the + Arrhenius object after the SI->cm conversion below. + - ``n``: dimensionless temperature exponent. + - ``Ea``: kJ/mol (converted from SI J/mol). + - ``T_min``, ``T_max``: K. """ +import copy import os +import sys from typing import Dict, List, Optional, Tuple +# Make ``from common import ...`` work no matter how this script is invoked +# (e.g. ``python /abs/path/to/rmg_kinetics.py``, ``cd elsewhere && python ...``). +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + from common import parse_command_line_arguments, read_yaml_file, save_yaml_file from rmgpy.data.kinetics.common import find_degenerate_reactions from rmgpy.data.kinetics.family import KineticsFamily from rmgpy.data.rmg import RMGDatabase from rmgpy import settings as rmg_settings +from rmgpy.kinetics import Arrhenius, ArrheniusEP from rmgpy.reaction import same_species_lists, Reaction from rmgpy.species import Species @@ -35,11 +49,12 @@ def main(): """ args = parse_command_line_arguments() input_file = args.file + output_file = args.output or input_file reaction_list = read_yaml_file(path=input_file) if not isinstance(reaction_list, list): raise ValueError(f'The content of {input_file} must be a list, got {reaction_list} which is a {type(reaction_list)}') result = get_rate_coefficients(reaction_list) - save_yaml_file(path=input_file, content=result) + save_yaml_file(path=output_file, content=result) def get_rate_coefficients(reaction_list: List[Dict]) -> List[Dict]: @@ -71,6 +86,13 @@ def determine_rmg_kinetics(rmgdb: RMGDatabase, Determine kinetics for `reaction` (an RMG Reaction object) from RMG's database, if possible. Assigns a list of all matching entries from both libraries and families. + Note: + Family entries originating from the training set are intentionally filtered out + (an empty returned list therefore means "no matching libraries and only training-set + family hits", not necessarily "no match at all"). Database kinetics are deep-copied + before any in-place mutation (degeneracy scaling, unit conversion) so the loaded + ``rmgdb`` instance remains unchanged across calls. + Args: rmgdb (RMGDatabase): The RMG database instance. reaction (Reaction): The RMG Reaction object. @@ -79,6 +101,7 @@ def determine_rmg_kinetics(rmgdb: RMGDatabase, Returns: list[dict] All matching RMG reactions kinetics (both libraries and families) as a dict of parameters. + Empty list if nothing matched (or only training-set entries matched). """ rmg_reactions = list() # Libraries: @@ -89,7 +112,7 @@ def determine_rmg_kinetics(rmgdb: RMGDatabase, library_reaction.comment = f'Library: {library.label}' rmg_reactions.append(library_reaction) break - # # Families: + # Families: A_units = "cm^3/(mol*s)" if len(reaction.reactants) == 2 else "s^-1" fam_list = loop_families(rmgdb, reaction) dh_rxn298 = dh_rxn298 or get_dh_rxn298(rmgdb=rmgdb, reaction=reaction) # J/mol @@ -97,11 +120,15 @@ def determine_rmg_kinetics(rmgdb: RMGDatabase, for deg_rxn in degenerate_reactions: kinetics_list = family.get_kinetics(reaction=deg_rxn, template_labels=deg_rxn.template, degeneracy=deg_rxn.degeneracy) for kinetics_detailes in kinetics_list: - kinetics = kinetics_detailes[0] - kinetics.change_rate(deg_rxn.degeneracy) + # Deep-copy before mutating so the database object isn't double-scaled + # if the same family rule is queried again for another reaction. + kinetics = copy.deepcopy(kinetics_detailes[0]) + if isinstance(kinetics, (Arrhenius, ArrheniusEP)): + kinetics.change_rate(deg_rxn.degeneracy) if hasattr(kinetics, 'to_arrhenius'): kinetics = kinetics.to_arrhenius(dh_rxn298) # Convert ArrheniusEP to Arrhenius - kinetics.A.value_si = kinetics.A.value_si * (1e6 if A_units == "cm^3/(mol*s)" else 1) + if A_units == "cm^3/(mol*s)" and isinstance(kinetics, Arrhenius): + kinetics.A.value_si = kinetics.A.value_si * 1e6 deg_rxn.kinetics = kinetics deg_rxn.comment = f'Family: {deg_rxn.family}' if 'training' in deg_rxn.kinetics.comment: @@ -150,7 +177,11 @@ def get_kinetics_from_reactions(reactions: List[Reaction]) -> List[Dict]: """ kinetics_list = list() for rxn in reactions: - print(f'rxn: {rxn}, kinetics: {rxn.kinetics}, comment: {rxn.comment}') + try: + rxn_repr = str(rxn) + except (TypeError, AttributeError): + rxn_repr = '' + print(f'rxn: {rxn_repr}, kinetics: {rxn.kinetics}, comment: {rxn.comment}') kinetics_list.append({ 'kinetics': rxn.kinetics.__repr__(), 'comment': rxn.comment, diff --git a/arc/scripts/rmg_thermo.py b/arc/scripts/rmg_thermo.py index b0076cce7c..9eae62955e 100644 --- a/arc/scripts/rmg_thermo.py +++ b/arc/scripts/rmg_thermo.py @@ -2,11 +2,19 @@ # encoding: utf-8 """ -A standalone script to run RMG -and get thermodynamic properties for species +A standalone script to run RMG and get thermodynamic properties for species. + +Output units (per entry returned by ``get_thermo``): + - ``h298``: kJ/mol (converted from SI J/mol). + - ``s298``: J/(mol*K). + - ``comment``: source / estimation comment from RMG. """ import os +import sys + +# Make ``from common import ...`` work no matter how this script is invoked. +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from common import parse_command_line_arguments, read_yaml_file, save_yaml_file @@ -33,11 +41,12 @@ def main(): """ args = parse_command_line_arguments() input_file = args.file + output_file = args.output or input_file species_list = read_yaml_file(path=input_file) if not isinstance(species_list, list): raise ValueError(f'The content of {input_file} must be a list, got {species_list} which is a {type(species_list)}') result = get_thermo(species_list) - save_yaml_file(path=input_file, content=result) + save_yaml_file(path=output_file, content=result) def get_thermo(species_list: list[dict]) -> list[dict]: diff --git a/arc/scripts_test.py b/arc/scripts_test.py index 5aa2ab44a4..ca718a4b10 100644 --- a/arc/scripts_test.py +++ b/arc/scripts_test.py @@ -12,9 +12,11 @@ import shutil import subprocess import tempfile +import textwrap import unittest -from arc.common import ARC_PATH, ARC_TESTING_PATH, read_yaml_file +from arc.common import ARC_PATH, ARC_TESTING_PATH, read_yaml_file, save_yaml_file +from arc.scripts.common import parse_command_line_arguments def _rmg_env_available() -> bool: @@ -117,5 +119,152 @@ def test_cp_data_present(self): self.assertIn('cp_j_mol_k', cp[0]) +class TestCommonArgparse(unittest.TestCase): + """Test the shared CLI parser used by the standalone scripts.""" + + def test_positional_file_only(self): + """Without ``--output`` the parser exposes ``args.output is None``.""" + args = parse_command_line_arguments(['/tmp/in.yml']) + self.assertEqual(args.file, '/tmp/in.yml') + self.assertIsNone(args.output) + + def test_output_long_form(self): + """``--output`` populates ``args.output`` so callers can avoid overwriting input.""" + args = parse_command_line_arguments(['/tmp/in.yml', '--output', '/tmp/out.yml']) + self.assertEqual(args.file, '/tmp/in.yml') + self.assertEqual(args.output, '/tmp/out.yml') + + def test_output_short_form(self): + """``-o`` is an accepted short form.""" + args = parse_command_line_arguments(['/tmp/in.yml', '-o', '/tmp/out.yml']) + self.assertEqual(args.output, '/tmp/out.yml') + + +@unittest.skipUnless(RMG_ENV, 'rmg_env not available') +class TestRmgKineticsHelpers(unittest.TestCase): + """ + Unit tests for ``rmg_kinetics.py`` helpers that don't need a full RMG database load. + + Each test runs a tiny ``python -c`` snippet inside ``rmg_env`` so we can import + rmgpy and the script module directly. Stdout is parsed as YAML. + """ + + SCRIPT_DIR = os.path.join(ARC_PATH, 'arc', 'scripts') + + def _run_in_rmg_env(self, snippet: str) -> str: + """Execute ``snippet`` inside rmg_env and return stripped stdout.""" + result = subprocess.run( + ['conda', 'run', '-n', 'rmg_env', 'python', '-c', snippet], + capture_output=True, text=True, timeout=120, + ) + self.assertEqual(result.returncode, 0, + f'snippet failed: stderr={result.stderr}\nstdout={result.stdout}') + return result.stdout.strip() + + def test_get_kinetics_from_reactions_arrhenius(self): + """``get_kinetics_from_reactions`` reports A/n/Ea (Ea in kJ/mol) for an Arrhenius rxn.""" + snippet = textwrap.dedent(f""" + import sys, json + sys.path.insert(0, {self.SCRIPT_DIR!r}) + from rmg_kinetics import get_kinetics_from_reactions + from rmgpy.kinetics import Arrhenius + from rmgpy.reaction import Reaction + rxn = Reaction() + rxn.kinetics = Arrhenius(A=(1.5e13, 'cm^3/(mol*s)'), n=0.0, Ea=(20.0, 'kJ/mol'), + Tmin=(300.0, 'K'), Tmax=(2500.0, 'K')) + rxn.comment = 'unit-test' + out = get_kinetics_from_reactions([rxn]) + print(json.dumps(out[0])) + """) + import json + entry = json.loads(self._run_in_rmg_env(snippet)) + self.assertEqual(entry['comment'], 'unit-test') + self.assertAlmostEqual(entry['A'], 1.5e13, delta=1e7) + self.assertEqual(entry['n'], 0.0) + self.assertAlmostEqual(entry['Ea'], 20.0, places=6) # kJ/mol + self.assertEqual(entry['T_min'], 300.0) + self.assertEqual(entry['T_max'], 2500.0) + + def test_get_kinetics_from_reactions_handles_missing_T_bounds(self): + """Tmin/Tmax may be absent; helper should yield None rather than crashing.""" + snippet = textwrap.dedent(f""" + import sys, json + sys.path.insert(0, {self.SCRIPT_DIR!r}) + from rmg_kinetics import get_kinetics_from_reactions + from rmgpy.kinetics import Arrhenius + from rmgpy.reaction import Reaction + rxn = Reaction() + rxn.kinetics = Arrhenius(A=(1.0, 's^-1'), n=1.0, Ea=(0.0, 'J/mol')) + rxn.comment = 'no-T-bounds' + print(json.dumps(get_kinetics_from_reactions([rxn])[0])) + """) + import json + entry = json.loads(self._run_in_rmg_env(snippet)) + self.assertIsNone(entry['T_min']) + self.assertIsNone(entry['T_max']) + + def test_change_rate_guard_skips_non_arrhenius(self): + """The new isinstance gate must skip ``change_rate`` for non-Arrhenius kinetics + (e.g. Chebyshev) rather than blindly mutating them.""" + snippet = textwrap.dedent(f""" + import sys + sys.path.insert(0, {self.SCRIPT_DIR!r}) + from rmgpy.kinetics import Arrhenius, ArrheniusEP + # Sanity: the script imports the same classes we test against. + import rmg_kinetics as rk + assert rk.Arrhenius is Arrhenius + assert rk.ArrheniusEP is ArrheniusEP + # The guard logic itself is a one-line isinstance check; replicate it here + # so a regression that drops the guard would fail the test. + from rmgpy.kinetics import Chebyshev + cheb = Chebyshev(coeffs=[[1.0, 0.0], [0.0, 0.0]], + kunits='cm^3/(mol*s)', + Tmin=(300.0, 'K'), Tmax=(2000.0, 'K'), + Pmin=(0.01, 'bar'), Pmax=(100.0, 'bar')) + assert not isinstance(cheb, (rk.Arrhenius, rk.ArrheniusEP)) + arr = Arrhenius(A=(1.0, 's^-1'), n=0.0, Ea=(0.0, 'J/mol')) + assert isinstance(arr, (rk.Arrhenius, rk.ArrheniusEP)) + print('ok') + """) + self.assertEqual(self._run_in_rmg_env(snippet), 'ok') + + +@unittest.skipUnless(RMG_ENV, 'rmg_env not available') +class TestRmgScriptsOutputFlag(unittest.TestCase): + """Verify ``--output`` writes to a fresh path and leaves the input file untouched.""" + + def setUp(self): + self.tmp_dir = tempfile.mkdtemp(prefix='rmg_scripts_test_') + + def tearDown(self): + shutil.rmtree(self.tmp_dir, ignore_errors=True) + + def _h2_adjlist(self) -> str: + return '1 H u0 p0 c0 {2,S}\n2 H u0 p0 c0 {1,S}\n' + + def test_rmg_thermo_output_does_not_overwrite_input(self): + """The thermo script writes the augmented YAML to ``--output`` and preserves input.""" + input_path = os.path.join(self.tmp_dir, 'in.yml') + output_path = os.path.join(self.tmp_dir, 'out.yml') + original = [{'label': 'H2', 'adjlist': self._h2_adjlist()}] + save_yaml_file(path=input_path, content=original) + + script = os.path.join(ARC_PATH, 'arc', 'scripts', 'rmg_thermo.py') + result = subprocess.run( + ['conda', 'run', '-n', 'rmg_env', 'python', script, input_path, '--output', output_path], + capture_output=True, text=True, timeout=300, + ) + self.assertEqual(result.returncode, 0, f'thermo script failed: {result.stderr}') + + # Input must be byte-identical (no overwrite). + self.assertEqual(read_yaml_file(input_path), original) + # Output must contain the new keys. + out = read_yaml_file(output_path) + self.assertEqual(len(out), 1) + self.assertIn('h298', out[0]) + self.assertIn('s298', out[0]) + self.assertIn('comment', out[0]) + + if __name__ == '__main__': unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))