Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions openfe_benchmarks/data/_benchmark_systems.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,9 @@ class BenchmarkData:
ligand_networks : dict[str, Path] | None
Dictionary of available ligand networks where the key is the filename, '*network.json',
and the value is a Path to a ligand network file.
reference_data : dict[str, Path] | None
Dictionary of available reference data files where the key is the file stem (the filename 'experimental*data.json' without its '.json' extension),
and the value is a Path to a reference data file.
details : str
Information available in the preparation_details.md file
"""
Expand All @@ -221,6 +224,7 @@ class BenchmarkData:
ligands: dict[str, Path]
cofactors: dict[str, Path] | None
ligand_networks: dict[str, Path] | None
reference_data: dict[str, Path] | None
details: str

def __repr__(self):
Expand All @@ -231,6 +235,7 @@ def __repr__(self):
f"ligands={list(self.ligands.keys())}, "
f"cofactors={list(self.cofactors.keys()) if self.cofactors is not None else 'None'}, "
f"ligand_network={list(self.ligand_networks.keys()) if self.ligand_networks is not None else 'None'}, "
f"reference_data={list(self.reference_data.keys()) if self.reference_data is not None else 'None'})"
)


Expand Down Expand Up @@ -263,6 +268,7 @@ def _validate_and_load_data_system(
ligands = {}
cofactors = {}
ligand_networks = {}
reference_data = {}
details = None

# Track all files for validation
Expand Down Expand Up @@ -341,6 +347,13 @@ def _validate_and_load_data_system(
logger.debug(f"Found ligand network: {filename}")
continue

# check for reference data file (experimental*data.json)
if filename.startswith("experimental") and filename.endswith("data.json"):
reference_data[file_path.stem] = file_path
categorized_files.add(file_path)
logger.debug(f"Found reference data: {filename}")
continue

# Check for uncategorized files
uncategorized = set(all_files) - categorized_files
for file_path in uncategorized:
Expand Down Expand Up @@ -411,6 +424,7 @@ def _validate_and_load_data_system(
ligands=ligands,
cofactors=cofactors,
ligand_networks=ligand_networks,
reference_data=reference_data,
details=details,
)

Expand Down
115 changes: 58 additions & 57 deletions openfe_benchmarks/data/benchmark_system_indexing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,73 +4,74 @@
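# Other tags indicate which experimental data is present, e.g. "bfe" for experimental binding free energies (previously written here as exp_bfe / exp_sfe; TODO confirm the solvation free energy tag name).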

charge_annihilation_set:
cdk2: ["protein",]
dlk: ["protein",]
egfr: ["protein",]
ephx2: ["protein",]
irak4_s2: ["protein",]
irak4_s3: ["protein",]
itk: ["protein",]
jak1: ["protein",]
jnk1: ["protein",]
ptp1b: ["protein",]
thrombin: ["protein", "cofactor"]
tyk2: ["protein",]
cdk2: ["protein","bfe"]
dlk: ["protein", "bfe"]
egfr: ["protein", "bfe"]
ephx2: ["protein", "bfe"]
irak4_s2: ["protein", "bfe"]
irak4_s3: ["protein", "bfe"]
itk: ["protein", "bfe"]
jak1: ["protein", "bfe"]
jnk1: ["protein", "bfe"]
ptp1b: ["protein", "bfe"]
thrombin: ["protein", "cofactor", "bfe"]
tyk2: ["protein", "bfe"]
fragments:
hsp90_2rings: ["protein",]
hsp90_single_ring: ["protein",]
jak2_set1: ["protein",]
jak2_set2: ["protein",]
liga: ["protein",]
mcl1: ["protein",]
mup1: ["protein",]
p38: ["protein",]
t4_lysozyme: ["protein",]
hsp90_2rings: ["protein", "bfe"]
hsp90_single_ring: ["protein", "bfe"]
jak2_set1: ["protein", "bfe"]
jak2_set2: ["protein", "bfe"]
liga: ["protein", "bfe"]
mcl1: ["protein", "bfe"]
mup1: ["protein", "bfe"]
p38: ["protein", "bfe"]
t4_lysozyme: ["protein", "bfe"]
jacs_set:
bace: ["protein",]
cdk2: ["protein",]
jnk1: ["protein",]
mcl1: ["protein",]
p38: ["protein",]
ptp1b: ["protein",]
thrombin: ["protein",]
tyk2: ["protein",]
bace: ["protein", "bfe"]
cdk2: ["protein", "bfe"]
jnk1: ["protein", "bfe"]
mcl1: ["protein", "bfe"]
p38: ["protein", "bfe"]
ptp1b: ["protein", "bfe"]
thrombin: ["protein", "bfe"]
tyk2: ["protein", "bfe"]
janssen_bace:
bace_ciordia_prospective: ["protein",]
bace_p3_arg368_in: ["protein",]
ciordia_retro: ["protein",]
keranen_p2: ["protein",]
bace_ciordia_prospective: ["protein", "bfe"]
bace_p3_arg368_in: ["protein", "bfe"]
ciordia_retro: ["protein", "bfe"]
keranen_p2: ["protein", "bfe"]
mcs_docking_set:
hne: ["protein", "cofactor"]
renin: ["protein",]
hne: ["protein", "cofactor", "bfe"]
renin: ["protein", "bfe"]
merck:
cdk8: ["protein",]
cmet: ["protein",]
eg5: ["protein",]
hif2a: ["protein",]
pfkfb3: ["protein", "cofactor"]
shp2: ["protein",]
syk: ["protein",]
tnks2: ["protein", "cofactor"]
cdk8: ["protein", "bfe"]
cmet: ["protein", "bfe"]
eg5: ["protein", "bfe"]
hif2a: ["protein", "bfe"]
pfkfb3: ["protein", "cofactor", "bfe"]
shp2: ["protein", "bfe"]
syk: ["protein", "bfe"]
tnks2: ["protein", "cofactor", "bfe"]
miscellaneous_set:
btk: ["protein",]
cdk8: ["protein",]
faah: ["protein",]
galectin: ["protein",]
hiv1_protease: ["protein",]
btk: ["protein", "bfe"]
cdk8: ["protein", "bfe"]
faah: ["protein", "bfe"]
galectin: ["protein", "bfe"]
hiv1_protease: ["protein", "bfe"]
water_set:
brd4: ["protein",]
chk1: ["protein",]
hsp90_kung: ["protein",]
hsp90_woodhead: ["protein", "cofactor"]
scyt_dehyd: ["protein",]
taf12: ["protein",]
thrombin: ["protein",]
urokinase: ["protein",]
brd4: ["protein", "bfe"]
chk1: ["protein", "bfe"]
hsp90_kung: ["protein", "bfe"]
hsp90_woodhead: ["protein", "cofactor", "bfe"]
scyt_dehyd: ["protein", "bfe"]
taf12: ["protein", "bfe"]
thrombin: ["protein", "bfe"]
urokinase: ["protein", "bfe"]
metadata:
last_updated: "2026-02-02"
last_updated: "2026-02-13"
notes:
- "Systems may or may not include cofactor files"
- "All systems can be used for ASFEs and RSFEs"
Comment thread
jthorton marked this conversation as resolved.
- "bfe tag indicates that experimental binding free energy data is available for this system"
- "Systems with proteins can also be used for RBFE and ABFE calculations"
- "Systems with ligands and cofactors could potentially be used for RBFE and ABFE calculations"
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"39charg": {
"dg": {
"magnitude": -8.369999885559082,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(c(c(c(c1C(=O)[O-])[H])[H])N([H])c2nc3c(c(n2)OC([H])([H])C4(C(C(C(C(C4([H])[H])([H])[H])([H])[H])([H])[H])([H])[H])[H])N=C(N3[H])[H])[H]",
"inchikey": "TVEHBRDXVINFRZ-RHMRERFDNA-M",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
},
"34": {
"dg": {
"magnitude": -9.880000114440918,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(c(c(c(c1C(=O)N([H])[H])[H])[H])N([H])c2nc3c(c(n2)OC([H])([H])C4(C(C(C(C(C4([H])[H])([H])[H])([H])[H])([H])[H])([H])[H])[H])N=C(N3[H])[H])[H]",
"inchikey": "RUUOIINPNMNPIU-GJCIOOLPNA-N",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
},
"25": {
"dg": {
"magnitude": -9.829999923706056,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(c(c(c(c1N([H])c2nc3c(c(n2)OC([H])([H])C4(C(C(C(C(C4([H])[H])([H])[H])([H])[H])([H])[H])([H])[H])[H])N=C(N3[H])[H])[H])[H])O[H])[H]",
"inchikey": "RFSDQDHHBKYQOD-BDGWVKIONA-N",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"9": {
"dg": {
"magnitude": -8.729999542236328,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(nc(c(c1C#N)[H])N([H])C2=NN(C(=C2[H])C3(C(C(N(C(C3([H])[H])([H])[H])C(=O)C([H])([H])[H])([H])[H])([H])[H])[H])C([H])(C([H])([H])[H])C([H])([H])[H])[H]",
"inchikey": "CBLJTUXDZXYAMM-QWOVJGMINA-N",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
},
"17charg": {
"dg": {
"magnitude": -10.4399995803833,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(nc(c(c1C#N)[H])N([H])C2=NN(C(=C2[H])[C@]3(C(C([N@@+](C3([H])[H])([H])C4(C(OC4([H])[H])([H])[H])[H])([H])[H])([H])[H])[H])C5(C(C(C(C5([H])[H])([H])[H])([H])[H])([H])[H])[H])[H]",
"inchikey": "HPCAFTNDPHAAGH-WNJPLXHJNA-O",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
},
"4charg": {
"dg": {
"magnitude": -9.329999923706056,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(nc(c(c1C#N)[H])N([H])C2=NN(C(=C2[H])C3(C(C([N+](C(C3([H])[H])([H])[H])([H])[H])([H])[H])([H])[H])[H])C([H])(C([H])([H])[H])C([H])([H])[H])[H]",
"inchikey": "PZXSYKGUMOGQBI-ZFSSJWKENA-O",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
},
"18": {
"dg": {
"magnitude": -9.979999542236328,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(nc(c(c1C#N)[H])N([H])C2=NN(C(=C2[H])[C@]3(C(C(OC3([H])[H])([H])[H])([H])[H])[H])C4(C(C(C(C4([H])[H])([H])[H])([H])[H])([H])[H])[H])[H]",
"inchikey": "ZBHXJFIKXIBKBY-JAIDSPQONA-N",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
},
"8": {
"dg": {
"magnitude": -10.510000228881836,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(nc(c(c1C#N)[H])N([H])C2=NN(C(=C2[H])C3(C(C([N+](C(C3([H])[H])([H])[H])([H])[H])([H])[H])([H])[H])[H])C4(C(C(C(C4([H])[H])([H])[H])([H])[H])([H])[H])[H])[H]",
"inchikey": "VOBBPVMLJJZDTQ-UDEMLUNXNA-O",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"21": {
"dg": {
"magnitude": -9.88,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(c(c(c(c1[H])[H])C([H])([H])C(=O)N([H])c2c(c(c(c(c2[H])C3=C(N4C(=C(SC4=N3)[H])[H])c5c(c(nc(n5)N([H])c6c(c(c(c(c6[H])[H])N7C(C(OC(C7([H])[H])([H])[H])([H])[H])([H])[H])[H])[H])[H])[H])[H])[H])[H])[H])[H]",
"inchikey": "BFTPDUXUTFGDLP-QQYWGXKINA-N",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
},
"27chargR": {
"dg": {
"magnitude": -9.94,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(c(c(c(c1[H])[H])C([H])([H])C(=O)N([H])c2c(c(c(c(c2[H])C3=C(N4C(=C(SC4=N3)[H])[H])c5c(c(nc(n5)N([H])c6c(c(c7c(c6[H])C([N@+](C(C7([H])[H])([H])[H])([H])C([H])([H])[H])([H])[H])[H])[H])[H])[H])[H])[H])[H])[H])[H]",
"inchikey": "PVKPCQULPTYIRN-RRKCJBJINA-O",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
},
"28charg": {
"dg": {
"magnitude": -10.7,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(c(c(c(c1[H])[H])C([H])([H])C(=O)N([H])c2c(c(c(c(c2[H])C3=C(N4C(=C(SC4=N3)[H])[H])c5c(c(nc(n5)N([H])c6c(c(c(c(c6[H])[H])N7C(C([N+](C(C7([H])[H])([H])[H])([H])C([H])([H])C([H])([H])[H])([H])[H])([H])[H])[H])[H])[H])[H])[H])[H])[H])[H])[H]",
"inchikey": "MBYBHGUQPPLBRD-XUHMWGPANA-O",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
},
"29charg": {
"dg": {
"magnitude": -10.46,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(c(c(c(c1[H])[H])C([H])([H])C(=O)N([H])c2c(c(c(c(c2[H])C3=C(N4C(=C(SC4=N3)[H])[H])c5c(c(nc(n5)N([H])c6c(c(c(c(c6[H])C([H])([H])C([H])([H])[N+]([H])(C([H])([H])[H])C([H])([H])[H])[H])[H])[H])[H])[H])[H])[H])[H])[H])[H]",
"inchikey": "JPSXZEQQHPNTNX-UPNFYDLJNA-O",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
},
"30charg": {
"dg": {
"magnitude": -11.25,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(c(c(c(c1[H])[H])C([H])([H])C(=O)N([H])c2c(c(c(c(c2[H])C3=C(N4C(=C(SC4=N3)[H])[H])c5c(c(nc(n5)N([H])c6c(c(c7c(c6[H])C([N+](C(C7([H])[H])([H])[H])([H])[H])([H])[H])[H])[H])[H])[H])[H])[H])[H])[H])[H]",
"inchikey": "KEOFEULIBILKAL-NSQPPVFJNA-O",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"47charg": {
"dg": {
"magnitude": -11.25,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(c(c(c(c1C(=O)[O-])[H])[H])OC2(C(C(N(C(C2([H])[H])([H])[H])C(=O)N([H])C([H])([H])c3c(c(c(c(c3Cl)[H])Cl)[H])[H])([H])[H])([H])[H])[H])[H]",
"inchikey": "SJFWZPJFJSGSAB-BGLXQYIQNA-M",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
},
"19charg": {
"dg": {
"magnitude": -8.239999771118164,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(c(c(c(c1C([H])([H])N([H])C(=O)N2C(C(C(C(C2([H])[H])([H])[H])([H])C(=O)[O-])([H])[H])([H])[H])Cl)[H])Cl)[H]",
"inchikey": "YYEDYEHCUJGPOP-ZVXBUSEHNA-M",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
},
"20": {
"dg": {
"magnitude": -9.140000343322754,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(c(c(c(c1C([H])([H])N([H])C(=O)N2C(C(C(C(C2([H])[H])([H])[H])([H])C(=O)N([H])[H])([H])[H])([H])[H])Cl)[H])Cl)[H]",
"inchikey": "UUCMRBUYLOLCHH-VRGMUVNCNA-N",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
},
"33": {
"dg": {
"magnitude": -10.739999771118164,
"unit": "kilocalories_per_mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
},
"canonical_smiles": "[H]c1c(c(c(c(c1C([H])([H])N([H])C(=O)N2C(C(C(C(C2([H])[H])([H])[H])([H])Oc3nc(c(c(n3)[H])[H])[H])([H])[H])([H])[H])Cl)[H])Cl)[H]",
"inchikey": "UZEPPAOAIDTLTK-QWOVJGMINA-N",
"reference": "https://doi.org/10.1038/s42004-023-01019-9"
}
}
Loading
Loading