diff --git a/src/classes/structure.cpp b/src/classes/structure.cpp index daed391e7e..4ce870202a 100644 --- a/src/classes/structure.cpp +++ b/src/classes/structure.cpp @@ -76,6 +76,9 @@ int Structure::nAtoms(Atom::Presence withPresence) const { return i->isPresence(withPresence); }); } +// Return atom at index +StructureAtom *Structure::atomAt(int i) { return atoms_[i].get(); } + // Return atoms const std::vector> &Structure::atoms() const { return atoms_; } std::vector> &Structure::atoms() { return atoms_; } diff --git a/src/classes/structure.h b/src/classes/structure.h index 2f5f8d8893..88a1e59629 100644 --- a/src/classes/structure.h +++ b/src/classes/structure.h @@ -92,6 +92,8 @@ class Structure : public Serialisable<> void removeAtoms(const std::vector &atoms); // Return the number of atoms int nAtoms(Atom::Presence withPresence = Atom::Presence::Any) const; + // Return atom at index + StructureAtom *atomAt(int i); // Return atoms const std::vector> &atoms() const; std::vector> &atoms(); diff --git a/src/gui/createGrapheneSpeciesDialog.h b/src/gui/createGrapheneSpeciesDialog.h index c265d4936c..e24b001243 100644 --- a/src/gui/createGrapheneSpeciesDialog.h +++ b/src/gui/createGrapheneSpeciesDialog.h @@ -8,8 +8,8 @@ #include "gui/selectElementDialog.h" #include "gui/ui_createGrapheneSpeciesDialog.h" #include "gui/wizard.h" -#include "io/import/cif.h" #include "main/dissolve.h" +#include "nodes/cif/io/cifContext.h" #include // Forward Declarations diff --git a/src/gui/importCIFDialog.cpp b/src/gui/importCIFDialog.cpp index 94873c163c..a94e31f3d1 100644 --- a/src/gui/importCIFDialog.cpp +++ b/src/gui/importCIFDialog.cpp @@ -13,7 +13,7 @@ #include ImportCIFDialog::ImportCIFDialog(QWidget *parent, Dissolve &dissolve) - : QDialog(parent), cifAssemblyModel_(cifHandler_.assemblies()), dissolve_(dissolve) + : QDialog(parent), cifAssemblyModel_(cifContext_.assemblies()), dissolve_(dissolve) { ui_.setupUi(this); @@ -38,7 +38,7 @@ ImportCIFDialog::ImportCIFDialog(QWidget 
*parent, Dissolve &dissolve) ComboEnumOptionsPopulator(ui_.DensityUnitsCombo, Units::densityUnits()); // Set display configuration - ui_.StructureViewer->setConfiguration(cifHandler_.generatedConfiguration()); + ui_.StructureViewer->setConfiguration(cifContext_.generatedConfiguration()); createMoietyRemovalNETA(ui_.MoietyNETARemovalEdit->text().toStdString()); } @@ -53,34 +53,34 @@ void ImportCIFDialog::updateWidgets() Locker updateLock(widgetsUpdating_); // DATA_ ID - ui_.InfoDataLabel->setText(QString::fromStdString(cifHandler_.getTagString("DATA_").value_or(""))); + ui_.InfoDataLabel->setText(QString::fromStdString(cifContext_.getTagString("DATA_").value_or(""))); // Chemical Formula - ui_.InfoChemicalFormulaLabel->setText(QString::fromStdString(cifHandler_.chemicalFormula())); + ui_.InfoChemicalFormulaLabel->setText(QString::fromStdString(cifContext_.chemicalFormula())); // Publication Data ui_.InfoPublicationTitleLabel->setText( - QString::fromStdString(cifHandler_.getTagString("_publ_section_title").value_or(""))); + QString::fromStdString(cifContext_.getTagString("_publ_section_title").value_or(""))); ui_.InfoPublicationReferenceLabel->setText(QString::fromStdString(std::format( - "{}, {}, {}, {}", cifHandler_.getTagString("_journal_name_full").value_or("N/A"), - cifHandler_.getTagString("_journal_year").value_or("N/A"), cifHandler_.getTagString("_journal_volume").value_or("N/A"), - cifHandler_.getTagString("_journal_page_first").value_or("N/A")))); + "{}, {}, {}, {}", cifContext_.getTagString("_journal_name_full").value_or("N/A"), + cifContext_.getTagString("_journal_year").value_or("N/A"), cifContext_.getTagString("_journal_volume").value_or("N/A"), + cifContext_.getTagString("_journal_page_first").value_or("N/A")))); ui_.InfoAuthorsList->clear(); - auto authors = cifHandler_.getTagStrings("_publ_author_name"); + auto authors = cifContext_.getTagStrings("_publ_author_name"); for (auto &author : authors) 
ui_.InfoAuthorsList->addItem(QString::fromStdString(author)); // Spacegroup - ui_.SpaceGroupsCombo->setCurrentIndex(cifHandler_.spaceGroup() - 1); + ui_.SpaceGroupsCombo->setCurrentIndex(cifContext_.spaceGroup() - 1); // Bonding - ui_.BondFromCIFRadio->setEnabled(cifHandler_.hasBondDistances()); + ui_.BondFromCIFRadio->setEnabled(cifContext_.hasBondDistances()); // Assemblies ui_.AssemblyView->expandAll(); // Configuration information - auto *cfg = cifHandler_.generatedConfiguration(); + auto *cfg = cifContext_.generatedConfiguration(); const auto *box = cfg->box(); ui_.CurrentBoxTypeLabel->setText(QString::fromStdString(std::string(Box::boxTypes().keyword(box->type())))); QString boxInfo = QString("A: %1 Å
").arg(box->axisLengths().x); @@ -95,7 +95,7 @@ void ImportCIFDialog::updateWidgets() ui_.MoleculePopulationLabel->setText(QString::number(cfg->nMolecules())); // Output - auto validSpecies = !cifHandler_.molecularSpecies().empty(); + auto validSpecies = !cifContext_.molecularSpecies().empty(); ui_.OutputMolecularRadio->setEnabled(validSpecies); ui_.OutputFrameworkRadio->setEnabled(!validSpecies); ui_.OutputSupermoleculeRadio->setEnabled(!validSpecies); @@ -106,7 +106,7 @@ void ImportCIFDialog::updateWidgets() // Update molecular species list ui_.OutputMolecularSpeciesList->clear(); - for (auto &molecularSp : cifHandler_.molecularSpecies()) + for (auto &molecularSp : cifContext_.molecularSpecies()) { ui_.OutputMolecularSpeciesList->addItem(QString::fromStdString(std::string(molecularSp.species()->name()))); } @@ -118,7 +118,7 @@ void ImportCIFDialog::updateWidgets() // Update density label void ImportCIFDialog::updateDensityLabel() { - auto *cfg = cifHandler_.generatedConfiguration(); + auto *cfg = cifContext_.generatedConfiguration(); if (!cfg) ui_.DensityUnitsLabel->setText("N/A"); else @@ -131,11 +131,11 @@ void ImportCIFDialog::updateDensityLabel() void ImportCIFDialog::on_InputFileEdit_editingFinished() { // Load the CIF file - if (!cifHandler_.read(ui_.InputFileEdit->text().toStdString())) + if (!cifContext_.read(ui_.InputFileEdit->text().toStdString())) Messenger::error("Failed to load CIF file '{}'.\n", ui_.InputFileEdit->text().toStdString()); else { - cifHandler_.generate(); + cifContext_.generate(); updateWidgets(); } } @@ -157,7 +157,7 @@ void ImportCIFDialog::on_SpaceGroupsCombo_currentIndexChanged(int index) if (widgetsUpdating_) return; - cifHandler_.setSpaceGroup((SpaceGroups::SpaceGroupId)(ui_.SpaceGroupsCombo->currentIndex() + 1)); + cifContext_.setSpaceGroup((SpaceGroups::SpaceGroupId)(ui_.SpaceGroupsCombo->currentIndex() + 1)); updateWidgets(); } @@ -166,7 +166,7 @@ void ImportCIFDialog::on_NormalOverlapToleranceRadio_clicked(bool checked) { if 
(checked) { - cifHandler_.setOverlapTolerance(0.1); + cifContext_.setOverlapTolerance(0.1); updateWidgets(); } } @@ -175,7 +175,7 @@ void ImportCIFDialog::on_LooseOverlapToleranceRadio_clicked(bool checked) { if (checked) { - cifHandler_.setOverlapTolerance(0.5); + cifContext_.setOverlapTolerance(0.5); updateWidgets(); } } @@ -184,7 +184,7 @@ void ImportCIFDialog::on_CalculateBondingRadio_clicked(bool checked) { if (checked) { - cifHandler_.setUseCIFBondingDefinitions(false); + cifContext_.setUseCIFBondingDefinitions(false); updateWidgets(); } } @@ -193,14 +193,14 @@ void ImportCIFDialog::on_BondFromCIFRadio_clicked(bool checked) { if (checked) { - cifHandler_.setUseCIFBondingDefinitions(true); + cifContext_.setUseCIFBondingDefinitions(true); updateWidgets(); } } void ImportCIFDialog::on_BondingPreventMetallicCheck_clicked(bool checked) { - cifHandler_.setPreventMetallicBonds(checked); + cifContext_.setPreventMetallicBonds(checked); updateWidgets(); } @@ -217,13 +217,13 @@ bool ImportCIFDialog::createMoietyRemovalNETA(std::string definition) void ImportCIFDialog::on_MoietyRemoveAtomicsCheck_clicked(bool checked) { - cifHandler_.setRemoveAtomics(checked); + cifContext_.setRemoveAtomics(checked); updateWidgets(); } void ImportCIFDialog::on_MoietyRemoveWaterCheck_clicked(bool checked) { - cifHandler_.setRemoveWaterAndCoordinateOxygens(checked); + cifContext_.setRemoveWaterAndCoordinateOxygens(checked); updateWidgets(); } @@ -236,7 +236,7 @@ void ImportCIFDialog::on_MoietyRemoveByNETACheck_clicked(bool checked) if (ui_.MoietyNETARemovalIndicator->state() != CheckIndicator::OKState) return; - cifHandler_.setRemoveNETA(checked, ui_.MoietyNETARemoveFragmentsCheck->isChecked()); + cifContext_.setRemoveNETA(checked, ui_.MoietyNETARemoveFragmentsCheck->isChecked()); updateWidgets(); } @@ -245,8 +245,8 @@ void ImportCIFDialog::on_MoietyNETARemovalEdit_textEdited(const QString &text) if (!createMoietyRemovalNETA(ui_.MoietyNETARemovalEdit->text().toStdString())) return; - 
cifHandler_.setRemoveNETA(ui_.MoietyRemoveByNETACheck->isChecked(), ui_.MoietyNETARemoveFragmentsCheck->isChecked()); - cifHandler_.setMoietyRemovalNETA(moietyNETA_.definitionString()); + cifContext_.setRemoveNETA(ui_.MoietyRemoveByNETACheck->isChecked(), ui_.MoietyNETARemoveFragmentsCheck->isChecked()); + cifContext_.setMoietyRemovalNETA(moietyNETA_.definitionString()); updateWidgets(); } @@ -255,32 +255,32 @@ void ImportCIFDialog::on_MoietyNETARemoveFragmentsCheck_clicked(bool checked) if (ui_.MoietyNETARemovalIndicator->state() != CheckIndicator::OKState) return; - cifHandler_.setRemoveNETA(ui_.MoietyRemoveByNETACheck->isChecked(), checked); + cifContext_.setRemoveNETA(ui_.MoietyRemoveByNETACheck->isChecked(), checked); updateWidgets(); } void ImportCIFDialog::assembliesChanged(const QModelIndex &, const QModelIndex &, const QList &) { - cifHandler_.generate(); + cifContext_.generate(); updateWidgets(); } void ImportCIFDialog::on_RepeatASpin_valueChanged(int value) { - cifHandler_.setSupercellRepeat({ui_.RepeatASpin->value(), ui_.RepeatBSpin->value(), ui_.RepeatCSpin->value()}); + cifContext_.setSupercellRepeat({ui_.RepeatASpin->value(), ui_.RepeatBSpin->value(), ui_.RepeatCSpin->value()}); updateWidgets(); } void ImportCIFDialog::on_RepeatBSpin_valueChanged(int value) { - cifHandler_.setSupercellRepeat({ui_.RepeatASpin->value(), ui_.RepeatBSpin->value(), ui_.RepeatCSpin->value()}); + cifContext_.setSupercellRepeat({ui_.RepeatASpin->value(), ui_.RepeatBSpin->value(), ui_.RepeatCSpin->value()}); updateWidgets(); } void ImportCIFDialog::on_RepeatCSpin_valueChanged(int value) { - cifHandler_.setSupercellRepeat({ui_.RepeatASpin->value(), ui_.RepeatBSpin->value(), ui_.RepeatCSpin->value()}); + cifContext_.setSupercellRepeat({ui_.RepeatASpin->value(), ui_.RepeatBSpin->value(), ui_.RepeatCSpin->value()}); updateWidgets(); } @@ -300,22 +300,22 @@ void ImportCIFDialog::on_OutputSupermoleculeRadio_clicked(bool checked) {} void ImportCIFDialog::on_OKButton_clicked(bool 
checked) { - Flags outputFlags; + Flags outputFlags; if (ui_.OutputMolecularRadio->isChecked()) - outputFlags.setFlag(CIFHandler::OutputFlags::OutputMolecularSpecies); + outputFlags.setFlag(CIFContext::OutputFlags::OutputMolecularSpecies); else if (ui_.OutputFrameworkRadio->isChecked()) - outputFlags.setFlag(CIFHandler::OutputFlags::OutputFramework); + outputFlags.setFlag(CIFContext::OutputFlags::OutputFramework); else if (ui_.OutputSupermoleculeRadio->isChecked()) - outputFlags.setFlag(CIFHandler::OutputFlags::OutputSupermolecule); + outputFlags.setFlag(CIFContext::OutputFlags::OutputSupermolecule); // Output a configuration as well as the species for certain options - if (outputFlags.isSet(CIFHandler::OutputFlags::OutputMolecularSpecies) || - outputFlags.isSet(CIFHandler::OutputFlags::OutputFramework)) + if (outputFlags.isSet(CIFContext::OutputFlags::OutputMolecularSpecies) || + outputFlags.isSet(CIFContext::OutputFlags::OutputFramework)) { - outputFlags.setFlag(CIFHandler::OutputFlags::OutputConfiguration); + outputFlags.setFlag(CIFContext::OutputFlags::OutputConfiguration); } - cifHandler_.finalise(dissolve_.coreData(), outputFlags); + cifContext_.finalise(dissolve_.coreData(), outputFlags); accept(); } diff --git a/src/gui/importCIFDialog.h b/src/gui/importCIFDialog.h index 52133f3a42..09542002f3 100644 --- a/src/gui/importCIFDialog.h +++ b/src/gui/importCIFDialog.h @@ -7,8 +7,8 @@ #include "gui/models/cifAssemblyModel.h" #include "gui/ui_importCIFDialog.h" #include "gui/wizard.h" -#include "io/import/cif.h" #include "main/dissolve.h" +#include "nodes/cif/io/cifContext.h" #include // Forward Declarations @@ -39,7 +39,7 @@ class ImportCIFDialog : public QDialog // Main Dissolve object Dissolve &dissolve_; // CIF Handler - CIFHandler cifHandler_; + CIFContext cifContext_; // NETA for moiety removal NETADefinition moietyNETA_; diff --git a/src/gui/models/cifAssemblyModel.h b/src/gui/models/cifAssemblyModel.h index f1ba29b6d1..7ded24966d 100644 --- 
a/src/gui/models/cifAssemblyModel.h +++ b/src/gui/models/cifAssemblyModel.h @@ -3,7 +3,7 @@ #pragma once -#include "io/import/cif.h" +#include "nodes/cif/io/cifContext.h" #include #include diff --git a/src/io/import/CIFImportErrorListeners.cpp b/src/io/import/CIFImportErrorListeners.cpp index 6d727406f5..960fe3570d 100644 --- a/src/io/import/CIFImportErrorListeners.cpp +++ b/src/io/import/CIFImportErrorListeners.cpp @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2026 Team Dissolve and contributors -#include "io/import/CIFImportErrorListeners.h" +#include "nodes/cif/io/CIFImportErrorListeners.h" #include "base/messenger.h" #include "base/sysFunc.h" diff --git a/src/io/import/CIFImportLexer.g4 b/src/io/import/CIFImportLexer.g4 index 50f921c330..84f510b7fa 100644 --- a/src/io/import/CIFImportLexer.g4 +++ b/src/io/import/CIFImportLexer.g4 @@ -9,7 +9,7 @@ lexer grammar CIFImportLexer; // Add custom includes after standard ANTLR includes in both *.h and *.cpp files @lexer::postinclude { #include "base/sysFunc.h" - #include "io/import/CIFImportVisitor.h" + #include "nodes/cif/io/CIFImportVisitor.h" } // Directly precedes the lexer class declaration in the h file (e.g. for additional types etc.). 
diff --git a/src/io/import/CIFImportVisitor.cpp b/src/io/import/CIFImportVisitor.cpp index f0e5bec273..6b95676e03 100644 --- a/src/io/import/CIFImportVisitor.cpp +++ b/src/io/import/CIFImportVisitor.cpp @@ -1,10 +1,10 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2026 Team Dissolve and contributors -#include "io/import/CIFImportVisitor.h" +#include "nodes/cif/io/CIFImportVisitor.h" #include "base/messenger.h" #include "base/sysFunc.h" -#include "io/import/CIFImportErrorListeners.h" +#include "nodes/cif/io/CIFImportErrorListeners.h" CIFImportVisitor::CIFImportVisitor(CIFHandler::CIFTags &tags) : tags_(tags) {} diff --git a/src/io/import/CIFImportVisitor.h b/src/io/import/CIFImportVisitor.h index 20e6f554db..b73271862c 100644 --- a/src/io/import/CIFImportVisitor.h +++ b/src/io/import/CIFImportVisitor.h @@ -4,7 +4,7 @@ #pragma once #include "CIFImportParserBaseVisitor.h" -#include "io/import/cif.h" +#include "nodes/cif/io/cif.h" #include "templates/optionalRef.h" #include diff --git a/src/io/import/CMakeLists.txt b/src/io/import/CMakeLists.txt index 63ad9a0fd9..2466de48dd 100644 --- a/src/io/import/CMakeLists.txt +++ b/src/io/import/CMakeLists.txt @@ -1,32 +1,5 @@ -# CIFImport ANTLR Lexer/Parser -antlr_target(CIFImportGrammarLexer CIFImportLexer.g4 LEXER) -# PACKAGE CIFImportGrammar) -antlr_target( - CIFImportGrammarParser - CIFImportParser.g4 - PARSER - # PACKAGE CIFImportGrammar - DEPENDS_ANTLR - CIFImportGrammarLexer - COMPILE_FLAGS - -no-listener - -visitor - -lib - ${ANTLR_CIFImportGrammarLexer_OUTPUT_DIR} -) - -# Append path to ANTLR parser output, and set cache variable -list(APPEND ANTLR_OUTPUT_DIRS ${ANTLR_CIFImportGrammarLexer_OUTPUT_DIR}) -list(APPEND ANTLR_OUTPUT_DIRS ${ANTLR_CIFImportGrammarParser_OUTPUT_DIR}) -set(ANTLR_OUTPUT_DIRS - ${ANTLR_OUTPUT_DIRS} - CACHE INTERNAL "" -) - add_library( import - cif.cpp - cifClasses.cpp coordinates.cpp coordinates_dlpoly.cpp coordinates_epsr.cpp @@ -49,8 +22,6 @@ add_library( trajectory.cpp 
trajectory_dlpoly.cpp values.cpp - cif.h - cifClasses.h coordinates.h data1D.h data2D.h @@ -59,15 +30,10 @@ add_library( species.h trajectory.h values.h - CIFImportErrorListeners.cpp - CIFImportVisitor.cpp - ${ANTLR_CIFImportGrammarLexer_CXX_OUTPUTS} - ${ANTLR_CIFImportGrammarParser_CXX_OUTPUTS} ) target_include_directories( - import PRIVATE ${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src ${ANTLR_CIFImportGrammarLexer_OUTPUT_DIR} - ${ANTLR_CIFImportGrammarParser_OUTPUT_DIR} + import PRIVATE ${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src ) -target_link_libraries(import base) +target_link_libraries(import base nodes) diff --git a/src/io/import/cif.cpp b/src/io/import/cif.cpp index d0bc116560..5724e257a5 100644 --- a/src/io/import/cif.cpp +++ b/src/io/import/cif.cpp @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2026 Team Dissolve and contributors -#include "io/import/cif.h" #include "CIFImportLexer.h" #include "base/messenger.h" #include "base/sysFunc.h" @@ -11,10 +10,10 @@ #include "generator/add.h" #include "generator/box.h" #include "generator/coordinateSets.h" -#include "io/import/CIFImportErrorListeners.h" -#include "io/import/CIFImportVisitor.h" -#include "io/import/cif.h" #include "neta/neta.h" +#include "nodes/cif/io/CIFImportErrorListeners.h" +#include "nodes/cif/io/CIFImportVisitor.h" +#include "nodes/cif/io/cifContext.h" #include "templates/algorithms.h" CIFHandler::CIFHandler() diff --git a/src/io/import/cif.h b/src/io/import/cif.h index fa9f34b812..d93483db31 100644 --- a/src/io/import/cif.h +++ b/src/io/import/cif.h @@ -7,9 +7,9 @@ #include "classes/coreData.h" #include "classes/species.h" #include "data/spaceGroups.h" -#include "io/import/cifClasses.h" #include "math/matrix4.h" #include "neta/neta.h" +#include "nodes/cif/io/cifClasses.h" #include "templates/flags.h" #include diff --git a/src/io/import/cifClasses.cpp b/src/io/import/cifClasses.cpp index 433973285b..a2ba6fd5b0 100644 --- 
a/src/io/import/cifClasses.cpp +++ b/src/io/import/cifClasses.cpp @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2026 Team Dissolve and contributors -#include "io/import/cifClasses.h" +#include "nodes/cif/io/cifClasses.h" #include "classes/empiricalFormula.h" #include "classes/molecule.h" #include "classes/species.h" diff --git a/src/nodes/CMakeLists.txt b/src/nodes/CMakeLists.txt index f26e9a23f0..88e577b828 100644 --- a/src/nodes/CMakeLists.txt +++ b/src/nodes/CMakeLists.txt @@ -10,7 +10,48 @@ file( "./*.h" ) -add_library(nodes ${node_sources} ${node_headers}) +# CIF/ANTLR +set(CIF_IO_DIR "${CMAKE_CURRENT_SOURCE_DIR}/cif/io") +set(ANTLR_CIF_PREFIX "NodeCIFImport") -target_include_directories(nodes PRIVATE ${PROJECT_SOURCE_DIR}/src) +# CIFImport ANTLR Lexer/Parser +antlr_target(CIFImportGrammarLexer "${CIF_IO_DIR}/CIFImportLexer.g4" LEXER) +# PACKAGE CIFImportGrammar) +antlr_target( + CIFImportGrammarParser + "${CIF_IO_DIR}/CIFImportParser.g4" + PARSER + # PACKAGE CIFImportGrammar + DEPENDS_ANTLR + CIFImportGrammarLexer + COMPILE_FLAGS + -no-listener + -visitor + -lib + ${ANTLR_CIFImportGrammarLexer_OUTPUT_DIR} +) + +# Append path to ANTLR parser output, and set cache variable +list(APPEND ANTLR_OUTPUT_DIRS ${ANTLR_CIFImportGrammarLexer_OUTPUT_DIR}) +list(APPEND ANTLR_OUTPUT_DIRS ${ANTLR_CIFImportGrammarParser_OUTPUT_DIR}) +set(ANTLR_OUTPUT_DIRS + ${ANTLR_OUTPUT_DIRS} + CACHE INTERNAL "" +) + +add_library(nodes + ${node_sources} + ${node_headers} + ${ANTLR_CIFImportGrammarLexer_CXX_OUTPUTS} + ${ANTLR_CIFImportGrammarParser_CXX_OUTPUTS} +) + +message(STATUS "ANTLR generated products located in the following directories: ${ANTLR_CIFImportGrammarLexer_CXX_OUTPUTS}, ${ANTLR_CIFImportGrammarParser_CXX_OUTPUTS}") + +target_include_directories(nodes PRIVATE + ${PROJECT_SOURCE_DIR}/src + ${PROJECT_BINARY_DIR}/src + ${ANTLR_CIFImportGrammarLexer_OUTPUT_DIR} + ${ANTLR_CIFImportGrammarParser_OUTPUT_DIR} +) target_link_libraries(nodes PUBLIC 
base ${THREADING_LINK_LIBS}) diff --git a/src/nodes/cifBondingOptions.cpp b/src/nodes/cif/cifBondingOptions.cpp similarity index 82% rename from src/nodes/cifBondingOptions.cpp rename to src/nodes/cif/cifBondingOptions.cpp index c70f0fa5d8..0d98707aa7 100644 --- a/src/nodes/cifBondingOptions.cpp +++ b/src/nodes/cif/cifBondingOptions.cpp @@ -1,16 +1,16 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2026 Team Dissolve and contributors -#include "nodes/cifBondingOptions.h" +#include "nodes/cif/cifBondingOptions.h" CIFBondingOptionsNode::CIFBondingOptionsNode(Graph *parentGraph) : Node(parentGraph) { // Inputs - addInput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) + addInput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) ->setFlags({ParameterBase::Required}); // Outputs - addOutput("CIFContext", "CIF handling context derived from parsing of CIF file", context_); + addOutput("CIFContext", "CIF handling context derived from parsing of CIF file", context_); // Options addOption("BondingTolerance", "Bonding tolerance, if calculating bonding rather than using CIF definitions", diff --git a/src/nodes/cifBondingOptions.h b/src/nodes/cif/cifBondingOptions.h similarity index 90% rename from src/nodes/cifBondingOptions.h rename to src/nodes/cif/cifBondingOptions.h index f67659187c..86097f525b 100644 --- a/src/nodes/cifBondingOptions.h +++ b/src/nodes/cif/cifBondingOptions.h @@ -3,10 +3,10 @@ #pragma once -#include "nodes/cifLoader.h" +#include "nodes/cif/io/cifContext.h" #include "nodes/node.h" -// CIFLoader Node +// ImportCIFStructure Node class CIFBondingOptionsNode : public Node { public: @@ -22,7 +22,7 @@ class CIFBondingOptionsNode : public Node */ private: // CIF handler context - CIFLoaderNode::CIFContext *context_{nullptr}; + CIFContext *context_{nullptr}; // Bonding tolerance, if calculating bonding rather than using CIF definitions Number bondingTolerance_{1.1}; // Whether to use 
CIF bonding definitions diff --git a/src/nodes/cifMolecularSpecies.cpp b/src/nodes/cif/cifMolecularSpecies.cpp similarity index 91% rename from src/nodes/cifMolecularSpecies.cpp rename to src/nodes/cif/cifMolecularSpecies.cpp index b47685750e..24e003f776 100644 --- a/src/nodes/cifMolecularSpecies.cpp +++ b/src/nodes/cif/cifMolecularSpecies.cpp @@ -1,14 +1,14 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2026 Team Dissolve and contributors -#include "nodes/cifMolecularSpecies.h" +#include "nodes/cif/cifMolecularSpecies.h" #include #include CIFMolecularSpeciesNode::CIFMolecularSpeciesNode(Graph *parentGraph) : Node(parentGraph) { // Inputs - addInput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) + addInput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) ->setFlags({ParameterBase::Required}); // Outputs diff --git a/src/nodes/cifMolecularSpecies.h b/src/nodes/cif/cifMolecularSpecies.h similarity index 90% rename from src/nodes/cifMolecularSpecies.h rename to src/nodes/cif/cifMolecularSpecies.h index 50df4671b8..fc842ad71a 100644 --- a/src/nodes/cifMolecularSpecies.h +++ b/src/nodes/cif/cifMolecularSpecies.h @@ -3,10 +3,10 @@ #pragma once -#include "nodes/cifLoader.h" +#include "nodes/cif/io/cifContext.h" #include "nodes/node.h" -// CIFLoader Node +// ImportCIFStructure Node class CIFMolecularSpeciesNode : public Node { public: @@ -22,7 +22,7 @@ class CIFMolecularSpeciesNode : public Node */ private: // CIF handler context - CIFLoaderNode::CIFContext *context_{nullptr}; + CIFContext *context_{nullptr}; // Supercell configuration Configuration *supercellConfiguration_{nullptr}; // Detected molecular species diff --git a/src/nodes/cifPeriodicFramework.cpp b/src/nodes/cif/cifPeriodicFramework.cpp similarity index 90% rename from src/nodes/cifPeriodicFramework.cpp rename to src/nodes/cif/cifPeriodicFramework.cpp index e47f3c193f..a80f9464e5 100644 --- a/src/nodes/cifPeriodicFramework.cpp 
+++ b/src/nodes/cif/cifPeriodicFramework.cpp @@ -1,12 +1,12 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2026 Team Dissolve and contributors -#include "nodes/cifPeriodicFramework.h" +#include "nodes/cif/cifPeriodicFramework.h" CIFPeriodicFrameworkNode::CIFPeriodicFrameworkNode(Graph *parentGraph) : Node(parentGraph) { // Inputs - addInput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) + addInput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) ->setFlags({ParameterBase::Required}); // Outputs diff --git a/src/nodes/cifPeriodicFramework.h b/src/nodes/cif/cifPeriodicFramework.h similarity index 91% rename from src/nodes/cifPeriodicFramework.h rename to src/nodes/cif/cifPeriodicFramework.h index 607150cbd9..d32b545bb6 100644 --- a/src/nodes/cifPeriodicFramework.h +++ b/src/nodes/cif/cifPeriodicFramework.h @@ -3,10 +3,10 @@ #pragma once -#include "nodes/cifLoader.h" +#include "nodes/cif/io/cifContext.h" #include "nodes/node.h" -// CIFLoader Node +// ImportCIFStructure Node class CIFPeriodicFrameworkNode : public Node { public: @@ -22,7 +22,7 @@ class CIFPeriodicFrameworkNode : public Node */ private: // CIF handler context - CIFLoaderNode::CIFContext *context_{nullptr}; + CIFContext *context_{nullptr}; // Supercell configuration Configuration *supercellConfiguration_{nullptr}; // Supercell species diff --git a/src/nodes/cifRemoveAtomic.cpp b/src/nodes/cif/cifRemoveAtomic.cpp similarity index 72% rename from src/nodes/cifRemoveAtomic.cpp rename to src/nodes/cif/cifRemoveAtomic.cpp index bb4ba604fb..6d18bcd696 100644 --- a/src/nodes/cifRemoveAtomic.cpp +++ b/src/nodes/cif/cifRemoveAtomic.cpp @@ -1,16 +1,16 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2026 Team Dissolve and contributors -#include "nodes/cifRemoveAtomic.h" +#include "nodes/cif/cifRemoveAtomic.h" CIFRemoveAtomicNode::CIFRemoveAtomicNode(Graph *parentGraph) : Node(parentGraph) { // Inputs - 
addInput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) + addInput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) ->setFlags({ParameterBase::Required}); // Outputs - addOutput("CIFContext", "CIF handling context derived from parsing of CIF file", context_); + addOutput("CIFContext", "CIF handling context derived from parsing of CIF file", context_); // Options addOption("RemoveAtomics", "Whether to remove free atomic moieties in clean-up", removeAtomics_); diff --git a/src/nodes/cifRemoveAtomic.h b/src/nodes/cif/cifRemoveAtomic.h similarity index 87% rename from src/nodes/cifRemoveAtomic.h rename to src/nodes/cif/cifRemoveAtomic.h index 4ac1c1c93a..d81d5ef38b 100644 --- a/src/nodes/cifRemoveAtomic.h +++ b/src/nodes/cif/cifRemoveAtomic.h @@ -3,10 +3,10 @@ #pragma once -#include "nodes/cifLoader.h" +#include "nodes/cif/io/cifContext.h" #include "nodes/node.h" -// CIFLoader Node +// ImportCIFStructure Node class CIFRemoveAtomicNode : public Node { public: @@ -22,7 +22,7 @@ class CIFRemoveAtomicNode : public Node */ private: // CIF handler context - CIFLoaderNode::CIFContext *context_{nullptr}; + CIFContext *context_{nullptr}; // Whether to remove free atomic moieties in clean-up bool removeAtomics_{false}; diff --git a/src/nodes/cifRemoveWater.cpp b/src/nodes/cif/cifRemoveWater.cpp similarity index 75% rename from src/nodes/cifRemoveWater.cpp rename to src/nodes/cif/cifRemoveWater.cpp index 9ac46cf7c5..d1b23f245f 100644 --- a/src/nodes/cifRemoveWater.cpp +++ b/src/nodes/cif/cifRemoveWater.cpp @@ -1,16 +1,16 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2026 Team Dissolve and contributors -#include "nodes/cifRemoveWater.h" +#include "nodes/cif/cifRemoveWater.h" CIFRemoveWaterNode::CIFRemoveWaterNode(Graph *parentGraph) : Node(parentGraph) { // Inputs - addInput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) + addInput("CIFContext", "CIF handling 
context derived from parsing of CIF file", context_) ->setFlags({ParameterBase::Required}); // Outputs - addOutput("CIFContext", "CIF handling context derived from parsing of CIF file", context_); + addOutput("CIFContext", "CIF handling context derived from parsing of CIF file", context_); // Options addOption("RemoveWaterAndCoordinatedOxygens", "Whether to remove water and coordinated oxygen atoms in clean-up", diff --git a/src/nodes/cifRemoveWater.h b/src/nodes/cif/cifRemoveWater.h similarity index 87% rename from src/nodes/cifRemoveWater.h rename to src/nodes/cif/cifRemoveWater.h index 73206b1fa2..4482a3a5d0 100644 --- a/src/nodes/cifRemoveWater.h +++ b/src/nodes/cif/cifRemoveWater.h @@ -3,10 +3,10 @@ #pragma once -#include "nodes/cifLoader.h" +#include "nodes/cif/io/cifContext.h" #include "nodes/node.h" -// CIFLoader Node +// ImportCIFStructure Node class CIFRemoveWaterNode : public Node { public: @@ -22,7 +22,7 @@ class CIFRemoveWaterNode : public Node */ private: // CIF handler context - CIFLoaderNode::CIFContext *context_{nullptr}; + CIFContext *context_{nullptr}; // Whether to remove water and coordinated oxygen atoms in clean-up bool removeWaterAndCoordinatedOxygens_{false}; diff --git a/src/nodes/cifStructureCleanup.cpp b/src/nodes/cif/cifStructureCleanup.cpp similarity index 79% rename from src/nodes/cifStructureCleanup.cpp rename to src/nodes/cif/cifStructureCleanup.cpp index ae2a5c0411..2c7562130d 100644 --- a/src/nodes/cifStructureCleanup.cpp +++ b/src/nodes/cif/cifStructureCleanup.cpp @@ -1,16 +1,16 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2026 Team Dissolve and contributors -#include "nodes/cifStructureCleanup.h" +#include "nodes/cif/cifStructureCleanup.h" CIFStructureCleanupNode::CIFStructureCleanupNode(Graph *parentGraph) : Node(parentGraph) { // Inputs - addInput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) + addInput("CIFContext", "CIF handling context derived from parsing of CIF file", 
context_) ->setFlags({ParameterBase::Required}); // Outputs - addOutput("CIFContext", "CIF handling context derived from parsing of CIF file", context_); + addOutput("CIFContext", "CIF handling context derived from parsing of CIF file", context_); // Options addOption("RemoveNETA", "Whether to remove by NETA definition in clean-up", removeNETA_); diff --git a/src/nodes/cifStructureCleanup.h b/src/nodes/cif/cifStructureCleanup.h similarity index 89% rename from src/nodes/cifStructureCleanup.h rename to src/nodes/cif/cifStructureCleanup.h index b0b75bd928..10ec0b8848 100644 --- a/src/nodes/cifStructureCleanup.h +++ b/src/nodes/cif/cifStructureCleanup.h @@ -3,10 +3,10 @@ #pragma once -#include "nodes/cifLoader.h" +#include "nodes/cif/io/cifContext.h" #include "nodes/node.h" -// CIFLoader Node +// ImportCIFStructure Node class CIFStructureCleanupNode : public Node { public: @@ -22,7 +22,7 @@ class CIFStructureCleanupNode : public Node */ private: // CIF handler context - CIFLoaderNode::CIFContext *context_{nullptr}; + CIFContext *context_{nullptr}; // Whether to remove by NETA definition in clean-up bool removeNETA_{false}; // Whether to expand NETA matches to fragments when removing in clean-up diff --git a/src/nodes/cifSuperMolecule.cpp b/src/nodes/cif/cifSuperMolecule.cpp similarity index 87% rename from src/nodes/cifSuperMolecule.cpp rename to src/nodes/cif/cifSuperMolecule.cpp index af8b6421c6..183079fb23 100644 --- a/src/nodes/cifSuperMolecule.cpp +++ b/src/nodes/cif/cifSuperMolecule.cpp @@ -1,12 +1,12 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2026 Team Dissolve and contributors -#include "nodes/cifSuperMolecule.h" +#include "nodes/cif/cifSuperMolecule.h" CIFSuperMoleculeNode::CIFSuperMoleculeNode(Graph *parentGraph) : Node(parentGraph) { // Inputs - addInput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) + addInput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) 
->setFlags({ParameterBase::Required}); // Outputs diff --git a/src/nodes/cifSuperMolecule.h b/src/nodes/cif/cifSuperMolecule.h similarity index 88% rename from src/nodes/cifSuperMolecule.h rename to src/nodes/cif/cifSuperMolecule.h index f96e0261b4..0af23edb60 100644 --- a/src/nodes/cifSuperMolecule.h +++ b/src/nodes/cif/cifSuperMolecule.h @@ -3,10 +3,10 @@ #pragma once -#include "nodes/cifLoader.h" +#include "nodes/cif/io/cifContext.h" #include "nodes/node.h" -// CIFLoader Node +// CIFSuperMolecule Node class CIFSuperMoleculeNode : public Node { public: @@ -22,7 +22,7 @@ class CIFSuperMoleculeNode : public Node */ private: // CIF handler context - CIFLoaderNode::CIFContext *context_{nullptr}; + CIFContext *context_{nullptr}; // Non-periodic species const Species *nonPeriodicSpecies_{nullptr}; // Supercell species diff --git a/src/nodes/cifLoader.cpp b/src/nodes/cif/importCIFStructure.cpp similarity index 60% rename from src/nodes/cifLoader.cpp rename to src/nodes/cif/importCIFStructure.cpp index 94e692a6d9..790b113010 100644 --- a/src/nodes/cifLoader.cpp +++ b/src/nodes/cif/importCIFStructure.cpp @@ -1,34 +1,41 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2026 Team Dissolve and contributors -#include "nodes/cifLoader.h" +#include "nodes/cif/importCIFStructure.h" #include #include #include -CIFLoaderNode::CIFLoaderNode(Graph *parentGraph) : Node(parentGraph) +ImportCIFStructureNode::ImportCIFStructureNode(Graph *parentGraph) : Node(parentGraph) { // Outputs addPointerOutput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) ->setFlags({ParameterBase::Required}); + addOutput("Structure", "Structure containing atoms and connectivity", structure_); // Option addOption("FilePath", "File path", filePath_); addOption("SpaceGroupID", "Set space group from index", spaceGroup_); } -std::string_view CIFLoaderNode::type() const { return "CIFLoader"; } +std::string_view ImportCIFStructureNode::type() const { return 
"ImportCIFStructure"; } -std::string_view CIFLoaderNode::summary() const { return "Load and parse a Crystallographic Information File (CIF)"; } +std::string_view ImportCIFStructureNode::summary() const +{ + return "Load and parse a Crystallographic Information File (CIF) to a structure"; +} // Run main processing -NodeConstants::ProcessResult CIFLoaderNode::process() +NodeConstants::ProcessResult ImportCIFStructureNode::process() { // Read contents of CIF file if (context_.read(filePath_)) { if (spaceGroup_ != SpaceGroups::NoSpaceGroup) context_.setSpaceGroup(spaceGroup_); + + structure_ = context_.structure(); + return NodeConstants::ProcessResult::Success; } diff --git a/src/nodes/cifLoader.h b/src/nodes/cif/importCIFStructure.h similarity index 69% rename from src/nodes/cifLoader.h rename to src/nodes/cif/importCIFStructure.h index f1aec6766b..87ecca0e49 100644 --- a/src/nodes/cifLoader.h +++ b/src/nodes/cif/importCIFStructure.h @@ -3,18 +3,16 @@ #pragma once -#include "io/import/cif.h" +#include "classes/structure.h" +#include "nodes/cif/io/cifContext.h" #include "nodes/node.h" -// CIFLoader Node -class CIFLoaderNode : public Node +// ImportCIFStructure Node +class ImportCIFStructureNode : public Node { public: - using CIFContext = CIFHandler; - - public: - CIFLoaderNode(Graph *parentGraph); - ~CIFLoaderNode() override = default; + ImportCIFStructureNode(Graph *parentGraph); + ~ImportCIFStructureNode() override = default; public: std::string_view type() const override; @@ -26,6 +24,8 @@ class CIFLoaderNode : public Node private: // CIF handler context CIFContext context_; + // Structure obtained from the CIF context (null until process() reads a file) + Structure *structure_{nullptr}; // Space group ID SpaceGroups::SpaceGroupId spaceGroup_{SpaceGroups::SpaceGroupId::NoSpaceGroup}; // CIF filepath diff --git a/src/nodes/cif/io/CIFImportErrorListeners.cpp b/src/nodes/cif/io/CIFImportErrorListeners.cpp new file mode 100644 index 0000000000..960fe3570d --- /dev/null +++ b/src/nodes/cif/io/CIFImportErrorListeners.cpp @@ -0,0 +1,40 @@ 
/*
 * Shared Helpers
 */

namespace
{
// Report where ANTLR detected a syntax error, then abort the parse by throwing a CIFImportSyntaxException.
// The ANTLR message is forwarded verbatim: it is only ever used as a format *argument* (here, and where
// CIFContext::parse prints what() through a "{}" placeholder), never as a format string, so escaping its
// braces would show doubled braces to the user.
[[noreturn]] void raiseCIFImportSyntaxError(size_t line, size_t charPositionInLine, const std::string &message)
{
    Messenger::print("\nError in CIFImport definition on line {}, column {}.\n", line, charPositionInLine);

    throw CIFImportExceptions::CIFImportSyntaxException(std::format("Syntax Error: {}", message));
}
} // namespace

/*
 * CIFImport Lexer Error Listener
 */

CIFImportLexerErrorListener::CIFImportLexerErrorListener() {}

// Handle a syntax error reported by the lexer - prints the location and throws CIFImportSyntaxException.
// The recognizer, token, and exception pointer supplied by ANTLR are not used.
void CIFImportLexerErrorListener::syntaxError(antlr4::Recognizer *recognizer, antlr4::Token *token, size_t line,
                                              size_t charPositionInLine, const std::string &message, std::exception_ptr ep)
{
    raiseCIFImportSyntaxError(line, charPositionInLine, message);
}

/*
 * CIFImport Parser Error Listener
 */

CIFImportParserErrorListener::CIFImportParserErrorListener() {}

// Handle a syntax error reported by the parser - prints the location and throws CIFImportSyntaxException.
// The recognizer, token, and exception pointer supplied by ANTLR are not used.
void CIFImportParserErrorListener::syntaxError(antlr4::Recognizer *recognizer, antlr4::Token *token, size_t line,
                                               size_t charPositionInLine, const std::string &message, std::exception_ptr ep)
{
    raiseCIFImportSyntaxError(line, charPositionInLine, message);
}
// CIFImport Syntax Exception
// Carries a human-readable description of a syntax problem found while lexing / parsing a CIF file.
class CIFImportSyntaxException : public std::exception
{
    public:
    // Construct the exception, copying the supplied message (a generic message is used if none is given)
    CIFImportSyntaxException(std::string_view message = "Undefined CIFImport Syntax Exception") : message_{message} {}

    private:
    // Error message
    std::string message_;

    public:
    // Return the stored error message. The pointer remains valid for the lifetime of this exception object.
    const char *what() const noexcept override { return message_.c_str(); }
};
// A data value. The embedded actions normalise the token text before it reaches the parser:
//  - Numeric             : keep only the text before the opening parenthesis of the bracketed uncertainty
//  - Single/DoubleQuoted : drop the enclosing quote characters
//  - SemiColonTextField  : drop the delimiting semicolons plus surrounding EOL / space characters
Value: '.'
    | '?'
    | Numeric { setText(std::string(DissolveSys::beforeChar(getText(), '('))); }
    | UnquotedString
    | SingleQuotedString { setText(getText().substr(1, getText().length()-2)); }
    | DoubleQuotedString { setText(getText().substr(1, getText().length()-2)); }
    | SemiColonTextField {
        // Use one character set for both the emptiness test and the slice. A field holding nothing but
        // delimiters, EOLs and spaces has no first content character, and that npos must never reach substr.
        const auto text = getText();
        const auto first = text.find_first_not_of(";\r\n ");
        setText(first == std::string::npos ? std::string() : text.substr(first, text.find_last_not_of(";\r\n ") - first + 1));
      }
    ;
+@parser::definitions {/* parser definitions section */} + +/* + * CIFImport Grammar + * + * Rules defined here are based on those specified for CIF 1.1 (https://www.iucr.org/resources/cif/spec/version1.1/cifsyntax#bnf). + * Some additional flexibility has been added, principally removing the enforced ordering of items to permit "looser" CIF files + * (of which there are many) to be read. + */ + +cif: (Comment | dataBlockHeading | dataBlock)+?; + +dataBlock: dataItems | saveFrame; + +dataBlockHeading: DATA_Name; + +saveFrame: saveFrameHeading dataItems+ SAVE_; + +saveFrameHeading: SAVE_Name; + +dataItems: name=Tag value=Value #taggedData +| loop #loopedData +; +loop: LOOP_ columns+=Tag+ values+=Value+; diff --git a/src/nodes/cif/io/CIFImportVisitor.cpp b/src/nodes/cif/io/CIFImportVisitor.cpp new file mode 100644 index 0000000000..8601d98135 --- /dev/null +++ b/src/nodes/cif/io/CIFImportVisitor.cpp @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +// Copyright (c) 2026 Team Dissolve and contributors + +#include "nodes/cif/io/CIFImportVisitor.h" +#include "base/messenger.h" +#include "base/sysFunc.h" +#include "nodes/cif/io/CIFImportErrorListeners.h" + +CIFImportVisitor::CIFImportVisitor(CIFContext::CIFTags &tags) : tags_(tags) {} + +/* + * Data + */ + +// Extract information from tree into supplied objects +void CIFImportVisitor::extract(CIFImportParser::CifContext *tree) +{ + // Traverse the AST + visitChildren(tree); + + // Summarise + Messenger::print("Read in {} unique CIF data tags.\n", tags_.size()); +} + +/* + * Visitor Overrides + */ + +antlrcpp::Any CIFImportVisitor::visitDataBlockHeading(CIFImportParser::DataBlockHeadingContext *ctx) +{ + tags_["DATA_"].push_back(std::string(DissolveSys::afterChar(ctx->getText(), "DATA_"))); + + return visitChildren(ctx); +} + +antlrcpp::Any CIFImportVisitor::visitTaggedData(CIFImportParser::TaggedDataContext *ctx) +{ + tags_[ctx->name->getText()].push_back(ctx->value->getText()); + + return 
visitChildren(ctx); +} + +antlrcpp::Any CIFImportVisitor::visitLoopedData(CIFImportParser::LoopedDataContext *ctx) +{ + // Sanity check table data size + if (ctx->loop()->values.size() % ctx->loop()->columns.size()) + Messenger::exception("CIF table has {} columns (first is '{}') but {} values (and C%V != 0).\n", + ctx->loop()->columns.size(), ctx->loop()->columns.front()->getText(), ctx->loop()->values.size()); + + // Construct / retrieve dictionary elements for columns + std::vector>> columns; + for (auto *column : ctx->loop()->columns) + columns.emplace_back(tags_[column->getText()]); + + // Add values to columns - data will be in row-major order + auto colId = 0u; + for (auto *value : ctx->loop()->values) + { + columns[colId].get().emplace_back(value->getText()); + ++colId; + colId %= ctx->loop()->columns.size(); + } + + return visitChildren(ctx); +} diff --git a/src/nodes/cif/io/CIFImportVisitor.h b/src/nodes/cif/io/CIFImportVisitor.h new file mode 100644 index 0000000000..9b079f801f --- /dev/null +++ b/src/nodes/cif/io/CIFImportVisitor.h @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +// Copyright (c) 2026 Team Dissolve and contributors + +#pragma once + +#include "CIFImportParserBaseVisitor.h" +#include "nodes/cif/io/cifContext.h" +#include "templates/optionalRef.h" +#include + +// CIFImport Visitor for ANTLR +class CIFImportVisitor : CIFImportParserBaseVisitor +{ + public: + CIFImportVisitor(CIFContext::CIFTags &tags); + ~CIFImportVisitor() override = default; + + /* + * Data + */ + private: + // Dictionary data storage + CIFContext::CIFTags &tags_; + + public: + // Extract information from tree + void extract(CIFImportParser::CifContext *tree); + + /* + * Visitor Overrides + */ + private: + antlrcpp::Any visitDataBlockHeading(CIFImportParser::DataBlockHeadingContext *ctx) override; + antlrcpp::Any visitTaggedData(CIFImportParser::TaggedDataContext *ctx) override; + antlrcpp::Any visitLoopedData(CIFImportParser::LoopedDataContext *ctx) 
override; +}; diff --git a/src/nodes/cif/io/cifClasses.cpp b/src/nodes/cif/io/cifClasses.cpp new file mode 100644 index 0000000000..a2ba6fd5b0 --- /dev/null +++ b/src/nodes/cif/io/cifClasses.cpp @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +// Copyright (c) 2026 Team Dissolve and contributors + +#include "nodes/cif/io/cifClasses.h" +#include "classes/empiricalFormula.h" +#include "classes/molecule.h" +#include "classes/species.h" + +/* + * CIF Symmetry-Unique Atom + */ + +CIFSymmetryAtom::CIFSymmetryAtom(std::string_view label, Elements::Element Z, Vector3 rFrac, double occ) + : label_{label}, Z_(Z), rFrac_(rFrac), occupancy_(occ) {}; + +// Return label (from _atom_site_label) +std::string_view CIFSymmetryAtom::label() const { return label_; } + +// Return element (from _atom_site_type_symbol) +Elements::Element CIFSymmetryAtom::Z() const { return Z_; } + +// Return fractional coordinate of atom (from _atom_site_fract_[xyz]) +Vector3 CIFSymmetryAtom::rFrac() const { return rFrac_; } + +// Return site occupancy (from _atom_site_occupancy) +double CIFSymmetryAtom::occupancy() const { return occupancy_; } + +/* + * CIF Bonding Pair + */ + +CIFBondingPair::CIFBondingPair(std::string_view labelI, std::string_view labelJ, double r) + : labelI_{labelI}, labelJ_{labelJ}, r_(r) {}; + +// Return labels of involved atom i (from _geom_bond_atom_site_label_1) +std::string_view CIFBondingPair::labelI() const { return labelI_; } + +// Return labels of involved atom i (from _geom_bond_atom_site_label_2) +std::string_view CIFBondingPair::labelJ() const { return labelJ_; } + +// Return distance between bonded pair (from _geom_bond_distance) +double CIFBondingPair::r() const { return r_; } + +/* + * CIF Atom Group + */ + +CIFAtomGroup::CIFAtomGroup(std::string_view name) : name_{name} {}; + +// Group name (from _atom_site_disorder_group) +std::string_view CIFAtomGroup::name() const { return name_; } + +// Add atom to the group +void CIFAtomGroup::addAtom(const 
CIFSymmetryAtom &i) { atoms_.emplace_back(i); } + +// Return atoms in the group +const std::vector &CIFAtomGroup::atoms() const { return atoms_; } + +// Set whether the group is active (included in unit cell generation) +void CIFAtomGroup::setActive(bool b) { active_ = b; } + +// Return whether the group is active (included in unit cell generation) +bool CIFAtomGroup::active() const { return active_; } + +/* + * CIF Assembly + */ + +CIFAssembly::CIFAssembly(std::string_view name) : name_{name} {} + +// Return name of the assembly (from _atom_site_disorder_assembly or 'Global') +std::string_view CIFAssembly::name() const { return name_; } + +// Return all groups +std::vector &CIFAssembly::groups() { return groups_; } +const std::vector &CIFAssembly::groups() const { return groups_; } + +// Get (add or retrieve) named group +CIFAtomGroup &CIFAssembly::getGroup(std::string_view groupName) +{ + auto it = + std::find_if(groups_.begin(), groups_.end(), [groupName](const auto &group) { return group.name() == groupName; }); + if (it != groups_.end()) + return *it; + + return groups_.emplace_back(groupName); +} + +// Return the number of defined groups +int CIFAssembly::nGroups() const { return groups_.size(); } + +/* + * CIF Molecular Species + */ + +CIFMolecularSpecies::CIFMolecularSpecies() : species_(std::make_shared()) {} + +// Return species parent for molecule instances +std::shared_ptr &CIFMolecularSpecies::species() { return species_; }; +const std::shared_ptr &CIFMolecularSpecies::species() const { return species_; }; + +// Return molecule instances +const std::vector &CIFMolecularSpecies::instances() const { return instances_; } +std::vector &CIFMolecularSpecies::instances() { return instances_; } + +// Append supplied instances to our vector +void CIFMolecularSpecies::appendInstances(const std::vector &newInstances) +{ + // Increase our reservation + instances_.reserve(instances_.size() + newInstances.size()); + instances_.insert(instances_.end(), 
newInstances.begin(), newInstances.end()); +} + +// Return coordinates for all instances as a vector of vectors +std::vector> CIFMolecularSpecies::allInstanceCoordinates() const +{ + std::vector> coordinates; + + for (auto &instance : instances_) + { + std::vector instanceCoords; + instanceCoords.reserve(species_->nAtoms()); + for (auto &atom : instance.localAtoms()) + instanceCoords.emplace_back(atom.r()); + + coordinates.emplace_back(instanceCoords); + } + + return coordinates; +} diff --git a/src/nodes/cif/io/cifClasses.h b/src/nodes/cif/io/cifClasses.h new file mode 100644 index 0000000000..dd9dcf613d --- /dev/null +++ b/src/nodes/cif/io/cifClasses.h @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +// Copyright (c) 2026 Team Dissolve and contributors + +#pragma once + +#include "classes/localMolecule.h" +#include "classes/species.h" +#include "data/elements.h" +#include "math/vector3.h" +#include +#include + +// Forward Declarations +class Species; + +// CIF Symmetry-Unique Atom +class CIFSymmetryAtom +{ + public: + CIFSymmetryAtom(std::string_view label, Elements::Element Z, Vector3 rFrac, double occ); + ~CIFSymmetryAtom() = default; + + private: + // Label (from _atom_site_label) + std::string label_; + // Element (from _atom_site_type_symbol) + Elements::Element Z_; + // Fractional coordinate of atom (from _atom_site_fract_[xyz]) + Vector3 rFrac_; + // Site occupancy (from _atom_site_occupancy) + double occupancy_; + + public: + // Return label (from _atom_site_label) + std::string_view label() const; + // Return element (from _atom_site_type_symbol) + Elements::Element Z() const; + // Return fractional coordinate of atom (from _atom_site_fract_[xyz]) + Vector3 rFrac() const; + // Return site occupancy (from _atom_site_occupancy) + double occupancy() const; +}; + +// CIF Bonding Pair +class CIFBondingPair +{ + public: + CIFBondingPair(std::string_view labelI, std::string_view labelJ, double r); + ~CIFBondingPair() = default; + + private: + // 
Labels of involved atoms (from _geom_bond_atom_site_label_[12]) + std::string labelI_, labelJ_; + // Distance between bonded pair (from _geom_bond_distance) + double r_; + + public: + // Return labels of involved atom i (from _geom_bond_atom_site_label_1) + std::string_view labelI() const; + // Return labels of involved atom i (from _geom_bond_atom_site_label_2) + std::string_view labelJ() const; + // Return distance between bonded pair (from _geom_bond_distance) + double r() const; +}; + +// CIF Atom Group +class CIFAtomGroup +{ + public: + CIFAtomGroup(std::string_view name); + ~CIFAtomGroup() = default; + + private: + // Group name (from _atom_site_disorder_group) + std::string name_; + // Atoms in the group + std::vector atoms_; + // Whether the group is active (included in unit cell generation) + bool active_{false}; + + public: + // Group name (from _atom_site_disorder_group) + std::string_view name() const; + // Add atom to the group + void addAtom(const CIFSymmetryAtom &i); + // Return atoms in the group + const std::vector &atoms() const; + // Set whether the group is active (included in unit cell generation) + void setActive(bool b); + // Return whether the group is active (included in unit cell generation) + bool active() const; +}; + +// CIF Assembly +class CIFAssembly +{ + public: + CIFAssembly(std::string_view name); + ~CIFAssembly() = default; + + private: + // Name of the assembly (from _atom_site_disorder_assembly or 'Global') + std::string name_; + // Available atom groups + std::vector groups_; + + public: + // Return name of the assembly (from _atom_site_disorder_assembly or 'Global') + std::string_view name() const; + // Return all groups + std::vector &groups(); + const std::vector &groups() const; + // Get (add or retrieve) named group + CIFAtomGroup &getGroup(std::string_view groupName); + // Return the number of defined groups + int nGroups() const; +}; + +// CIF Repeated Molecular Species +class CIFMolecularSpecies +{ + public: + 
CIFMolecularSpecies(); + + private: + // Species parent for molecule instances + std::shared_ptr species_; + // Molecule instances + std::vector instances_; + + public: + // Return species parent for molecule instances + std::shared_ptr &species(); + const std::shared_ptr &species() const; + // Return molecule instances + const std::vector &instances() const; + std::vector &instances(); + // Append supplied instances to our vector + void appendInstances(const std::vector &newInstances); + // Return coordinates for all instances as a vector of vectors + std::vector> allInstanceCoordinates() const; +}; diff --git a/src/nodes/cif/io/cifContext.cpp b/src/nodes/cif/io/cifContext.cpp new file mode 100644 index 0000000000..3ac123b3d4 --- /dev/null +++ b/src/nodes/cif/io/cifContext.cpp @@ -0,0 +1,1361 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +// Copyright (c) 2026 Team Dissolve and contributors + +#include "nodes/cif/io/cifContext.h" +#include "CIFImportLexer.h" +#include "CIFImportParser.h" +#include "base/messenger.h" +#include "base/sysFunc.h" +#include "classes/coreData.h" +#include "classes/empiricalFormula.h" +#include "classes/species.h" +#include "generator/add.h" +#include "generator/box.h" +#include "generator/coordinateSets.h" +#include "neta/neta.h" +#include "nodes/cif/io/CIFImportErrorListeners.h" +#include "nodes/cif/io/CIFImportVisitor.h" +#include "templates/algorithms.h" + +CIFContext::CIFContext() +{ + unitCellConfiguration_.setName("Structural"); + unitCellSpecies_.setName("Crystal"); + cleanedUnitCellConfiguration_.setName("Cleaned"); + cleanedUnitCellSpecies_.setName("Crystal (Cleaned)"); + supercellConfiguration_.setName("Supercell"); + supercellSpecies_.setName("Crystal (Supercell)"); +} + +/* + * Raw Data + */ + +// Parse supplied file into the destination objects +bool CIFContext::parse(std::string_view filename, CIFContext::CIFTags &tags) const +{ + // Set up ANTLR input stream + std::ifstream cifFile(std::string(filename), std::ios::in | 
std::ios::binary); + if (!cifFile.is_open()) + return Messenger::error("Failed to open CIF file '{}.\n", filename); + + antlr4::ANTLRInputStream input(cifFile); + + // Create ANTLR lexer and set-up error listener + CIFImportLexer lexer(&input); + CIFImportLexerErrorListener lexerErrorListener; + lexer.removeErrorListeners(); + lexer.addErrorListener(&lexerErrorListener); + + // Generate tokens from input stream + antlr4::CommonTokenStream tokens(&lexer); + + // Create ANTLR parser and set-up error listeners + CIFImportParser parser(&tokens); + CIFImportParserErrorListener parserErrorListener; + parser.removeErrorListeners(); + parser.removeParseListeners(); + parser.addErrorListener(&lexerErrorListener); + parser.addErrorListener(&parserErrorListener); + + // Generate the AST + CIFImportParser::CifContext *tree = nullptr; + try + { + tree = parser.cif(); + } + catch (CIFImportExceptions::CIFImportSyntaxException &ex) + { + Messenger::error("{}", ex.what()); + return false; + } + + // Visit the nodes in the AST + CIFImportVisitor visitor(tags); + try + { + visitor.extract(tree); + } + catch (CIFImportExceptions::CIFImportSyntaxException &ex) + { + return Messenger::error("{}", ex.what()); + } + + return true; +} + +// Return whether the specified file parses correctly +bool CIFContext::validFile(std::string_view filename) const +{ + CIFTags tags; + return parse(filename, tags); +} + +// Read CIF data from specified file +bool CIFContext::read(std::string_view filename) +{ + assemblies_.clear(); + bondingPairs_.clear(); + tags_.clear(); + + if (!parse(filename, tags_)) + return Messenger::error("Failed to parse CIF file '{}'.\n", filename); + + /* + * Determine space group - the search order for tags is: + * + * 1. Hall symbol + * 2. Hermann-Mauginn name + * 3. Space group index + * + * In the case of 2 or 3 we also try to search for the origin choice. 
+ * + * If a space group has already been set, don't try to overwrite it (it was probably forcibly set because the detection + * below fails). + */ + + // Check for Hall symbol + if (spaceGroup_ == SpaceGroups::NoSpaceGroup && hasTag("_space_group_name_Hall")) + spaceGroup_ = SpaceGroups::findByHallSymbol(*getTagString("_space_group_name_Hall")); + if (spaceGroup_ == SpaceGroups::NoSpaceGroup && hasTag("_symmetry_space_group_name_Hall")) + spaceGroup_ = SpaceGroups::findByHallSymbol(*getTagString("_symmetry_space_group_name_Hall")); + + if (spaceGroup_ == SpaceGroups::NoSpaceGroup) + { + // Might need the coordinate system code... + auto sgCode = getTagString("_space_group.IT_coordinate_system_code"); + + // Find a HM name + if (hasTag("_space_group_name_H-M_alt")) + spaceGroup_ = + SpaceGroups::findByHermannMauginnSymbol(*getTagString("_space_group_name_H-M_alt"), sgCode.value_or("")); + if (spaceGroup_ == SpaceGroups::NoSpaceGroup && hasTag("_symmetry_space_group_name_H-M")) + spaceGroup_ = + SpaceGroups::findByHermannMauginnSymbol(*getTagString("_symmetry_space_group_name_H-M"), sgCode.value_or("")); + + // Find a space group index? 
+ if (spaceGroup_ == SpaceGroups::NoSpaceGroup && hasTag("_space_group_IT_number")) + spaceGroup_ = + SpaceGroups::findByInternationalTablesIndex(*getTagInt("_space_group_IT_number"), sgCode.value_or("")); + if (spaceGroup_ == SpaceGroups::NoSpaceGroup && hasTag("_space_group.IT_number")) + spaceGroup_ = + SpaceGroups::findByInternationalTablesIndex(*getTagInt("_space_group.IT_number"), sgCode.value_or("")); + if (spaceGroup_ == SpaceGroups::NoSpaceGroup && hasTag("_symmetry_Int_Tables_number")) + spaceGroup_ = + SpaceGroups::findByInternationalTablesIndex(*getTagInt("_symmetry_Int_Tables_number"), sgCode.value_or("")); + } + + // Create symmetry-unique atoms list + auto atomSiteLabel = getTagStrings("_atom_site_label"); + auto atomSiteTypeSymbol = getTagStrings("_atom_site_type_symbol"); + auto atomSiteFractX = getTagDoubles("_atom_site_fract_x"); + auto atomSiteFractY = getTagDoubles("_atom_site_fract_y"); + auto atomSiteFractZ = getTagDoubles("_atom_site_fract_z"); + auto atomSiteOccupancy = getTagDoubles("_atom_site_occupancy"); + auto atomDisorderAssembly = getTagStrings("_atom_site_disorder_assembly"); + auto atomDisorderGroup = getTagStrings("_atom_site_disorder_group"); + if (atomSiteLabel.empty() && atomSiteTypeSymbol.empty()) + return Messenger::error( + "No suitable atom site names found (no '_atom_site_label' or '_atom_site_type_symbol' tags present in CIF).\n"); + if (atomSiteFractX.empty() || atomSiteFractY.empty() || atomSiteFractZ.empty()) + return Messenger::error("Atom site fractional positions are incomplete (vector sizes are {}, {}, and {}).\n", + atomSiteFractX.size(), atomSiteFractY.size(), atomSiteFractZ.size()); + if (!((atomSiteFractX.size() == atomSiteFractY.size()) && (atomSiteFractX.size() == atomSiteFractZ.size()))) + return Messenger::error("Atom site fractional positions have mismatched sizes (vector sizes are {}, {}, and {}).\n", + atomSiteFractX.size(), atomSiteFractY.size(), atomSiteFractZ.size()); + for (auto n = 0; n < 
atomSiteFractX.size(); ++n) + { + // Get standard information + auto label = n < atomSiteLabel.size() ? atomSiteLabel[n] : std::format("{}{}", atomSiteTypeSymbol[n], n); + auto Z = n < atomSiteTypeSymbol.size() + ? Elements::element(atomSiteTypeSymbol[n]) + : (n < atomSiteLabel.size() ? Elements::element(atomSiteLabel[n]) : Elements::Unknown); + auto occ = n < atomSiteOccupancy.size() ? atomSiteOccupancy[n] : 1.0; + Vector3 rFrac(atomSiteFractX[n], atomSiteFractY[n], atomSiteFractZ[n]); + + // Add the atom to an assembly - there are three possibilities regarding (disorder) grouping: + // 1) A group is defined, but no assembly - add the atom to the 'Disorder' assembly + // 2) An assembly and a group are defined - add it to that + // 3) No group or assembly are defined - add the atom to the 'Global' assembly under a 'Default' group + auto assemblyName = atomDisorderAssembly.empty() ? "." : atomDisorderAssembly[n]; + auto groupName = atomDisorderGroup.empty() ? "." : atomDisorderGroup[n]; + if (assemblyName == "." && groupName != ".") + assemblyName = "Disorder"; + else if (assemblyName == "." 
&& groupName == ".") + assemblyName = "Global"; + + if (groupName == ".") + groupName = "Default"; + + // Get the assembly and group that we're adding the atom to + auto &assembly = getAssembly(assemblyName); + auto &group = assembly.getGroup(groupName); + group.setActive(groupName == "Default" || groupName == "1"); + group.addAtom({label, Z, rFrac, occ}); + } + + // Construct bonding pairs list + auto bondLabelsI = getTagStrings("_geom_bond_atom_site_label_1"); + auto bondLabelsJ = getTagStrings("_geom_bond_atom_site_label_2"); + auto bondDistances = getTagDoubles("_geom_bond_distance"); + if (bondLabelsI.size() == bondLabelsJ.size() && (bondLabelsI.size() == bondDistances.size())) + { + for (auto &&[i, j, r] : zip(bondLabelsI, bondLabelsJ, bondDistances)) + bondingPairs_.emplace_back(i, j, r); + } + else + Messenger::warn("Bonding pairs array sizes are mismatched, so no bonding information will be available."); + + return true; +} + +// Return if the specified tag exists +bool CIFContext::hasTag(std::string tag) const { return tags_.find(tag) != tags_.end(); } + +// Return tag data string (if it exists) assuming a single datum (first in the vector) +std::optional CIFContext::getTagString(std::string tag) const +{ + auto it = tags_.find(tag); + if (it == tags_.end()) + return std::nullopt; + + // Check data vector size + if (it->second.size() != 1) + Messenger::warn("Returning first datum for tag '{}', but {} are available.\n", tag, it->second.size()); + + return it->second.front(); +} + +// Return tag data strings (if it exists) +std::vector CIFContext::getTagStrings(std::string tag) const +{ + auto it = tags_.find(tag); + if (it == tags_.end()) + return {}; + + return it->second; +} + +// Return tag data as double (if it exists) assuming a single datum (first in the vector) +std::optional CIFContext::getTagDouble(std::string tag) const +{ + auto it = tags_.find(tag); + if (it == tags_.end()) + return std::nullopt; + + // Check data vector size + if 
(it->second.size() != 1) + Messenger::warn("Returning first datum for tag '{}', but {} are available.\n", tag, it->second.size()); + + double result; + try + { + result = std::stod(it->second.front()); + } + catch (...) + { + Messenger::error("Data tag '{}' contains a value that can't be converted to a double ('{}').\n", tag, + it->second.front()); + return std::nullopt; + } + + return result; +} + +// Return tag data doubles (if it exists) +std::vector CIFContext::getTagDoubles(std::string tag) const +{ + auto it = tags_.find(tag); + if (it == tags_.end()) + return {}; + + std::vector v; + for (const auto &s : it->second) + { + auto d = 0.0; + try + { + d = std::stod(s); + } + catch (...) + { + Messenger::warn("Data tag '{}' contains a value that can't be converted to a double ('{}').\n", tag, s); + } + v.push_back(d); + } + + return v; +} + +// Return tag data as integer (if it exists) assuming a single datum (first in the vector) +std::optional CIFContext::getTagInt(std::string tag) const +{ + auto it = tags_.find(tag); + if (it == tags_.end()) + return std::nullopt; + + // Check data vector size + if (it->second.size() != 1) + Messenger::warn("Returning first datum for tag '{}', but {} are available.\n", tag, it->second.size()); + + int result; + try + { + result = std::stoi(it->second.front()); + } + catch (...) 
+ { + Messenger::error("Data tag '{}' contains a value that can't be converted to an integer ('{}').\n", tag, + it->second.front()); + return std::nullopt; + } + + return result; +} + +/* + * Processed Data + */ + +// Set space group from index +void CIFContext::setSpaceGroup(SpaceGroups::SpaceGroupId sgid) +{ + if (spaceGroup_ == sgid) + return; + + spaceGroup_ = sgid; + generate(); +} + +// Return space group information +SpaceGroups::SpaceGroupId CIFContext::spaceGroup() const { return spaceGroup_; } + +// Return cell lengths +std::optional CIFContext::getCellLengths() const +{ + auto a = getTagDouble("_cell_length_a"); + if (!a) + Messenger::error("Cell length A not defined in CIF.\n"); + auto b = getTagDouble("_cell_length_b"); + if (!b) + Messenger::error("Cell length B not defined in CIF.\n"); + auto c = getTagDouble("_cell_length_c"); + if (!c) + Messenger::error("Cell length C not defined in CIF.\n"); + + if (a && b && c) + return Vector3(a.value(), b.value(), c.value()); + else + return std::nullopt; +} + +// Return cell angles +std::optional CIFContext::getCellAngles() const +{ + auto alpha = getTagDouble("_cell_angle_alpha"); + if (!alpha) + Messenger::error("Cell angle alpha not defined in CIF.\n"); + auto beta = getTagDouble("_cell_angle_beta"); + if (!beta) + Messenger::error("Cell angle beta not defined in CIF.\n"); + auto gamma = getTagDouble("_cell_angle_gamma"); + if (!gamma) + Messenger::error("Cell angle gamma not defined in CIF.\n"); + + if (alpha && beta && gamma) + return Vector3(alpha.value(), beta.value(), gamma.value()); + else + return std::nullopt; +} + +// Return chemical formula +std::string CIFContext::chemicalFormula() const +{ + auto it = tags_.find("_chemical_formula_sum"); + return (it != tags_.end() ? 
it->second.front() : "Unknown"); +} + +// Get (add or retrieve) named assembly +CIFAssembly &CIFContext::getAssembly(std::string_view name) +{ + auto it = std::find_if(assemblies_.begin(), assemblies_.end(), [name](const auto &a) { return a.name() == name; }); + if (it != assemblies_.end()) + return *it; + + return assemblies_.emplace_back(name); +} + +// Return atom assemblies +std::vector &CIFContext::assemblies() { return assemblies_; } + +const std::vector &CIFContext::assemblies() const { return assemblies_; } + +// Return whether any bond distances are defined +bool CIFContext::hasBondDistances() const { return !bondingPairs_.empty(); } + +// Return whether a bond distance is defined for the specified label pair +std::optional CIFContext::bondDistance(std::string_view labelI, std::string_view labelJ) const +{ + auto it = std::find_if( + bondingPairs_.begin(), bondingPairs_.end(), [labelI, labelJ](const auto &bp) + { return (bp.labelI() == labelI && bp.labelJ() == labelJ) || (bp.labelI() == labelJ && bp.labelJ() == labelI); }); + if (it != bondingPairs_.end()) + return it->r(); + return std::nullopt; +} + +/* + * Creation + */ + +// Create basic unit cell +bool CIFContext::createBasicUnitCell() +{ + unitCellConfiguration_.empty(); + unitCellSpecies_.clear(); + atomLabelTypes_.clear(); + + // Create temporary atom types corresponding to the unique atom labels + for (auto &a : assemblies_) + { + for (auto &g : a.groups()) + { + if (!g.active()) + continue; + + for (auto &i : g.atoms()) + { + if (std::find_if(atomLabelTypes_.begin(), atomLabelTypes_.end(), + [i](const auto &at) { return i.label() == at->name(); }) == atomLabelTypes_.end()) + { + atomLabelTypes_.emplace_back(std::make_shared(i.Z(), i.label())); + } + } + } + } + + // Configure the unit cell "species" + auto cellLengths = getCellLengths(); + if (!cellLengths) + return false; + auto cellAngles = getCellAngles(); + if (!cellAngles) + return false; + unitCellSpecies_.createBox(cellLengths.value(), 
cellAngles.value()); + auto *box = unitCellSpecies_.box(); + + // Configuration + Messenger::setQuiet(true); + unitCellConfiguration_.createBoxAndCells(cellLengths.value(), cellAngles.value(), false); + Messenger::setQuiet(false); + + // -- Generate atoms + auto symmetryGenerators = SpaceGroups::symmetryOperators(spaceGroup_); + for (const auto &generator : symmetryGenerators) + for (auto &a : assemblies_) + for (auto &g : a.groups()) + if (g.active()) + for (auto &unique : g.atoms()) + { + // Generate folded atomic position in real space + auto r = generator * unique.rFrac(); + box->toReal(r); + r = box->fold(r); + + // If this atom overlaps with another in the box, don't add it as it's a symmetry-related copy + if (std::any_of(unitCellSpecies_.atoms().begin(), unitCellSpecies_.atoms().end(), + [&, r, box](const auto &j) + { return box->minimumDistance(r, j.r()) < overlapTolerance_; })) + continue; + + // Create the new atom + auto atIt = std::find_if(atomLabelTypes_.begin(), atomLabelTypes_.end(), + [&unique](const auto at) { return unique.label() == at->name(); }); + unitCellSpecies_.addAtom(unique.Z(), r, 0.0, atIt != atomLabelTypes_.end() ? atIt->get() : nullptr); + } + + // Check that we actually generated some atoms... 
+ if (unitCellSpecies_.nAtoms() == 0) + return false; + + // Bonding + if (!preventAllBonds_) + if (useCIFBondingDefinitions_) + applyCIFBonding(&unitCellSpecies_, preventMetallicBonds_); + else + unitCellSpecies_.addMissingBonds(bondingTolerance_, preventMetallicBonds_); + + unitCellConfiguration_.addMolecule(&unitCellSpecies_); + unitCellConfiguration_.updateObjectRelationships(); + + Messenger::print("Created basic crystal unit cell - {} non-overlapping atoms.\n", unitCellSpecies_.nAtoms()); + + return true; +} + +// Create structure from basic unit cell atoms and connectivity +bool CIFContext::createStructure() +{ + // Create box for structure + auto box = unitCellConfiguration_.box(); + structure_.createBox(box->axisLengths(), box->axisAngles(), box->type() == Box::BoxType::NonPeriodic); + + // Add structural atoms + const auto &atoms = unitCellSpecies_.atoms(); + for (const auto &atom : atoms) + structure_.addAtom(atom.Z(), atom.r(), atom.q()); + + // Iterate over newly added structure atom pairs, adding any existing bonds between them if not already present + auto pairs = PairIterator(structure_.nAtoms()); + for (const auto &pair : pairs) + { + auto [i, j] = pair; + auto atomI = structure_.atomAt(i); + auto atomJ = structure_.atomAt(j); + + if (!structure_.hasBond(atomI, atomJ)) + structure_.addBond(atomI, atomJ); + } + + Messenger::print("Created basic structure - {} structure atoms, {} structure bonds found whle parsing the CIF.\n", structure_.nAtoms(), structure_.bonds().size()); + + return true; +} + +// Create the cleaned unit cell +bool CIFContext::createCleanedUnitCell() +{ + cleanedUnitCellConfiguration_.empty(); + cleanedUnitCellSpecies_.clear(); + + // Configure the species + cleanedUnitCellSpecies_.copyBasic(&unitCellSpecies_, true); + auto cellLengths = getCellLengths(); + if (!cellLengths) + return false; + auto cellAngles = getCellAngles(); + if (!cellAngles) + return false; + cleanedUnitCellSpecies_.createBox(cellLengths.value(), 
cellAngles.value()); + + // Configuration + Messenger::setQuiet(true); + cleanedUnitCellConfiguration_.createBoxAndCells(cellLengths.value(), cellAngles.value(), false); + Messenger::setQuiet(false); + + if (removeAtomics_) + { + std::vector indicesToRemove; + for (const auto &i : cleanedUnitCellSpecies_.atoms()) + if (i.nBonds() == 0) + indicesToRemove.push_back(i.index()); + Messenger::print("Atomic removal deleted {} atoms.\n", indicesToRemove.size()); + + // Remove selected atoms + cleanedUnitCellSpecies_.removeAtoms(indicesToRemove); + } + + if (removeWaterAndCoordinateOxygens_) + { + NETADefinition waterVacuum("?O,nbonds=1,nh<=1|?O,nbonds>=2,-H(nbonds=1,-O)"); + if (!waterVacuum.isValid()) + { + Messenger::error("NETA definition for water removal is invalid.\n"); + return false; + } + + std::vector indicesToRemove; + for (const auto &i : cleanedUnitCellSpecies_.atoms()) + if (waterVacuum.matches(&i)) + indicesToRemove.push_back(i.index()); + Messenger::print("Water removal deleted {} atoms.\n", indicesToRemove.size()); + + // Remove selected atoms + cleanedUnitCellSpecies_.removeAtoms(indicesToRemove); + } + + if (removeNETA_ && moietyRemovalNETA_.isValid()) + { + // Select all atoms that are in moieties where one of its atoms matches our NETA definition + std::vector indicesToRemove; + for (auto &i : cleanedUnitCellSpecies_.atoms()) + if (moietyRemovalNETA_.matches(&i)) + { + // Select all atoms that are part of the same moiety? 
+ if (removeNETAByFragment_) + { + cleanedUnitCellSpecies_.clearAtomSelection(); + auto selection = cleanedUnitCellSpecies_.fragment(i.index()); + std::copy(selection.begin(), selection.end(), std::back_inserter(indicesToRemove)); + } + else + indicesToRemove.push_back(i.index()); + } + Messenger::print("Moiety removal deleted {} atoms.\n", indicesToRemove.size()); + + // Remove selected atoms + cleanedUnitCellSpecies_.removeAtoms(indicesToRemove); + } + + cleanedUnitCellConfiguration_.addMolecule(&cleanedUnitCellSpecies_); + cleanedUnitCellConfiguration_.updateObjectRelationships(); + + Messenger::print("Created cleaned crystal unit cell - {} atoms after removal(s).\n", cleanedUnitCellSpecies_.nAtoms()); + + return true; +} + +// Try to detect molecules in the cell contents +bool CIFContext::detectMolecules() +{ + molecularSpecies_.clear(); + + // Try selecting within the species from the first atom - if this captures all atoms we have a bound framework... + if (cleanedUnitCellSpecies_.fragment(0).size() == cleanedUnitCellSpecies_.nAtoms()) + { + Messenger::print( + "Can't create molecular definitions since this unit cell appears to be a continuous framework/network. 
Consider " + "adjusting the bonding options in order to generate molecular fragments.\n"); + return false; + } + + std::vector atomMask(cleanedUnitCellSpecies_.nAtoms(), false); + + // Find all molecular species, and their instances + auto indexIterator = atomMask.begin(); + while (indexIterator != atomMask.end()) + { + // Select a fragment from the next available index + auto atomIndex = indexIterator - atomMask.begin(); + auto fragmentIndices = cleanedUnitCellSpecies_.fragment(atomIndex); + + // Create a new CIF molecular species from the fragment + auto &cifSp = molecularSpecies_.emplace_back(); + auto *sp = cifSp.species().get(); + // -- Copy selected atoms + for (auto fragAtomIndex : fragmentIndices) + { + const auto &unitCellAtom = cleanedUnitCellSpecies_.atom(fragAtomIndex); + sp->addAtom(unitCellAtom.Z(), unitCellAtom.r(), 0.0, unitCellAtom.atomType()); + } + + // Give the species a temporary unit cell so we can calculate / apply bonding + if (!preventAllBonds_) + sp->createBox(cleanedUnitCellSpecies_.box()->axisLengths(), cleanedUnitCellSpecies_.box()->axisAngles()); + if (useCIFBondingDefinitions_) + applyCIFBonding(sp, preventMetallicBonds_); + else + sp->addMissingBonds(bondingTolerance_, preventMetallicBonds_); + sp->removeBox(); + + // Set up a temporary molecule to unfold the species + LocalMolecule tempMol(sp); + tempMol.unFold(cleanedUnitCellSpecies_.box()); + for (auto &&[molAtom, spAtom] : zip(tempMol.localAtoms(), sp->atoms())) + spAtom.setR(molAtom.r()); + + // Give the species a name + sp->setName(EmpiricalFormula::formula(sp->atoms(), [&](const auto &at) { return at.Z(); })); + + // Find instances of this fragment. For large fragments that represent > 50% of the remaining atoms we don't even + // attempt to create a NETA definition etc. For cases such as framework species this will speed up detection no end. 
+ std::vector instances; + if (fragmentIndices.size() * 2 > cleanedUnitCellSpecies_.nAtoms()) + { + // Create an instance of the current fragment + auto &mol = instances.emplace_back(sp); + for (auto i = 0; i < sp->nAtoms(); ++i) + atomMask[fragmentIndices[i]] = true; + } + else + { + // Determine the best NETA definition describing the fragment + auto &&[bestNETA, rootAtoms] = bestNETADefinition(sp); + if (rootAtoms.empty()) + return Messenger::error( + "Couldn't generate molecular partitioning for CIF - no suitable NETA definition for the " + "fragment {} could be determined.\n", + sp->name()); + + // Find instances of this fragment + instances = getSpeciesInstances(sp, atomMask, bestNETA, rootAtoms); + if (instances.empty()) + { + molecularSpecies_.clear(); + return Messenger::error("Failed to find species instances for fragment '{}'.\n", sp->name()); + } + } + + // Store the instances + cifSp.instances() = instances; + + // Search for the next valid starting index + indexIterator = std::find(std::next(indexIterator), atomMask.end(), false); + } + + Messenger::print("Partitioned unit cell into {} distinct molecular species:\n\n", molecularSpecies_.size()); + Messenger::print(" ID N Species Formula\n"); + auto count = 1; + for (const auto &cifMol : molecularSpecies_) + Messenger::print(" {:3d} {:4d} {}\n", count++, cifMol.instances().size(), + EmpiricalFormula::formula(cifMol.species()->atoms(), [](const auto &i) { return i.Z(); })); + Messenger::print(""); + + return true; +} + +// Create supercell species +bool CIFContext::createSupercell() +{ + supercellConfiguration_.empty(); + supercellSpecies_.clear(); + + // Configure the species + auto supercellLengths = cleanedUnitCellSpecies_.box()->axisLengths(); + supercellLengths.multiply(supercellRepeat_.x, supercellRepeat_.y, supercellRepeat_.z); + supercellSpecies_.createBox(supercellLengths, cleanedUnitCellSpecies_.box()->axisAngles(), false); + + // Set up configuration + Messenger::setQuiet(true); + 
supercellConfiguration_.createBoxAndCells(supercellLengths, cleanedUnitCellSpecies_.box()->axisAngles(), false); + Messenger::setQuiet(false); + + // Copy atoms from the Crystal species - we'll do the bonding afterwards + if (molecularSpecies_.empty()) + { + supercellSpecies_.atoms().reserve(supercellRepeat_.x * supercellRepeat_.y * supercellRepeat_.z * + cleanedUnitCellSpecies_.nAtoms()); + for (auto ix = 0; ix < supercellRepeat_.x; ++ix) + for (auto iy = 0; iy < supercellRepeat_.y; ++iy) + for (auto iz = 0; iz < supercellRepeat_.z; ++iz) + { + Vector3 deltaR = cleanedUnitCellSpecies_.box()->axes() * Vector3(ix, iy, iz); + for (const auto &i : cleanedUnitCellSpecies_.atoms()) + supercellSpecies_.addAtom(i.Z(), i.r() + deltaR, 0.0, i.atomType()); + } + if (!preventAllBonds_) + if (useCIFBondingDefinitions_) + applyCIFBonding(&supercellSpecies_, preventMetallicBonds_); + else + supercellSpecies_.addMissingBonds(bondingTolerance_, preventMetallicBonds_); + + // Add the structural species to the configuration + supercellConfiguration_.addMolecule(&supercellSpecies_); + supercellConfiguration_.updateObjectRelationships(); + } + else + { + supercellSpecies_.atoms().reserve(supercellRepeat_.x * supercellRepeat_.y * supercellRepeat_.z * + cleanedUnitCellSpecies_.nAtoms()); + + // Create images of all molecular unit cell species + for (auto &molecularSpecies : molecularSpecies_) + { + const auto *sp = molecularSpecies.species().get(); + const auto &coreInstances = molecularSpecies.instances(); + std::vector supercellInstances; + supercellInstances.reserve(supercellRepeat_.x * supercellRepeat_.y * supercellRepeat_.z * coreInstances.size()); + + // Loop over cell images + for (auto ix = 0; ix < supercellRepeat_.x; ++ix) + { + for (auto iy = 0; iy < supercellRepeat_.y; ++iy) + { + for (auto iz = 0; iz < supercellRepeat_.z; ++iz) + { + // Skip origin cell + if (ix == 0 && iy == 0 && iz == 0) + continue; + + // Set translation vector + auto tVec = 
cleanedUnitCellSpecies_.box()->axes() * Vector3(ix, iy, iz); + + // Create images of core molecule instances + for (auto &instance : coreInstances) + { + auto &mol = supercellInstances.emplace_back(); + mol.setSpecies(sp); + + for (auto &&[coreAtom, instanceAtom] : zip(instance.localAtoms(), mol.localAtoms())) + instanceAtom.setR(coreAtom.r() + tVec); + } + } + } + } + + // Append the new instances to our existing ones for the unit cell + molecularSpecies.appendInstances(supercellInstances); + + // Add the molecules to our configuration + for (const auto &instance : molecularSpecies.instances()) + { + auto mol = supercellConfiguration_.addMolecule(sp); + for (auto &&[molAtom, instanceAtom] : zip(mol->atoms(), instance.localAtoms())) + molAtom->setR(instanceAtom.r()); + } + } + + supercellConfiguration_.updateObjectRelationships(); + } + + Messenger::print("Created ({}, {}, {}) supercell - {} atoms total.\n", supercellRepeat_.x, supercellRepeat_.y, + supercellRepeat_.z, supercellConfiguration_.nAtoms()); + + return true; +} + +// Set overlap tolerance +void CIFContext::setOverlapTolerance(double tol) +{ + overlapTolerance_ = tol; + + generate(CIFGenerationStage::CreateBasicUnitCell); +} + +// Set whether to use CIF bonding definitions +void CIFContext::setUseCIFBondingDefinitions(bool b) +{ + if (useCIFBondingDefinitions_ == b) + return; + + useCIFBondingDefinitions_ = b; + + generate(); +} + +// Set bonding tolerance +void CIFContext::setBondingTolerance(double tol) +{ + bondingTolerance_ = tol; + + if (!useCIFBondingDefinitions_) + generate(); +} + +// Whether to ignore all bonds +void CIFContext::setPreventAllBonds(bool b) +{ + if (preventAllBonds_ == b) + return; + + preventAllBonds_ = b; + + generate(); +} + +// Set whether to prevent metallic bonding +void CIFContext::setPreventMetallicBonds(bool b) +{ + if (preventMetallicBonds_ == b) + return; + + preventMetallicBonds_ = b; + + generate(); +} + +// Set whether to remove free atomic moieties in clean-up +void 
CIFContext::setRemoveAtomics(bool b) +{ + if (removeAtomics_ == b) + return; + + removeAtomics_ = b; + + generate(CIFGenerationStage::CreateCleanedUnitCell); +} + +// Set whether to remove water and coordinated oxygen atoms in clean-up +void CIFContext::setRemoveWaterAndCoordinateOxygens(bool b) +{ + if (removeWaterAndCoordinateOxygens_ == b) + return; + + removeWaterAndCoordinateOxygens_ = b; + + generate(CIFGenerationStage::CreateCleanedUnitCell); +} + +// Set whether to remove by NETA definition in clean-up +void CIFContext::setRemoveNETA(bool b, bool byFragment) +{ + if (removeNETA_ == b && removeNETAByFragment_ == byFragment) + return; + + removeNETA_ = b; + removeNETAByFragment_ = byFragment; + + if (moietyRemovalNETA_.isValid()) + generate(CIFGenerationStage::CreateCleanedUnitCell); +} + +// Set NETA for moiety removal +bool CIFContext::setMoietyRemovalNETA(std::string_view netaDefinition) { return moietyRemovalNETA_.create(netaDefinition); } + +// Set supercell repeat +void CIFContext::setSupercellRepeat(const Vector3i &repeat) +{ + supercellRepeat_ = repeat; + + generate(CIFGenerationStage::CreateSupercell); +} + +// Recreate the data +bool CIFContext::generate(CIFGenerationStage fromStage) +{ + // Generate data starting from the specified stage, falling through to subsequent stages in the switch + + switch (fromStage) + { + case (CIFGenerationStage::CreateBasicUnitCell): + if (!createBasicUnitCell()) + return false; + case (CIFGenerationStage::CreateCleanedUnitCell): + if (!createCleanedUnitCell()) + return false; + case (CIFGenerationStage::DetectMolecules): + detectMolecules(); + case (CIFGenerationStage::CreateSupercell): + if (!createSupercell()) + return false; + } + + return true; +} + +// Return whether the generated data is valid +bool CIFContext::isValid() const +{ + return !molecularSpecies_.empty() || supercellSpecies_.fragment(0).size() != supercellSpecies_.nAtoms(); +} + +// Return supercell species +const Species 
&CIFContext::supercellSpecies() const { return supercellSpecies_; } + +// Return cleaned unit cell species +const Species &CIFContext::cleanedUnitCellSpecies() const { return cleanedUnitCellSpecies_; } + +// Return the detected molecular species +const std::vector &CIFContext::molecularSpecies() const { return molecularSpecies_; } + +// Return the generated configuration +Configuration *CIFContext::generatedConfiguration() { return &supercellConfiguration_; } + +// Return the basic unit cell configuration +Structure *CIFContext::structure() { return &structure_; } + +// Finalise, copying the required species and resulting configuration to the target CoreData +void CIFContext::finalise(CoreData &coreData, const Flags &flags) const +{ + Configuration *configuration; + + if (flags.isSet(OutputFlags::OutputMolecularSpecies)) + { + if (flags.isSet(OutputFlags::OutputConfiguration)) + { + configuration = coreData.addConfiguration(); + configuration->setName(chemicalFormula()); + + // Grab the generator + auto &generator = configuration->generator(); + + // Add Box + auto boxNode = generator.createRootNode({}); + auto cellLengths = supercellConfiguration_.box()->axisLengths(); + auto cellAngles = supercellConfiguration_.box()->axisAngles(); + boxNode->keywords().set("Lengths", Vector3NodeValue(cellLengths.get(0), cellLengths.get(1), cellLengths.get(2))); + boxNode->keywords().set("Angles", Vector3NodeValue(cellAngles.get(0), cellAngles.get(1), cellAngles.get(2))); + + for (auto &cifMolecularSp : molecularSpecies_) + { + // Add the species + auto *sp = coreData.copySpecies(cifMolecularSp.species().get()); + + // Determine a unique suffix + auto base = sp->name(); + std::string uniqueSuffix{base}; + if (!generator.nodes().empty()) + { + // Start from the last root node + auto root = generator.nodes().back(); + auto suffix = 0; + + while (generator.rootSequence().nodeInScope(root, std::format("SymmetryCopies_{}", uniqueSuffix)) != + nullptr) + uniqueSuffix = 
std::format("{}_{:02d}", base, ++suffix); + } + + // We use 'CoordinateSets' here, because in this instance we are working with (CoordinateSet, Add) pairs + + // CoordinateSets + auto coordsNode = + generator.createRootNode(std::format("SymmetryCopies_{}", uniqueSuffix), sp); + coordsNode->keywords().setEnumeration("Source", CoordinateSetsGeneratorNode::CoordinateSetSource::File); + coordsNode->setSets(cifMolecularSp.allInstanceCoordinates()); + + // Add + auto addNode = generator.createRootNode(std::format("Add_{}", uniqueSuffix), coordsNode); + addNode->keywords().set("Population", NodeValueProxy(int(cifMolecularSp.instances().size()))); + addNode->keywords().setEnumeration("Positioning", AddGeneratorNode::PositioningType::Current); + addNode->keywords().set("Rotate", false); + addNode->keywords().setEnumeration("BoxAction", AddGeneratorNode::BoxActionStyle::None); + } + } + else + { + for (auto &cifMolecularSp : molecularSpecies_) + { + coreData.copySpecies(cifMolecularSp.species().get()); + } + } + } + else + { + auto *sp = coreData.addSpecies(); + sp->copyBasic(&supercellSpecies_); + if (flags.isSet(OutputFlags::OutputSupermolecule)) + { + sp->removePeriodicBonds(); + sp->removeBox(); + } + else + sp->createBox(supercellSpecies_.box()->axisLengths(), supercellSpecies_.box()->axisAngles()); + + sp->updateIntramolecularTerms(); + + if (flags.isSet(OutputFlags::OutputConfiguration)) + { + configuration = coreData.addConfiguration(); + configuration->setName(chemicalFormula()); + + // Grab the generator + auto &generator = configuration->generator(); + + // Add Box + auto boxNode = generator.createRootNode({}); + auto cellLengths = supercellConfiguration_.box()->axisLengths(); + auto cellAngles = supercellConfiguration_.box()->axisAngles(); + boxNode->keywords().set("Lengths", Vector3NodeValue(cellLengths.get(0), cellLengths.get(1), cellLengths.get(2))); + boxNode->keywords().set("Angles", Vector3NodeValue(cellAngles.get(0), cellAngles.get(1), cellAngles.get(2))); 
+ + // Add + auto addNode = generator.createRootNode(std::format("Add_{}", sp->name()), sp); + addNode->keywords().set("Population", NodeValueProxy(1)); + addNode->keywords().setEnumeration("Positioning", AddGeneratorNode::PositioningType::Current); + addNode->keywords().set("Rotate", false); + addNode->keywords().setEnumeration("BoxAction", AddGeneratorNode::BoxActionStyle::None); + } + } +} + +/* + * Helpers + */ + +// Apply CIF bonding to a given species +void CIFContext::applyCIFBonding(Species *sp, bool preventMetallicBonding) +{ + if (!hasBondDistances()) + return; + + auto *box = sp->box(); + auto pairs = PairIterator(sp->nAtoms()); + for (auto pair : pairs) + { + // Grab indices and atom references + auto [indexI, indexJ] = pair; + if (indexI == indexJ) + continue; + + auto &i = sp->atom(indexI); + auto &j = sp->atom(indexJ); + + // Prevent metallic bonding? + if (preventMetallicBonding && Elements::isMetallic(i.Z()) && Elements::isMetallic(j.Z())) + continue; + + // Retrieve distance + auto r = bondDistance(i.atomType()->name(), j.atomType()->name()); + if (!r) + continue; + else if (fabs(box->minimumDistance(i.r(), j.r()) - r.value()) < 1.0e-2) + sp->addBond(&i, &j); + } +} + +// Determine the best NETA definition for the supplied species +std::tuple> CIFContext::bestNETADefinition(Species *sp) +{ + // Set up the return value and bind its contents + std::tuple> result{NETADefinition(), {}}; + auto &&[bestNETA, rootAtoms] = result; + + // Maintain a set of atoms matched by any NETA description we generate + std::set alreadyMatched; + + // Loop over species atoms + for (auto &i : sp->atoms()) + { + // Skip this atom? 
+ if (alreadyMatched.find(&i) != alreadyMatched.end()) + continue; + + // Create a NETA definition with this atom as the root + NETADefinition neta; + neta.create(&i, std::nullopt, + Flags(NETADefinition::NETACreationFlags::ExplicitHydrogens, + NETADefinition::NETACreationFlags::IncludeRootElement)); + + // Apply this match over the whole species + std::vector currentRootAtoms; + for (auto &j : sp->atoms()) + { + if (neta.matches(&j)) + { + currentRootAtoms.push_back(&j); + alreadyMatched.insert(&j); + } + } + + // Is this a better description? + auto better = false; + if (rootAtoms.empty() || currentRootAtoms.size() < rootAtoms.size()) + better = true; + else if (currentRootAtoms.size() == rootAtoms.size()) + { + // Replace the current match if there are more bonds on the current atom. + if (i.nBonds() > rootAtoms.front()->nBonds()) + better = true; + } + + if (better) + { + bestNETA = neta; + rootAtoms = currentRootAtoms; + } + } + + return result; +} + +// Get instances for the supplied species from the cleaned unit cell +std::vector CIFContext::getSpeciesInstances(const Species *referenceSpecies, std::vector &atomMask, + const NETADefinition &neta, + const std::vector &referenceRootAtoms) +{ + if (referenceRootAtoms.empty() || !neta.isValid()) + return {}; + + // Loop over atoms in the unit cell - we'll mark any that we select as an instance so we speed things up and avoid + // duplicates + const auto &unitCellAtoms = cleanedUnitCellSpecies_.atoms(); + std::vector instances; + auto atomIndexIterator = std::find(atomMask.begin(), atomMask.end(), false); + while (atomIndexIterator != atomMask.end()) + { + // Try to match this atom / fragment + const auto atomIndex = atomIndexIterator - atomMask.begin(); + auto &atom = unitCellAtoms[atomIndex]; + auto matchedUnitCellAtoms = neta.matchedPath(&atom).set(); + if (matchedUnitCellAtoms.empty()) + { + atomIndexIterator = std::find(std::next(atomIndexIterator), atomMask.end(), false); + continue; + } + + // Found a 
fragment that matches the NETA description - we now create a temporary instance Species which will contain + // the selected fragment atoms, reassembled into a molecule (i.e. unfolded) and with bonding applied / calculated. + // We need to copy the unit cell from the crystal so we detect bonds properly. + Species instanceSpecies; + instanceSpecies.createBox(unitCellSpecies_.box()->axisLengths(), unitCellSpecies_.box()->axisAngles()); + auto rootAtomLocalIndex = -1; + // -- Create species atoms from those matched in the unit cell by the NETA description. + for (auto &matchedAtom : matchedUnitCellAtoms) + { + auto idx = instanceSpecies.addAtom(matchedAtom->Z(), matchedAtom->r(), 0.0, matchedAtom->atomType()); + + // Store the index of the root atom in match in our instance species when we find it + if (matchedAtom == &atom) + rootAtomLocalIndex = idx; + } + // -- Store the local root atom so we can access its coordinates for the origin translation + auto &instanceSpeciesRootAtom = instanceSpecies.atom(rootAtomLocalIndex); + // -- Calculate / apply bonding + if (!preventAllBonds_) + if (useCIFBondingDefinitions_) + applyCIFBonding(&instanceSpecies, preventMetallicBonds_); + else + instanceSpecies.addMissingBonds(bondingTolerance_, preventMetallicBonds_); + + // Create a LocalMolecule as a working area for folding, translation, and rotation of the instance coordinates. + LocalMolecule instanceMolecule; + instanceMolecule.setSpecies(&instanceSpecies); + // -- Copy the coordinates off the matched unit cell atoms to our molecule and flag them as complete + auto count = 0; + for (auto &&[matchedAtom, instanceMolAtom] : zip(matchedUnitCellAtoms, instanceMolecule.localAtoms())) + { + instanceMolAtom.setR(matchedAtom->r()); + atomMask[matchedAtom->index()] = true; + } + auto &instanceMoleculeRootAtom = instanceMolecule.localAtoms()[rootAtomLocalIndex]; + + // Unfold the molecule and store the unfolded molecule coordinates back into the instance Species. 
+ // This represents our full instance coordinates we will be storing (but not their final order) + instanceMolecule.unFold(unitCellSpecies_.box()); + for (auto &&[molAtom, spAtom] : zip(instanceMolecule.localAtoms(), instanceSpecies.atoms())) + spAtom.setR(molAtom.r()); + + /* + * Now, we have a root match atom on the current instance and a vector of possible matching sites on the reference + * species (in referenceRootAtoms). For each of the referenceRootAtoms, try to incrementally select along bonds using + * basic NETA connectivity. + */ + + // Generate basic NETA descriptions for each atom in the reference and candidate species + std::map referenceAtomNETA; + for (auto &spAtom : referenceSpecies->atoms()) + referenceAtomNETA[&spAtom] = NETADefinition(&spAtom, 1, {NETADefinition::NETACreationFlags::IncludeRootElement}); + + std::map matchMap; + for (const auto *referenceRootAtom : referenceRootAtoms) + { + // The root atom is the starting point + matchMap = matchAtom(referenceRootAtom, &instanceSpeciesRootAtom, referenceAtomNETA, {}); + if (!matchMap.empty()) + break; + } + + // Result? 
+ if (matchMap.empty()) + { + Messenger::error("Failed to match connectivity of an instance to the reference molecule.\n"); + return {}; + } + else if (matchMap.size() != referenceSpecies->nAtoms()) + { + Messenger::error( + "Internal error - failed to match connectivity of all atoms within an instance to the reference molecule.\n"); + return {}; + } + + // Create the final instance + auto &instance = instances.emplace_back(); + instance.setSpecies(referenceSpecies); + for (const auto &[refSpeciesAtom, instanceSpeciesAtom] : matchMap) + { + instance.localAtom(refSpeciesAtom->index()).setR(instanceSpeciesAtom->r()); + } + + // Find the next available atom + atomIndexIterator = std::find(std::next(atomIndexIterator), atomMask.end(), false); + } + + return instances; +} + +// Recursively check NETA description matches between the supplied atoms +std::map +CIFContext::matchAtom(const SpeciesAtom *referenceAtom, const SpeciesAtom *instanceAtom, + const std::map &refNETA, + const std::map &map) +{ + // If the reference atom NETA doesn't match the instance atom we cannot proceed + if (!refNETA.at(referenceAtom).matches(instanceAtom)) + return {}; + + // Check the map to see if we have already associated the reference atom to an instance atom, or if the instance atom + // is already associated to a different reference atom. + for (auto &&[mappedRefAtom, mappedInstanceAtom] : map) + { + // Found it - double-check to ensure that the current association matches our instance atom. If it does we can return + // the map as it currently stands. If not we return an empty map to indicate failure. 
+ if (mappedRefAtom == referenceAtom) + { + if (mappedInstanceAtom == instanceAtom) + { + return map; + } + else + { + return {}; + } + } + else if (mappedInstanceAtom == instanceAtom) + { + return {}; + } + } + + // Copy the current map, associate our initial pair of atoms and try to extend it + auto newMap = map; + newMap[referenceAtom] = instanceAtom; + + // Cycle over bonds on the reference atom and find + for (const auto &referenceBond : referenceAtom->bonds()) + { + // Get the reference bond partner + auto *referenceBondPartner = referenceBond.get().partner(referenceAtom); + + // Try to find a match over bonds on the instance atom + std::map bondResult; + for (const auto &instanceBond : instanceAtom->bonds()) + { + // Get the instance bond partner + auto *instanceBondPartner = instanceBond.get().partner(instanceAtom); + + // Recurse + bondResult = matchAtom(referenceBondPartner, instanceBondPartner, refNETA, newMap); + if (!bondResult.empty()) + break; + } + + // If we found a suitable match recursing into the bond, store the result into newMap and continue to the next bond. + // If we didn't find a good match, we return now. 
+ if (bondResult.empty()) + { + return {}; + } + else + { + newMap = bondResult; + } + } + + // If we get to here then we succeeded, so return the new map + return newMap; +} + +// Calculate difference metric between the supplied species and local molecule +std::pair> CIFContext::differenceMetric(const Species *species, const LocalMolecule &molecule) +{ + auto difference = 0.0; + std::vector atomIndexMap(species->nAtoms(), -1); + auto nBadAtoms = 0; + for (auto spI = 0; spI < species->nAtoms(); ++spI) + { + auto &spAtom = species->atom(spI); + + // For this species atom find the closest atom in the molecule + auto distanceSq = 1.0e6; + for (auto molI = 0; molI < molecule.nAtoms(); ++molI) + { + auto rABSq = (spAtom.r() - molecule.localAtoms()[molI].r()).magnitudeSq(); + if (rABSq < distanceSq) + { + distanceSq = rABSq; + atomIndexMap[spI] = molI; + } + } + + if (distanceSq > 0.1) + ++nBadAtoms; + + // Update the difference score + const auto &closestMolSpAtom = molecule.species()->atom(atomIndexMap[spI]); + difference += distanceSq; + if (spAtom.Z() != closestMolSpAtom.Z()) + difference += std::max(int(spAtom.Z()), int(closestMolSpAtom.Z())) * 10.0; + } + + return {difference, atomIndexMap}; +} diff --git a/src/nodes/cif/io/cifContext.h b/src/nodes/cif/io/cifContext.h new file mode 100644 index 0000000000..3f168a292f --- /dev/null +++ b/src/nodes/cif/io/cifContext.h @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +// Copyright (c) 2026 Team Dissolve and contributors + +#pragma once + +#include "classes/configuration.h" +#include "classes/coreData.h" +#include "classes/species.h" +#include "classes/structure.h" +#include "data/spaceGroups.h" +#include "math/matrix4.h" +#include "neta/neta.h" +#include "nodes/cif/io/cifClasses.h" +#include "templates/flags.h" +#include + +// Forward Declarations +class Box; + +// CIF Handler +class CIFContext +{ + public: + CIFContext(); + ~CIFContext() = default; + + /* + * Raw Data + */ + public: + // Data Types + 
using CIFTags = std::map>; + + private: + // Map of enumerated data items + CIFTags tags_; + + private: + // Parse supplied file into the destination objects + bool parse(std::string_view filename, CIFTags &tags) const; + + public: + // Return whether the specified file parses correctly + bool validFile(std::string_view filename) const; + // Read CIF data from specified file + bool read(std::string_view filename); + // Return whether the specified tag exists + bool hasTag(std::string tag) const; + // Return tag data string (if it exists) assuming a single datum (first in the vector) + std::optional getTagString(std::string tag) const; + // Return tag data strings (if it exists) + std::vector getTagStrings(std::string tag) const; + // Return tag data as double (if it exists) assuming a single datum (first in the vector) + std::optional getTagDouble(std::string tag) const; + // Return tag data doubles (if it exists) + std::vector getTagDoubles(std::string tag) const; + // Return tag data as integer (if it exists) assuming a single datum (first in the vector) + std::optional getTagInt(std::string tag) const; + + /* + * Processed Data + */ + private: + // Space group + SpaceGroups::SpaceGroupId spaceGroup_{SpaceGroups::NoSpaceGroup}; + // Atom assemblies + std::vector assemblies_; + // Bond information + std::vector bondingPairs_; + + public: + // Set space group from index + void setSpaceGroup(SpaceGroups::SpaceGroupId sgid); + // Return space group + SpaceGroups::SpaceGroupId spaceGroup() const; + // Return cell lengths + std::optional getCellLengths() const; + // Return cell angles + std::optional getCellAngles() const; + // Return chemical formula + std::string chemicalFormula() const; + // Get (add or retrieve) named assembly + CIFAssembly &getAssembly(std::string_view name); + // Return atom assemblies + std::vector &assemblies(); + const std::vector &assemblies() const; + // Return whether any bond distances are defined + bool hasBondDistances() const; + // Return
whether a bond distance is defined for the specified label pair + std::optional bondDistance(std::string_view labelI, std::string_view labelJ) const; + + /* + * Creation + */ + public: + // CIF Generation Stages + enum class CIFGenerationStage + { + CreateBasicUnitCell, + CreateCleanedUnitCell, + DetectMolecules, + CreateSupercell + }; + // CIF Species Output Flags + enum OutputFlags + { + OutputConfiguration, /* Output a Configuration */ + OutputMolecularSpecies, /* Partitioning - output molecular species */ + OutputFramework, /* Partitioning - output a framework species */ + OutputSupermolecule /* Partitioning - output a supermolecule */ + }; + + private: + // Temporary atom types used for unique atom labels + std::vector> atomLabelTypes_; + // Tolerance for removal of overlapping atoms + double overlapTolerance_{0.1}; + // Whether to use CIF bonding definitions + bool useCIFBondingDefinitions_{false}; + // Bonding tolerance, if calculating bonding rather than using CIF definitions + double bondingTolerance_{1.1}; + // Whether to ignore all bonds + bool preventAllBonds_{false}; + // Whether to prevent metallic bonding + bool preventMetallicBonds_{true}; + // Whether to remove free atomic moieties in clean-up + bool removeAtomics_{false}; + // Whether to remove water and coordinated oxygen atoms in clean-up + bool removeWaterAndCoordinateOxygens_{false}; + // Whether to remove by NETA definition in clean-up + bool removeNETA_{false}; + // Whether to expand NETA matches to fragments when removing in clean-up + bool removeNETAByFragment_{false}; + // NETA for moiety removal, if specified + NETADefinition moietyRemovalNETA_; + // Supercell repeat + Vector3i supercellRepeat_{1, 1, 1}; + // Basic unit cell + Species unitCellSpecies_; + Configuration unitCellConfiguration_; + Structure structure_; + // Cleaned unit cell + Species cleanedUnitCellSpecies_; + Configuration cleanedUnitCellConfiguration_; + // Molecular definition of unit cell (if possible) + std::vector 
molecularSpecies_; + // Final generated result (supercell) + Species supercellSpecies_; + Configuration supercellConfiguration_; + + private: + // Create basic unit cell + bool createBasicUnitCell(); + // Create structure from basic unit cell atoms and connectivity + bool createStructure(); + // Create the cleaned unit cell + bool createCleanedUnitCell(); + // Try to detect molecules in the cell contents + bool detectMolecules(); + // Create supercell species + bool createSupercell(); + + public: + // Set overlap tolerance + void setOverlapTolerance(double tol); + // Set whether to use CIF bonding definitions + void setUseCIFBondingDefinitions(bool b); + // Set bonding tolerance + void setBondingTolerance(double tol); + // Set whether to ignore all bonds + void setPreventAllBonds(bool b); + // Set whether to prevent metallic bonding + void setPreventMetallicBonds(bool b); + // Set whether to remove free atomic moieties in clean-up + void setRemoveAtomics(bool b); + // Set whether to remove water and coordinated oxygen atoms in clean-up + void setRemoveWaterAndCoordinateOxygens(bool b); + // Set whether to remove by NETA definition in clean-up + void setRemoveNETA(bool b, bool byFragment); + // Set NETA for moiety removal + bool setMoietyRemovalNETA(std::string_view netaDefinition); + // Set supercell repeat + void setSupercellRepeat(const Vector3i &repeat); + // Recreate the data, starting from the specified generation stage + bool generate(CIFGenerationStage fromStage = CIFGenerationStage::CreateBasicUnitCell); + // Return whether the generated data is valid + bool isValid() const; + // Return supercell species + const Species &supercellSpecies() const; + // Return cleaned unit cell species + const Species &cleanedUnitCellSpecies() const; + // Return the detected molecular species + const std::vector &molecularSpecies() const; + // Return the generated configuration + Configuration *generatedConfiguration(); + // Return the basic crystal structure + Structure *structure(); + // Finalise, copying the required
species and resulting configuration to the target CoreData + void finalise(CoreData &coreData, const Flags &flags = {}) const; + + /* + * Helpers + */ + private: + // Apply CIF bonding to a given species + void applyCIFBonding(Species *sp, bool preventMetallicBonding); + // Determine the best NETA definition for the supplied species + std::tuple> bestNETADefinition(Species *sp); + // Get instances of species molecules from the supplied NETA definition + std::vector getSpeciesInstances(const Species *referenceSpecies, std::vector &atomMask, + const NETADefinition &neta, + const std::vector &referenceRootAtoms); + // Calculate difference metric between the supplied species and local molecule + static std::pair> differenceMetric(const Species *species, const LocalMolecule &molecule); + // Recursively check NETA description matches between the supplied atoms + std::map matchAtom(const SpeciesAtom *referenceAtom, + const SpeciesAtom *instanceAtom, + const std::map &refNETA, + const std::map &map); +}; diff --git a/src/nodes/setCIFAtomGroupActivity.cpp b/src/nodes/cif/setCIFAtomGroupActivity.cpp similarity index 78% rename from src/nodes/setCIFAtomGroupActivity.cpp rename to src/nodes/cif/setCIFAtomGroupActivity.cpp index 3f5d69a5ab..d477a7ec9e 100644 --- a/src/nodes/setCIFAtomGroupActivity.cpp +++ b/src/nodes/cif/setCIFAtomGroupActivity.cpp @@ -1,16 +1,16 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2026 Team Dissolve and contributors -#include "nodes/setCIFAtomGroupActivity.h" +#include "nodes/cif/setCIFAtomGroupActivity.h" SetCIFAtomGroupActivityNode::SetCIFAtomGroupActivityNode(Graph *parentGraph) : Node(parentGraph) { // Inputs - addInput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) + addInput("CIFContext", "CIF handling context derived from parsing of CIF file", context_) ->setFlags({ParameterBase::Required}); // Outputs - addOutput("CIFContext", "CIF handling context derived from parsing of CIF file", 
context_); + addOutput("CIFContext", "CIF handling context derived from parsing of CIF file", context_); // Options addOption("Assembly", "CIF assembly name", assemblyName_); diff --git a/src/nodes/setCIFAtomGroupActivity.h b/src/nodes/cif/setCIFAtomGroupActivity.h similarity index 89% rename from src/nodes/setCIFAtomGroupActivity.h rename to src/nodes/cif/setCIFAtomGroupActivity.h index 93b532af89..4d7e6f36d5 100644 --- a/src/nodes/setCIFAtomGroupActivity.h +++ b/src/nodes/cif/setCIFAtomGroupActivity.h @@ -3,10 +3,10 @@ #pragma once -#include "nodes/cifLoader.h" +#include "nodes/cif/io/cifContext.h" #include "nodes/node.h" -// CIFLoader Node +// SetCIFAtomGroupActivity Node class SetCIFAtomGroupActivityNode : public Node { public: @@ -22,7 +22,7 @@ class SetCIFAtomGroupActivityNode : public Node */ private: // CIF handler context - CIFLoaderNode::CIFContext *context_{nullptr}; + CIFContext *context_{nullptr}; // Selected CIF assembly atom group name std::string atomGroupName_; // Selected CIF assembly name diff --git a/src/nodes/registry.cpp b/src/nodes/registry.cpp index 9a7660b38f..427e0ddbd8 100644 --- a/src/nodes/registry.cpp +++ b/src/nodes/registry.cpp @@ -7,14 +7,15 @@ #include "nodes/angle.h" #include "nodes/atomicMC/atomicMC.h" #include "nodes/bragg.h" -#include "nodes/cifBondingOptions.h" -#include "nodes/cifLoader.h" -#include "nodes/cifMolecularSpecies.h" -#include "nodes/cifPeriodicFramework.h" -#include "nodes/cifRemoveAtomic.h" -#include "nodes/cifRemoveWater.h" -#include "nodes/cifStructureCleanup.h" -#include "nodes/cifSuperMolecule.h" +#include "nodes/cif/cifBondingOptions.h" +#include "nodes/cif/cifMolecularSpecies.h" +#include "nodes/cif/cifPeriodicFramework.h" +#include "nodes/cif/cifRemoveAtomic.h" +#include "nodes/cif/cifRemoveWater.h" +#include "nodes/cif/cifStructureCleanup.h" +#include "nodes/cif/cifSuperMolecule.h" +#include "nodes/cif/importCIFStructure.h" +#include "nodes/cif/setCIFAtomGroupActivity.h" #include "nodes/configuration.h"
#include "nodes/data1DImport.h" #include "nodes/derivative.h" @@ -35,7 +36,6 @@ #include "nodes/multiply.h" #include "nodes/neutronSQ/neutronSQ.h" #include "nodes/numberNode.h" -#include "nodes/setCIFAtomGroupActivity.h" #include "nodes/setCell.h" #include "nodes/siteRDF.h" #include "nodes/species.h" @@ -70,7 +70,7 @@ void NodeRegistry::instantiateNodeProducers() {"CIFStructureCleanup", makeDerivedNode()}, {"Configuration", makeDerivedNode()}, {"CIFBondingOptions", makeDerivedNode()}, - {"CIFLoader", makeDerivedNode()}, + {"ImportCIFStructure", makeDerivedNode()}, {"CIFMolecularSpecies", makeDerivedNode()}, {"CIFPeriodicFramework", makeDerivedNode()}, {"CIFRemoveAtomic", makeDerivedNode()}, diff --git a/tests/io/cif.cpp b/tests/io/cif.cpp index 6486be9b6d..82031fe6a3 100644 --- a/tests/io/cif.cpp +++ b/tests/io/cif.cpp @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2026 Team Dissolve and contributors -#include "io/import/cif.h" #include "classes/empiricalFormula.h" #include "io/import/species.h" +#include "nodes/cif/io/cifContext.h" #include "tests/testData.h" #include @@ -67,64 +67,64 @@ TEST_F(ImportCIFTest, Parse) for (auto &cif : cifs) { - CIFHandler cifHandler; - ASSERT_TRUE(cifHandler.read(cifPath + cif)); + CIFContext cifContext; + ASSERT_TRUE(cifContext.read(cifPath + cif)); } } TEST_F(ImportCIFTest, NaCl) { - CIFHandler cifHandler; - ASSERT_TRUE(cifHandler.read("cif/NaCl-1000041.cif")); - EXPECT_TRUE(cifHandler.generate()); + CIFContext cifContext; + ASSERT_TRUE(cifContext.read("cif/NaCl-1000041.cif")); + EXPECT_TRUE(cifContext.generate()); // Check basic info - EXPECT_EQ(cifHandler.spaceGroup(), SpaceGroups::SpaceGroup_225); + EXPECT_EQ(cifContext.spaceGroup(), SpaceGroups::SpaceGroup_225); constexpr double A = 5.62; - testBox(cifHandler.generatedConfiguration(), {A, A, A}, {90, 90, 90}, 8); + testBox(cifContext.generatedConfiguration(), {A, A, A}, {90, 90, 90}, 8); // Calculating bonding is the default, but this gives a 
continuous framework... - EXPECT_EQ(cifHandler.molecularSpecies().size(), 0); + EXPECT_EQ(cifContext.molecularSpecies().size(), 0); // Get molecular species - cifHandler.setUseCIFBondingDefinitions(true); - EXPECT_EQ(cifHandler.molecularSpecies().size(), 2); - testMolecularSpecies(cifHandler.molecularSpecies()[0], {"Na", 4, 1}); + cifContext.setUseCIFBondingDefinitions(true); + EXPECT_EQ(cifContext.molecularSpecies().size(), 2); + testMolecularSpecies(cifContext.molecularSpecies()[0], {"Na", 4, 1}); std::vector R = {{0.0, 0.0, 0.0}, {0.0, A / 2, A / 2}, {A / 2, 0.0, A / 2}, {A / 2, A / 2, 0.0}}; - for (auto &&[instance, r2] : zip(cifHandler.molecularSpecies()[0].instances(), R)) + for (auto &&[instance, r2] : zip(cifContext.molecularSpecies()[0].instances(), R)) DissolveSystemTest::checkVec3(instance.localAtoms()[0].r(), r2); - testMolecularSpecies(cifHandler.molecularSpecies()[1], {"Cl", 4, 1}); - for (auto &&[instance, r2] : zip(cifHandler.molecularSpecies()[1].instances(), R)) + testMolecularSpecies(cifContext.molecularSpecies()[1], {"Cl", 4, 1}); + for (auto &&[instance, r2] : zip(cifContext.molecularSpecies()[1].instances(), R)) DissolveSystemTest::checkVec3(instance.localAtoms()[0].r(), (r2 - A / 2).abs()); // 2x2x2 supercell - cifHandler.setSupercellRepeat({2, 2, 2}); - EXPECT_TRUE(cifHandler.generate()); - testBox(cifHandler.generatedConfiguration(), {A * 2, A * 2, A * 2}, {90, 90, 90}, 8 * 8); + cifContext.setSupercellRepeat({2, 2, 2}); + EXPECT_TRUE(cifContext.generate()); + testBox(cifContext.generatedConfiguration(), {A * 2, A * 2, A * 2}, {90, 90, 90}, 8 * 8); } TEST_F(ImportCIFTest, NaClO3) { - CIFHandler cifHandler; - ASSERT_TRUE(cifHandler.read("cif/NaClO3-1010057.cif")); - EXPECT_TRUE(cifHandler.generate()); + CIFContext cifContext; + ASSERT_TRUE(cifContext.read("cif/NaClO3-1010057.cif")); + EXPECT_TRUE(cifContext.generate()); // Check basic info - EXPECT_EQ(cifHandler.spaceGroup(), SpaceGroups::SpaceGroup_198); + EXPECT_EQ(cifContext.spaceGroup(), 
SpaceGroups::SpaceGroup_198); constexpr double A = 6.55; - testBox(cifHandler.generatedConfiguration(), {A, A, A}, {90, 90, 90}, 20); + testBox(cifContext.generatedConfiguration(), {A, A, A}, {90, 90, 90}, 20); // Turn off automatic bond calculation - there are no bonding defs in the CIF, so we expect species for each atomic // component (4 Na, 4 Cl, and 12 O) - cifHandler.setUseCIFBondingDefinitions(true); - auto &cifMols = cifHandler.molecularSpecies(); + cifContext.setUseCIFBondingDefinitions(true); + auto &cifMols = cifContext.molecularSpecies(); ASSERT_EQ(cifMols.size(), 3); testMolecularSpecies(cifMols[0], {"Na", 4, 1}); testMolecularSpecies(cifMols[1], {"Cl", 4, 1}); testMolecularSpecies(cifMols[2], {"O", 12, 1}); // Calculate bonding ourselves to get the correct species - cifHandler.setUseCIFBondingDefinitions(false); + cifContext.setUseCIFBondingDefinitions(false); ASSERT_EQ(cifMols.size(), 2); testMolecularSpecies(cifMols[0], {"Na", 4, 1}); testMolecularSpecies(cifMols[1], {"ClO3", 4, 4}); @@ -132,14 +132,14 @@ TEST_F(ImportCIFTest, NaClO3) TEST_F(ImportCIFTest, CuBTC) { - CIFHandler cifHandler; - ASSERT_TRUE(cifHandler.read("cif/CuBTC-7108574.cif")); - EXPECT_TRUE(cifHandler.generate()); + CIFContext cifContext; + ASSERT_TRUE(cifContext.read("cif/CuBTC-7108574.cif")); + EXPECT_TRUE(cifContext.generate()); // Check basic info - EXPECT_EQ(cifHandler.spaceGroup(), SpaceGroups::SpaceGroup_225); + EXPECT_EQ(cifContext.spaceGroup(), SpaceGroups::SpaceGroup_225); constexpr auto A = 26.3336; - testBox(cifHandler.generatedConfiguration(), {A, A, A}, {90, 90, 90}, 672); + testBox(cifContext.generatedConfiguration(), {A, A, A}, {90, 90, 90}, 672); // 16 basic formula units per unit cell constexpr auto N = 16; @@ -147,66 +147,66 @@ TEST_F(ImportCIFTest, CuBTC) // Check basic formula (which includes bound water oxygens - with no H - at this point) and using O group EmpiricalFormula::EmpiricalFormulaMap cellFormulaH = { {Elements::Cu, 3 * N}, {Elements::C, 18 * N}, 
{Elements::H, 6 * N}, {Elements::O, 15 * N}}; - EXPECT_EQ(EmpiricalFormula::formula(cifHandler.generatedConfiguration()->atoms(), + EXPECT_EQ(EmpiricalFormula::formula(cifContext.generatedConfiguration()->atoms(), [](const auto &i) { return i.speciesAtom()->Z(); }), EmpiricalFormula::formula(cellFormulaH)); - EXPECT_EQ(cifHandler.molecularSpecies().size(), 2); + EXPECT_EQ(cifContext.molecularSpecies().size(), 2); // Change active assemblies to get amine-substituted structure EmpiricalFormula::EmpiricalFormulaMap cellFormulaNH2 = cellFormulaH; cellFormulaNH2[Elements::N] = 6 * N; cellFormulaNH2[Elements::H] *= 2; - cifHandler.getAssembly("A").getGroup("1").setActive(false); - cifHandler.getAssembly("B").getGroup("2").setActive(true); - cifHandler.getAssembly("C").getGroup("2").setActive(true); - EXPECT_TRUE(cifHandler.generate()); - EXPECT_EQ(EmpiricalFormula::formula(cifHandler.generatedConfiguration()->atoms(), + cifContext.getAssembly("A").getGroup("1").setActive(false); + cifContext.getAssembly("B").getGroup("2").setActive(true); + cifContext.getAssembly("C").getGroup("2").setActive(true); + EXPECT_TRUE(cifContext.generate()); + EXPECT_EQ(EmpiricalFormula::formula(cifContext.generatedConfiguration()->atoms(), [](const auto &i) { return i.speciesAtom()->Z(); }), EmpiricalFormula::formula(cellFormulaNH2)); // Remove those free oxygens so we just have a framework - cifHandler.setRemoveAtomics(true); - EXPECT_EQ(cifHandler.molecularSpecies().size(), 0); + cifContext.setRemoveAtomics(true); + EXPECT_EQ(cifContext.molecularSpecies().size(), 0); } TEST_F(ImportCIFTest, MoleculeOrdering) { - CIFHandler cifHandler; + CIFContext cifContext; const auto cifFiles = {"cif/molecule-test-simple-ordered.cif", "cif/molecule-test-simple-unordered.cif", "cif/molecule-test-simple-unordered-rotated.cif"}; for (auto cifFile : cifFiles) { // Load the CIF file - ASSERT_TRUE(cifHandler.read(cifFile)); - EXPECT_TRUE(cifHandler.generate()); + ASSERT_TRUE(cifContext.read(cifFile)); + 
EXPECT_TRUE(cifContext.generate()); - EXPECT_EQ(cifHandler.molecularSpecies().size(), 1); + EXPECT_EQ(cifContext.molecularSpecies().size(), 1); - auto &cifMolecule = cifHandler.molecularSpecies().front(); + auto &cifMolecule = cifContext.molecularSpecies().front(); EmpiricalFormula::EmpiricalFormulaMap moleculeFormula = { {Elements::Cl, 1}, {Elements::O, 1}, {Elements::C, 1}, {Elements::H, 3}}; testMolecularSpecies(cifMolecule, {EmpiricalFormula::formula(moleculeFormula), 6, 6}); - testInstanceConsistency(cifMolecule, cifHandler.cleanedUnitCellSpecies()); + testInstanceConsistency(cifMolecule, cifContext.cleanedUnitCellSpecies()); } } TEST_F(ImportCIFTest, BigMoleculeOrdering) { - CIFHandler cifHandler; + CIFContext cifContext; const auto cifFile = "cif/Bisphen_n_arenes_1517789.cif"; // Load the CIF file - ASSERT_TRUE(cifHandler.read(cifFile)); - EXPECT_TRUE(cifHandler.generate()); + ASSERT_TRUE(cifContext.read(cifFile)); + EXPECT_TRUE(cifContext.generate()); - EXPECT_EQ(cifHandler.molecularSpecies().size(), 1); + EXPECT_EQ(cifContext.molecularSpecies().size(), 1); - auto &cifMolecule = cifHandler.molecularSpecies().front(); + auto &cifMolecule = cifContext.molecularSpecies().front(); EmpiricalFormula::EmpiricalFormulaMap moleculeFormula = {{Elements::O, 6}, {Elements::C, 51}, {Elements::H, 54}}; testMolecularSpecies(cifMolecule, {EmpiricalFormula::formula(moleculeFormula), 4, 111}); - testInstanceConsistency(cifMolecule, cifHandler.cleanedUnitCellSpecies()); + testInstanceConsistency(cifMolecule, cifContext.cleanedUnitCellSpecies()); } } // namespace UnitTest diff --git a/tests/nodes/bragg.cpp b/tests/nodes/bragg.cpp index bd75dbf897..742e72b7c3 100644 --- a/tests/nodes/bragg.cpp +++ b/tests/nodes/bragg.cpp @@ -6,7 +6,7 @@ #include "classes/speciesSites.h" #include "io/import/trajectory.h" #include "math/rangedVector3.h" -#include "nodes/cifMolecularSpecies.h" +#include "nodes/cif/cifMolecularSpecies.h" #include "nodes/gr/gr.h" #include
"nodes/importConfigurationTrajectory.h" #include "nodes/iterableGraph.h" @@ -40,11 +40,11 @@ class BraggNodeTest : public ::testing::Test // Create species and configuration from MgO cif file auto root = testGraph_.dissolveGraph(); - ASSERT_TRUE(testGraph_.appendNode("CIFLoader", "CIFLoader")); + ASSERT_TRUE(testGraph_.appendNode("ImportCIFStructure", "importCIFStructure")); ASSERT_TRUE(testGraph_.fetchHead()->setOption("FilePath", "cif/1000053.cif")); ASSERT_TRUE(testGraph_.appendNode("CIFBondingOptions", "CIFBonds")); - ASSERT_TRUE(root->addEdge({"CIFLoader", "CIFContext", "CIFBonds", "CIFContext"})); + ASSERT_TRUE(root->addEdge({"importCIFStructure", "CIFContext", "CIFBonds", "CIFContext"})); ASSERT_TRUE(testGraph_.fetchHead()->setOption("PreventAllBonds", true)); // Create a supercell that is 5 * unitcell diff --git a/tests/nodes/cif.cpp b/tests/nodes/cif.cpp index 301a357110..04a1e5dd2e 100644 --- a/tests/nodes/cif.cpp +++ b/tests/nodes/cif.cpp @@ -3,8 +3,8 @@ #include "classes/empiricalFormula.h" #include "io/import/species.h" -#include "nodes/cifLoader.h" -#include "nodes/cifMolecularSpecies.h" +#include "nodes/cif/cifMolecularSpecies.h" +#include "nodes/cif/importCIFStructure.h" #include "tests/graphData.h" #include "tests/testData.h" #include @@ -30,7 +30,7 @@ class CIFNodeTest : public ::testing::Test void createGraph(std::string filename) { auto name = cifNameFromFile(filename); - EXPECT_TRUE(testGraph_.appendNode("CIFLoader", name)); + EXPECT_TRUE(testGraph_.appendNode("ImportCIFStructure", name)); testGraph_.fetchHead()->setOption("FilePath", path_ + filename); EXPECT_TRUE(testGraph_.appendNode("CIFBondingOptions", name + "//BondingOptions")); EXPECT_TRUE(testGraph_.appendNode("CIFRemoveAtomic", name + "//RemoveAtomic")); @@ -50,11 +50,11 @@ class CIFNodeTest : public ::testing::Test return name; } // Retrieve CIF context by filename - CIFLoaderNode::CIFContext *getContextByFileName(std::string filename) + CIFContext *getContextByFileName(std::string
filename) { auto name = cifNameFromFile(filename); auto node = testGraph_.findNode(name); - auto context = node->getOutputValue("CIFContext"); + auto context = node->getOutputValue("CIFContext"); return context; } // Test Box definition diff --git a/tests/tempFile.h b/tests/tempFile.h index 661b024a7f..fa89ad26de 100644 --- a/tests/tempFile.h +++ b/tests/tempFile.h @@ -41,8 +41,8 @@ class TempFile } // Get the file name on conversion to string - operator std::string() const { return path; } - operator std::filesystem::path() const { return path; } + operator std::string() const { return path.generic_string(); } + operator std::filesystem::path() const { return path.c_str(); } private: // The actual path of the temp file