From 9e91fb76d776ea4c97a604ae061f8a33b7cd9dde Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Thu, 5 Feb 2026 14:36:57 -0500 Subject: [PATCH 01/22] Set-up preprocessor macro for defining RKB ERIs --- CMakeLists.txt | 203 ++++++++++++++++-------------- cmake/modules/int_am.cmake | 3 +- include/libint2/config.h.cmake.in | 19 +++ include/libint2/cxxapi.h | 2 +- src/bin/libint/build_libint.cc | 5 + 5 files changed, 134 insertions(+), 98 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d767bdcc..5b88cf73b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,28 +11,28 @@ endif () list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules) include(DynamicVersion) dynamic_version( - PROJECT_PREFIX Libint2Compiler_ - GIT_ARCHIVAL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/.git_archival.txt - VERSION_FULL_MODE POST - OUTPUT_COMMIT LibintRepository_COMMIT - OUTPUT_VERSION LibintRepository_VERSION - OUTPUT_DESCRIBE LibintRepository_DESCRIBE - OUTPUT_DISTANCE LibintRepository_DISTANCE - OUTPUT_SHORT_HASH LibintRepository_SHORT_HASH - OUTPUT_VERSION_FULL LibintRepository_VERSION_FULL - ) + PROJECT_PREFIX Libint2Compiler_ + GIT_ARCHIVAL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/.git_archival.txt + VERSION_FULL_MODE POST + OUTPUT_COMMIT LibintRepository_COMMIT + OUTPUT_VERSION LibintRepository_VERSION + OUTPUT_DESCRIBE LibintRepository_DESCRIBE + OUTPUT_DISTANCE LibintRepository_DISTANCE + OUTPUT_SHORT_HASH LibintRepository_SHORT_HASH + OUTPUT_VERSION_FULL LibintRepository_VERSION_FULL +) set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build.") # foil Ninja Debug on Windows project( - Libint2Compiler - VERSION ${LibintRepository_VERSION} - DESCRIPTION - "A library for the evaluation of molecular integrals of many-body operators over Gaussian functions" - HOMEPAGE_URL "http://libint.valeyev.net" - LANGUAGES CXX - ) - # * http://libint.valeyev.net/ redirects to https://github.com/evaleev/libint + Libint2Compiler + VERSION 
${LibintRepository_VERSION} + DESCRIPTION + "A library for the evaluation of molecular integrals of many-body operators over Gaussian functions" + HOMEPAGE_URL "http://libint.valeyev.net" + LANGUAGES CXX +) +# * http://libint.valeyev.net/ redirects to https://github.com/evaleev/libint set(${PROJECT_NAME}_AUTHORS "Edward F. Valeev") set(${PROJECT_NAME}_LICENSE "GPL-3.0-only for generator; LGPL-3.0-only for generated library") @@ -109,64 +109,67 @@ option_with_default(CMAKE_BUILD_TYPE "Build type (Release or Debug)" Release) ### compiler-only option_with_print(LIBINT2_BUILD_LIBRARY_AS_SUBPROJECT - "[EXPERT] Build generated library as a subproject: if FALSE will configure and build separately" OFF) + "[EXPERT] Build generated library as a subproject: if FALSE will configure and build separately" OFF) ### library-only option_with_print(LIBINT2_REQUIRE_CXX_API - "C++11 Libint API: define library targets + test (requires Eigen3, Boost is optional but strongly recommended)" ON) + "C++11 Libint API: define library targets + test (requires Eigen3, Boost is optional but strongly recommended)" ON) option_with_print(LIBINT2_REQUIRE_CXX_API_COMPILED - "Build C++11 Compiled (not just header-only) targets (requires Eigen3, Boost strongly recommended)" ON) + "Build C++11 Compiled (not just header-only) targets (requires Eigen3, Boost strongly recommended)" ON) option_with_print(LIBINT2_ENABLE_FORTRAN - "Build Fortran03+ Libint interface (requires Fortran)" OFF) + "Build Fortran03+ Libint interface (requires Fortran)" OFF) option_with_print(LIBINT2_ENABLE_PYTHON - "Build Python bindings (requires Python and Pybind11 and Eigen3)" OFF) + "Build Python bindings (requires Python and Pybind11 and Eigen3)" OFF) option_with_print(LIBINT2_PREFIX_PYTHON_INSTALL - "For LIBINT2_ENABLE_PYTHON=ON, whether to install the Python module in the Linux manner to CMAKE_INSTALL_PREFIX or to not install it. 
See target libint2-python-wheel for alternate installation in the Python manner to Python_EXECUTABLE's site-packages." OFF) + "For LIBINT2_ENABLE_PYTHON=ON, whether to install the Python module in the Linux manner to CMAKE_INSTALL_PREFIX or to not install it. See target libint2-python-wheel for alternate installation in the Python manner to Python_EXECUTABLE's site-packages." OFF) option_with_print(BUILD_SHARED_LIBS - "Build Libint library as shared, not static" OFF) + "Build Libint library as shared, not static" OFF) option_with_print(LIBINT2_BUILD_SHARED_AND_STATIC_LIBS - "Build both shared and static Libint libraries in one shot. Uses -fPIC." OFF) + "Build both shared and static Libint libraries in one shot. Uses -fPIC." OFF) option_with_print(LIBINT2_ENABLE_MPFR - "Use GNU MPFR library for high-precision testing (EXPERTS ONLY). Consumed at library build-time." OFF) + "Use GNU MPFR library for high-precision testing (EXPERTS ONLY). Consumed at library build-time." OFF) # <<< Which Integrals Classes, Which Derivative Levels >>> option_with_default(LIBINT2_ENABLE_ONEBODY - "Compile with support for up to N-th derivatives of 1-body integrals (-1 for OFF)" 0) + "Compile with support for up to N-th derivatives of 1-body integrals (-1 for OFF)" 0) option_with_default(LIBINT2_ENABLE_ERI - "Compile with support for up to N-th derivatives of 4-center electron repulsion integrals (-1 for OFF)" 0) + "Compile with support for up to N-th derivatives of 4-center electron repulsion integrals (-1 for OFF)" 0) option_with_default(LIBINT2_ENABLE_ERI3 - "Compile with support for up to N-th derivatives of 3-center electron repulsion integrals (-1 for OFF)" -1) + "Compile with support for up to N-th derivatives of 3-center electron repulsion integrals (-1 for OFF)" -1) option_with_default(LIBINT2_ENABLE_ERI2 - "Compile with support for up to N-th derivatives of 2-center electron repulsion integrals (-1 for OFF)" -1) + "Compile with support for up to N-th derivatives of 2-center 
electron repulsion integrals (-1 for OFF)" -1) +option_with_default(LIBINT2_ENABLE_RKB_ERI + "Compile with support for up to N-th derivatives of relativistic restricted kinetic + balance (RKB) 4-center electron repulsion integrals (-1 for OFF)" 0) option_with_default(LIBINT2_ENABLE_G12 - "Compile with support for N-th derivatives of MP2-F12 energies with Gaussian factors (-1 for OFF)" -1) + "Compile with support for N-th derivatives of MP2-F12 energies with Gaussian factors (-1 for OFF)" -1) option_with_default(LIBINT2_ENABLE_G12DKH - "Compile with support for N-th derivatives of DKH-MP2-F12 energies with Gaussian factors (-1 for OFF)" -1) + "Compile with support for N-th derivatives of DKH-MP2-F12 energies with Gaussian factors (-1 for OFF)" -1) option_with_print(LIBINT2_DISABLE_ONEBODY_PROPERTY_DERIVS - "Disable geometric derivatives of 1-body property integrals (all but overlap, kinetic, elecpot). + "Disable geometric derivatives of 1-body property integrals (all but overlap, kinetic, elecpot). These derivatives are disabled by default to save compile time. (enable with OFF) Note that the libtool build won't enable this- if forcibly enabled, build_libint balks." ON) option_with_print(LIBINT2_ENABLE_T1G12 - "Enable [Ti,G12] integrals when G12 integrals are enabled. Irrelevant when `LIBINT2_ENABLE_G12=OFF`. (disable with OFF)" ON) + "Enable [Ti,G12] integrals when G12 integrals are enabled. Irrelevant when `LIBINT2_ENABLE_G12=OFF`. (disable with OFF)" ON) # <<< Ordering Conventions >>> option_with_default(LIBINT2_SHGAUSS_ORDERING - "Ordering for shells of solid harmonic Gaussians: + "Ordering for shells of solid harmonic Gaussians: standard -- standard ordering (-l, -l+1 ... l) gaussian -- the Gaussian ordering (0, 1, -1, 2, -2, ... l, -l) See https://github.com/evaleev/libint/blob/master/INSTALL.md#solid-harmonic-ordering-scope-and-history ." 
standard) option_with_default(LIBINT2_CARTGAUSS_ORDERING - "Orderings for shells of cartesian Gaussians: + "Orderings for shells of cartesian Gaussians: standard -- standard ordering (xxx, xxy, xxz, xyy, xyz, xzz, yyy, ...) intv3 -- intv3 ordering (yyy, yyz, yzz, zzz, xyy, xyz, xzz, xxy, xxz, xxx) gamess -- GAMESS ordering (xxx, yyy, zzz, xxy, xxz, yyx, yyz, zzx, zzy, xyz) orca -- ORCA ordering (hydrid between GAMESS and standard) bagel -- axis-permuted version of intv3 (xxx, xxy, xyy, yyy, xxz, xyz, yyz, xzz, yzz, zzz)" standard) option_with_default(LIBINT2_SHELL_SET - "Support computation of shell sets sets subject to these restrictions: + "Support computation of shell sets sets subject to these restrictions: standard -- standard ordering: for (ab|cd): l(a) >= l(b), @@ -195,99 +198,107 @@ option_with_default(LIBINT2_SHELL_SET # `export CMAKE_BUILD_PARALLEL_LEVEL=N`. option_with_default(LIBINT2_MAX_AM - "Support Gaussians of angular momentum up to N. + "Support Gaussians of angular momentum up to N. Can specify values for each derivative level as a semicolon-separated string. If ERI3 ints are enabled, this option also controls the AM of the paired centers." 4) option_with_default(LIBINT2_OPT_AM - "Optimize maximally for up to angular momentum N (N <= max-am). + "Optimize maximally for up to angular momentum N (N <= max-am). Can specify values for each derivative level as a semicolon-separated string. (default: (libint_max_am/2)+1)" -1) option_with_default(LIBINT2_MULTIPOLE_MAX_ORDER - "Maximum order of spherical multipole integrals. There is no maximum" 4) + "Maximum order of spherical multipole integrals. There is no maximum" 4) option_with_default(LIBINT2_ONEBODY_MAX_AM - "Support 1-body ints for Gaussians of angular momentum up to N. + "Support 1-body ints for Gaussians of angular momentum up to N. Can specify values for each derivative level as a semicolon-separated string. 
(default: max_am)" -1) option_with_default(LIBINT2_ONEBODY_OPT_AM - "Optimize 1-body ints maximally for up to angular momentum N (N <= max-am). + "Optimize 1-body ints maximally for up to angular momentum N (N <= max-am). Can specify values for each derivative level as a semicolon-separated string (default: (max_am/2)+1)" -1) option_with_default(LIBINT2_ERI_MAX_AM - "Support 4-center ERIs for Gaussians of angular momentum up to N. + "Support 4-center ERIs for Gaussians of angular momentum up to N. Can specify values for each derivative level as a semicolon-separated string. (default: max_am)" -1) option_with_default(LIBINT2_ERI_OPT_AM - "Optimize 4-center ERIs maximally for up to angular momentum N (N <= max-am). + "Optimize 4-center ERIs maximally for up to angular momentum N (N <= max-am). Can specify values for each derivative level as a semicolon-separated string (default: (max_am/2)+1)" -1) +option_with_default(LIBINT2_RKB_ERI_MAX_AM + "Support relativistic restricted kinetic balance (RKB) 4-center ERIs for Gaussians of angular momentum up to N. + Can specify values for each derivative level as a semicolon-separated string. (default: max_am)" -1) +option_with_default(LIBINT2_RKB_ERI_OPT_AM + "Optimize relativistic restricted kinetic balance (RKB) 4-center ERIs maximally for up to angular momentum N (N <= max-am). + Can specify values for each derivative level as a semicolon-separated string (default: (max_am/2)+1)" -1) + + option_with_default(LIBINT2_ERI3_MAX_AM - "Support 3-center ERIs for Gaussians of angular momentum up to N. + "Support 3-center ERIs for Gaussians of angular momentum up to N. Can specify values for each derivative level as a semicolon-separated string. (default: max_am) This option controls only the single fitting center. The paired centers use LIBINT2_MAX_AM." -1) option_with_default(LIBINT2_ERI3_OPT_AM - "Optimize 3-center ERIs maximally for up to angular momentum N (N <= max-am). 
+ "Optimize 3-center ERIs maximally for up to angular momentum N (N <= max-am). Can specify values for each derivative level as a semicolon-separated string. (default: (max_am/2)+1)" -1) option_with_print(LIBINT2_ERI3_PURE_SH - "Assume the 'unpaired' center of 3-center ERIs will be transformed to pure solid harmonics" OFF) + "Assume the 'unpaired' center of 3-center ERIs will be transformed to pure solid harmonics" OFF) option_with_default(LIBINT2_ERI2_MAX_AM - "Support 2-center ERIs for Gaussians of angular momentum up to N. + "Support 2-center ERIs for Gaussians of angular momentum up to N. Can specify values for each derivative level as a semicolon-separated string. (default: max_am)" -1) option_with_default(LIBINT2_ERI2_OPT_AM - "Optimize 2-center ERIs maximally for up to angular momentum N (N <= max-am). + "Optimize 2-center ERIs maximally for up to angular momentum N (N <= max-am). Can specify values for each derivative level as a semicolon-separated string. (default: (max_am/2)+1)" -1) option_with_print(LIBINT2_ERI2_PURE_SH - "Assume the 2-center ERIs will be transformed to pure solid harmonics" OFF) + "Assume the 2-center ERIs will be transformed to pure solid harmonics" OFF) option_with_default(LIBINT2_G12_MAX_AM - "Support integrals for G12 methods of angular momentum up to N. (default: max_am)" -1) + "Support integrals for G12 methods of angular momentum up to N. (default: max_am)" -1) option_with_default(LIBINT2_G12_OPT_AM - "Optimize G12 integrals for up to angular momentum N (N <= max-am). (default: (max_am/2)+1)" -1) + "Optimize G12 integrals for up to angular momentum N (N <= max-am). (default: (max_am/2)+1)" -1) option_with_default(LIBINT2_G12DKH_MAX_AM - "Support integrals for relativistic G12 methods of angular momentum up to N. (default: max_am)" -1) + "Support integrals for relativistic G12 methods of angular momentum up to N. 
(default: max_am)" -1) option_with_default(LIBINT2_G12DKH_OPT_AM - "Optimize G12DKH integrals for up to angular momentum N (N <= max-am). (default: (max_am/2)+1)" -1) + "Optimize G12DKH integrals for up to angular momentum N (N <= max-am). (default: (max_am/2)+1)" -1) # <<< Miscellaneous >>> option_with_print(LIBINT2_CONTRACTED_INTS - "Turn on support for contracted integrals." ON) + "Turn on support for contracted integrals." ON) option_with_default(LIBINT2_ERI_STRATEGY - "(EXPERT) Compute ERIs using the following strategy. (0 for OS, 1 for HGP, 2 for HL)" 1) + "(EXPERT) Compute ERIs using the following strategy. (0 for OS, 1 for HGP, 2 for HL)" 1) option_with_print(LIBINT2_USE_COMPOSITE_EVALUATORS - "Libint will use composite evaluators (i.e. every evaluator will compute one integral type only)" ON) + "Libint will use composite evaluators (i.e. every evaluator will compute one integral type only)" ON) option_with_print(LIBINT2_SINGLE_EVALTYPE - "Generate single evaluator type (i.e. all tasks use the same evaluator). OFF is NYI" ON) + "Generate single evaluator type (i.e. all tasks use the same evaluator). OFF is NYI" ON) option_with_default(LIBINT2_ENABLE_UNROLLING - "Unroll shell sets into integrals (will unroll shell sets larger than N) (0 for never, N for N, 1000000000 for always)" 100) + "Unroll shell sets into integrals (will unroll shell sets larger than N) (0 for never, N for N, 1000000000 for always)" 100) option_with_default(LIBINT2_ALIGN_SIZE - "(EXPERT) if posix_memalign is available, this will specify alignment of Libint data, in units of + "(EXPERT) if posix_memalign is available, this will specify alignment of Libint data, in units of sizeof(LIBINT2_REALTYPE). Default is to use built-in heuristics: system-determined for vectorization off (default) or veclen * sizeof(LIBINT2_REALTYPE) for vectorization on." 0) mark_as_advanced(LIBINT2_ALIGN_SIZE) option_with_default(LIBINT2_REALTYPE - "Specifies the floating-point data type used by the library. 
Consumed at library build-time." double) + "Specifies the floating-point data type used by the library. Consumed at library build-time." double) option_with_print(LIBINT2_USER_DEFINED_REAL_INCLUDES - "Additional #includes necessary to use the real type." OFF) + "Additional #includes necessary to use the real type." OFF) include(int_userreal) option_with_print(LIBINT2_GENERATE_FMA - "Generate FMA (fused multiply-add) instructions (to benefit must have FMA-capable hardware and compiler)" OFF) + "Generate FMA (fused multiply-add) instructions (to benefit must have FMA-capable hardware and compiler)" OFF) option_with_print(LIBINT2_ENABLE_GENERIC_CODE - "Use manually-written generic code" OFF) + "Use manually-written generic code" OFF) option_with_print(LIBINT2_API_PREFIX - "Prepend this string to every name in the library API (except for the types)." OFF) + "Prepend this string to every name in the library API (except for the types)." OFF) option_with_print(LIBINT2_VECTOR_LENGTH - "Compute integrals in vectors of length N." OFF) + "Compute integrals in vectors of length N." OFF) option_with_default(LIBINT2_VECTOR_METHOD - "Specifies how to vectorize integrals. Irrelevant when `LIBINT2_VECTOR_LENGTH=OFF. Allowed values are 'block' (default), and 'line'." block) + "Specifies how to vectorize integrals. Irrelevant when `LIBINT2_VECTOR_LENGTH=OFF. Allowed values are 'block' (default), and 'line'." block) option_with_print(LIBINT2_ACCUM_INTS - "Accumulate integrals to the buffer, rather than copy (OFF for copy, ON for accum)." OFF) + "Accumulate integrals to the buffer, rather than copy (OFF for copy, ON for accum)." OFF) option_with_print(LIBINT2_FLOP_COUNT - "Support (approximate) FLOP counting by the library. (Generated code will require C++11!)" OFF) + "Support (approximate) FLOP counting by the library. (Generated code will require C++11!)" OFF) option_with_print(LIBINT2_PROFILE - "Turn on profiling instrumentation of the library. 
(Generated code will require C++11!)" OFF) + "Turn on profiling instrumentation of the library. (Generated code will require C++11!)" OFF) option_with_print(LIBINT2_ENABLE_MPFR - "Use GNU MPFR library for high-precision testing (EXPERTS ONLY). Consumed at library build-time." OFF) + "Use GNU MPFR library for high-precision testing (EXPERTS ONLY). Consumed at library build-time." OFF) option_with_default(LIBINT2_EXPORT_COMPRESSOR - "Export tarball with compression gzip or bzip2" gzip) + "Export tarball with compression gzip or bzip2" gzip) # next one defined by `include(CTest)` message(STATUS "Showing option BUILD_TESTING: ${BUILD_TESTING}") @@ -304,13 +315,13 @@ include(int_am) check_function_exists(posix_memalign HAVE_POSIX_MEMALIGN) if (NOT HAVE_POSIX_MEMALIGN) message(FATAL_ERROR "did not find posix_memalign ... this SHOULD NOT happen. Cannot proceed.") -endif() +endif () check_include_file_cxx(stdint.h HAVE_STDINT_H) # limits.h? if (cxx_std_11 IN_LIST CMAKE_CXX_COMPILE_FEATURES) set(LIBINT_HAS_CXX11 1) -endif() +endif () booleanize01(LIBINT2_ERI3_PURE_SH) booleanize01(LIBINT2_ERI2_PURE_SH) @@ -331,9 +342,9 @@ if (LIBINT2_EXPORT_COMPRESSOR STREQUAL "gzip") elseif (LIBINT2_EXPORT_COMPRESSOR STREQUAL "bzip2") set(LIBINT_EXPORT_COMPRESSOR_CMD "jcf") set(LIBINT_EXPORT_COMPRESSOR_EXT "tbz2") -else() +else () message(FATAL_ERROR "No valid compressor; invoke CMake with -DLIBINT2_EXPORT_COMPRESSOR=gzip|bzip2") -endif() +endif () ################################## Dependencies ################################# @@ -344,9 +355,9 @@ if (LIBINT2_ENABLE_MPFR) # mpfr detected in CMakeLists.txt.export at appropriate time for library, but prechecking here find_package(Multiprecision MODULE REQUIRED COMPONENTS gmpxx mpfr) set(LIBINT_HAS_MPFR 1) -else() +else () find_package(Multiprecision MODULE REQUIRED COMPONENTS gmpxx) -endif() +endif () get_property(_loc TARGET Multiprecision::gmp PROPERTY LOCATION) message(VERBOSE "${Cyan}Found GMP${ColourReset}: ${_loc}") @@ -355,12 +366,12 
@@ message(VERBOSE "${Cyan}Found GMPXX${ColourReset}: ${_loc}") if (TARGET Multiprecision::mpfr) get_property(_loc TARGET Multiprecision::mpfr PROPERTY LOCATION) message(VERBOSE "${Cyan}Found MPFR${ColourReset}: ${_loc} (found version ${MPFR_VERSION})") -endif() +endif () find_package(Boost 1.57 REQUIRED) if (TARGET Boost::headers) set(LIBINT_HAS_SYSTEM_BOOST_PREPROCESSOR_VARIADICS 1) -endif() +endif () # deferring find_package(Eigen3) to library (CMakeLists.txt.export) @@ -370,9 +381,9 @@ endif() set(EXPORT_STAGE_DIR ${PROJECT_BINARY_DIR}/libint-${LIBINT_EXT_VERSION}) configure_file( - cmake/modules/int_computed.cmake.in - cmake/modules/int_computed.cmake - @ONLY) + cmake/modules/int_computed.cmake.in + cmake/modules/int_computed.cmake + @ONLY) # CMake data transmitted to C++ via config.h for generator/compiler (_EXPORT_MODE=0). # Same info is positioned for the library export, but _EXPORT_MODE=1 turns on @@ -380,28 +391,28 @@ configure_file( # library build time. set(_EXPORT_MODE 0) # convert user-facing LIBINT2_ variables to LIBINT_ internal variables -foreach(_var API_PREFIX;ERI3_PURE_SH;ERI2_PURE_SH;DISABLE_ONEBODY_PROPERTY_DERIVS;ENABLE_UNROLLING;ENABLE_GENERIC_CODE;VECTOR_LENGTH;VECTOR_METHOD;ALIGN_SIZE;USER_DEFINED_REAL;USER_DEFINED_REAL_INCLUDES;GENERATE_FMA;ACCUM_INTS;FLOP_COUNT;PROFILE;CONTRACTED_INTS;SINGLE_EVALTYPE;USE_COMPOSITE_EVALUATORS;ERI_STRATEGY;MULTIPOLE_MAX_ORDER) +foreach (_var API_PREFIX;ERI3_PURE_SH;ERI2_PURE_SH;DISABLE_ONEBODY_PROPERTY_DERIVS;ENABLE_UNROLLING;ENABLE_GENERIC_CODE;VECTOR_LENGTH;VECTOR_METHOD;ALIGN_SIZE;USER_DEFINED_REAL;USER_DEFINED_REAL_INCLUDES;GENERATE_FMA;ACCUM_INTS;FLOP_COUNT;PROFILE;CONTRACTED_INTS;SINGLE_EVALTYPE;USE_COMPOSITE_EVALUATORS;ERI_STRATEGY;MULTIPOLE_MAX_ORDER) if (DEFINED LIBINT2_${_var}) if (DEFINED LIBINT_${_var}) message(FATAL_ERROR "renaming user-facing LIBINT2_${_var} variable but internal variable LIBINT_${_var} already exists") else () set(LIBINT_${_var} ${LIBINT2_${_var}}) - endif() - endif() + 
endif () + endif () endforeach () configure_file( - include/libint2/config.h.cmake.in - include/libint2/config.h - @ONLY) + include/libint2/config.h.cmake.in + include/libint2/config.h + @ONLY) set(_EXPORT_MODE 1) configure_file( - include/libint2/config.h.cmake.in - ${EXPORT_STAGE_DIR}/include/libint2/config.h - @ONLY) + include/libint2/config.h.cmake.in + ${EXPORT_STAGE_DIR}/include/libint2/config.h + @ONLY) configure_file( - include/libint2/config2.h.cmake.in - ${EXPORT_STAGE_DIR}/include/libint2/config2.h.cmake.in - COPYONLY) + include/libint2/config2.h.cmake.in + ${EXPORT_STAGE_DIR}/include/libint2/config2.h.cmake.in + COPYONLY) add_subdirectory(src) diff --git a/cmake/modules/int_am.cmake b/cmake/modules/int_am.cmake index c1d61cd55..782048aaf 100644 --- a/cmake/modules/int_am.cmake +++ b/cmake/modules/int_am.cmake @@ -357,6 +357,7 @@ endmacro() process_integrals_class(ONEBODY) process_integrals_class(ERI) +process_integrals_class(RKB_ERI) process_integrals_class(ERI3) process_integrals_class(ERI2) # unlike above, these classes (1) don't do AM_LIST and (2) require value in config.h if enabled @@ -396,7 +397,7 @@ list(REVERSE _amlist) list(APPEND Libint2_ERI_COMPONENTS "${_amlist}") message(VERBOSE "setting components ${_amlist}") -foreach(_cls ONEBODY;ERI;ERI3;ERI2;G12;G12DKH) +foreach(_cls ONEBODY;ERI;RKB_ERI;ERI3;ERI2;G12;G12DKH) if((_cls STREQUAL G12) OR (_cls STREQUAL G12DKH)) add_feature_info( "integral class ${_cls}" diff --git a/include/libint2/config.h.cmake.in b/include/libint2/config.h.cmake.in index 640e68099..6018873e5 100644 --- a/include/libint2/config.h.cmake.in +++ b/include/libint2/config.h.cmake.in @@ -71,6 +71,13 @@ #undef LIBINT_INCLUDE_ERI #endif +/* Support RKB ERI derivatives up to this order */ +#define LIBINT_INCLUDE_RKB_ERI @LIBINT_INCLUDE_RKB_ERI@ +#if @LIBINT_INCLUDE_RKB_ERI@ == -1 +#undef LIBINT_INCLUDE_RKB_ERI +#endif + + /* Support 3-center ERI derivatives up to this order */ #define LIBINT_INCLUDE_ERI3 @LIBINT_INCLUDE_ERI3@ #if 
@LIBINT_INCLUDE_ERI3@ == -1 @@ -122,6 +129,18 @@ /* Max optimized AM for ERI and its derivatives */ #cmakedefine LIBINT_ERI_OPT_AM_LIST "@LIBINT_ERI_OPT_AM_LIST@" +/* Max AM for RKB_ERI (same for all derivatives; if not defined see LIBINT_RKB_ERI_MAX_AM_LIST) */ +#cmakedefine LIBINT_RKB_ERI_MAX_AM @LIBINT_RKB_ERI_MAX_AM@ + +/* Max AM for RKB_ERI and its derivatives */ +#cmakedefine LIBINT_RKB_ERI_MAX_AM_LIST "@LIBINT_RKB_ERI_MAX_AM_LIST@" + +/* Max optimized AM for RKB_ERI (same for all derivatives; if not defined see LIBINT_RKB_ERI_OPT_AM_LIST) */ +#cmakedefine LIBINT_RKB_ERI_OPT_AM @LIBINT_RKB_ERI_OPT_AM@ + +/* Max optimized AM for RKB_ERI and its derivatives */ +#cmakedefine LIBINT_RKB_ERI_OPT_AM_LIST "@LIBINT_RKB_ERI_OPT_AM_LIST@" + /* Max AM for 3-center ERI (same for all derivatives; if not defined see LIBINT_ERI3_MAX_AM_LIST) */ #cmakedefine LIBINT_ERI3_MAX_AM @LIBINT_ERI3_MAX_AM@ diff --git a/include/libint2/cxxapi.h b/include/libint2/cxxapi.h index 0aceb509b..a54802817 100644 --- a/include/libint2/cxxapi.h +++ b/include/libint2/cxxapi.h @@ -35,7 +35,7 @@ #if !defined(LIBINT_INCLUDE_ONEBODY) || \ !(defined(LIBINT_INCLUDE_ERI) || defined(LIBINT_INCLUDE_ERI3) || \ - defined(LIBINT_INCLUDE_ERI2)) + defined(LIBINT_INCLUDE_ERI2) || defined(LIBINT_INCLUDE_RKB_ERI)) #error \ "C++ API is only supported if both 1-body and some (eri, eri3, eri2) 2-body integrals are enabled" #endif diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index 7baef7f1c..c0a69c6bd 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -567,6 +567,7 @@ void try_main(int argc, char* argv[]) { taskmgr.add(task_label("eri", d)); } #endif + #ifdef LIBINT_INCLUDE_ERI3 for (unsigned int d = 0; d <= LIBINT_INCLUDE_ERI3; ++d) { taskmgr.add(task_label("3eri", d)); @@ -985,8 +986,12 @@ void print_config(std::ostream& os) { #ifdef LIBINT_INCLUDE_G12DKH os << "Will support G12DKH" << endl; #endif +#ifdef LIBINT_INCLUDE_RKB_ERI + os << "RKB works " << std::endl; 
+#endif } + #ifdef LIBINT_INCLUDE_ERI void build_TwoPRep_2b_2k(std::ostream& os, const std::shared_ptr& cparams, From 7941816503701ba47473d9660e9380bac7297c9a Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Sat, 7 Feb 2026 14:32:26 -0500 Subject: [PATCH 02/22] =?UTF-8?q?Can=20generate=20code=20for=20`(LL|SS)`?= =?UTF-8?q?=20type=20integral,=20i.e.,=20`(=CE=BC=20=CE=BD=20|=20(=CF=83.p?= =?UTF-8?q?)=CE=BA=20(=CF=83.p)=CE=BB)`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/bin/libint/build_libint.cc | 186 ++++++++++++++---- .../comp_11_Coulomb\317\203p\317\203p_11.h" | 167 ++++++++++++++++ .../libint/comp_1_\317\203pV\317\203p_1.h" | 4 +- src/bin/libint/master_ints_list.h | 6 +- src/bin/libint/master_rrs_list.h | 3 + src/bin/libint/oper.h | 58 +++++- src/bin/libint/strategy.cc | 9 + 7 files changed, 383 insertions(+), 50 deletions(-) create mode 100644 "src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index c0a69c6bd..9ff8f5710 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -193,12 +193,16 @@ static void config_to_api(const std::shared_ptr& cparams, #ifdef LIBINT_INCLUDE_ERI #define USE_GENERIC_ERI_BUILD 1 #if !USE_GENERIC_ERI_BUILD +template static void build_TwoPRep_2b_2k( - std::ostream& os, const std::shared_ptr& cparams, + std::ostream& os, std::string label, + const std::shared_ptr& cparams, std::shared_ptr& iface); #else +template static void build_TwoPRep_2b_2k( - std::ostream& os, const std::shared_ptr& cparams, + std::ostream& os, std::string label, + const std::shared_ptr& cparams, std::shared_ptr& iface, unsigned int deriv_level); #endif #endif @@ -568,6 +572,23 @@ void try_main(int argc, char* argv[]) { } #endif +#ifdef LIBINT_INCLUDE_RKB_ERI +#define BOOST_PP_RKB_ERI_TASK_TUPLE (coulomb_opop) +#define BOOST_PP_RKB_ERI_TASK_OPER_TUPLE (CoulombσpσpOper) +#define 
BOOST_PP_RKB_ERI_TASK_LIST \ + BOOST_PP_TUPLE_TO_LIST(BOOST_PP_RKB_ERI_TASK_TUPLE) +#define BOOST_PP_RKB_ERI_TASK_OPER_LIST \ + BOOST_PP_TUPLE_TO_LIST(BOOST_PP_RKB_ERI_TASK_OPER_TUPLE) + + for (unsigned int d = 0; d <= LIBINT_INCLUDE_RKB_ERI; ++d) { +#define BOOST_PP_RKB_ERI_MCR1(r, data, elem) \ + taskmgr.add(task_label(BOOST_PP_STRINGIZE(elem), d)); + + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI_MCR1, _, BOOST_PP_RKB_ERI_TASK_LIST) +#undef BOOST_PP_RKB_ERI_MCR1 + } +#endif + #ifdef LIBINT_INCLUDE_ERI3 for (unsigned int d = 0; d <= LIBINT_INCLUDE_ERI3; ++d) { taskmgr.add(task_label("3eri", d)); @@ -669,6 +690,46 @@ void try_main(int argc, char* argv[]) { cparams->num_bf(task_label("eri", d), 4); } #endif + +#ifdef LIBINT_INCLUDE_RKB_ERI + for (unsigned int d = 0; d <= LIBINT_INCLUDE_RKB_ERI; ++d) { +#if defined(LIBINT_RKB_ERI_MAX_AM_LIST) +#define BOOST_PP_RKB_ERI_MCR2(r, data, elem) \ + cparams->max_am( \ + task_label(BOOST_PP_STRINGIZE(elem), d), \ + token(LIBINT_RKB_ERI_MAX_AM_LIST, ',', d)); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI_MCR2, _, BOOST_PP_RKB_ERI_TASK_LIST) +#undef BOOST_PP_RKB_ERI_MCR2 +#elif defined(LIBINT_RKB_ERI_MAX_AM) +#define BOOST_PP_RKB_ERI_MCR3(r, data, elem) \ + cparams->max_am(task_label(BOOST_PP_STRINGIZE(elem), d), \ + LIBINT_RKB_ERI_MAX_AM); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI_MCR3, _, BOOST_PP_RKB_ERI_TASK_LIST) +#undef BOOST_PP_RKB_ERI_MCR3 +#endif +#if defined(LIBINT_RKB_ERI_OPT_AM_LIST) +#define BOOST_PP_RKB_ERI_MCR4(r, data, elem) \ + cparams->max_am_opt( \ + task_label(BOOST_PP_STRINGIZE(elem), d), \ + token(LIBINT_RKB_ERI_OPT_AM_LIST, ',', d)); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI_MCR4, _, BOOST_PP_RKB_ERI_TASK_LIST) +#undef BOOST_PP_RKB_ERI_MCR4 +#elif defined(LIBINT_RKB_ERI_OPT_AM) +#define BOOST_PP_RKB_ERI_MCR5(r, data, elem) \ + cparams->max_am_opt(task_label(BOOST_PP_STRINGIZE(elem), d), \ + LIBINT_RKB_ERI_OPT_AM); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI_MCR5, _, BOOST_PP_RKB_ERI_TASK_LIST) +#undef 
BOOST_PP_RKB_ERI_MCR5 +#endif + } + for (unsigned int d = 0; d <= LIBINT_INCLUDE_RKB_ERI; ++d) { +#define BOOST_PP_RKB_ERI_MCR6(r, data, elem) \ + cparams->num_bf(task_label(BOOST_PP_STRINGIZE(elem), d), 4); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI_MCR6, _, BOOST_PP_RKB_ERI_TASK_LIST) +#undef BOOST_PP_RKB_ERI_MCR6 + } +#endif // LIBINT_INCLUDE_RKB_ERI + #ifdef LIBINT_INCLUDE_ERI3 for (unsigned int d = 0; d <= LIBINT_INCLUDE_ERI3; ++d) { #if defined(LIBINT_ERI3_MAX_AM_LIST) @@ -853,6 +914,9 @@ void try_main(int argc, char* argv[]) { #ifdef LIBINT_INCLUDE_ERI max_deriv = std::max(LIBINT_INCLUDE_ERI, max_deriv); #endif +#ifdef LIBINT_INCLUDE_RKB_ERI + max_deriv = std::max(LIBINT_INCLUDE_RKB_ERI, max_deriv); +#endif #ifdef LIBINT_INCLUDE_ERI3 max_deriv = std::max(LIBINT_INCLUDE_ERI3, max_deriv); #endif @@ -880,13 +944,25 @@ void try_main(int argc, char* argv[]) { #endif #ifdef LIBINT_INCLUDE_ERI #if !USE_GENERIC_ERI_BUILD - build_TwoPRep_2b_2k(os, cparams, iface); + build_TwoPRep_2b_2k(os, "eri", cparams, iface); #else for (unsigned int d = 0; d <= LIBINT_INCLUDE_ERI; ++d) { - build_TwoPRep_2b_2k(os, cparams, iface, d); + build_TwoPRep_2b_2k(os, "eri", cparams, iface, d); } #endif #endif + +#ifdef LIBINT_INCLUDE_RKB_ERI + for (unsigned int d = 0; d <= LIBINT_INCLUDE_RKB_ERI; ++d) { +#define BOOST_PP_RKB_ERI_MCR7(r, data, i, elem) \ + build_TwoPRep_2b_2k( \ + os, BOOST_PP_STRINGIZE(elem), cparams, iface, d); + BOOST_PP_LIST_FOR_EACH_I(BOOST_PP_RKB_ERI_MCR7, _, + BOOST_PP_RKB_ERI_TASK_LIST) +#undef BOOST_PP_RKB_ERI_MCR7 + } +#endif + #ifdef LIBINT_INCLUDE_ERI3 for (unsigned int d = 0; d <= LIBINT_INCLUDE_ERI3; ++d) { build_TwoPRep_1b_2k(os, cparams, iface, d); @@ -987,18 +1063,22 @@ void print_config(std::ostream& os) { os << "Will support G12DKH" << endl; #endif #ifdef LIBINT_INCLUDE_RKB_ERI - os << "RKB works " << std::endl; + os << "Will support restricted kinetically balance (RKB) 4-center ERIs " + << std::endl; #endif } - #ifdef LIBINT_INCLUDE_ERI -void 
build_TwoPRep_2b_2k(std::ostream& os, +template +void build_TwoPRep_2b_2k(std::ostream& os, std::string label, const std::shared_ptr& cparams, std::shared_ptr& iface, unsigned int deriv_level) { - const std::string task = task_label("eri", deriv_level); - typedef TwoPRep_11_11_sq TwoPRep_sh_11_11; + typedef GenIntegralSet_11_11 TwoBody_sh_11_11; + typedef typename OperType::Descriptor OperDescrType; + + const std::string task = task_label(label, deriv_level); + vector shells; unsigned int lmax = cparams->max_am(task); for (unsigned int l = 0; l <= lmax; l++) { @@ -1010,6 +1090,7 @@ void build_TwoPRep_2b_2k(std::ostream& os, taskmgr.current(task); iface->to_params(iface->macro_define(std::string("MAX_AM_") + task, lmax)); + const auto nullaux = typename TwoBody_sh_11_11::AuxIndexType(0u); // // Construct graphs for each desired target integral and // 1) generate source code for the found traversal path @@ -1041,9 +1122,25 @@ void build_TwoPRep_2b_2k(std::ostream& os, const int lim = 1; if (!(la == lim && lb == lim && lc == lim && ld == lim)) continue; #endif + // this will hold all target shell sets + std::vector> targets; + + ///////////////////////////////// + // loop over operator components + ///////////////////////////////// + // most important operators have 1 component ... 
+ std::vector descrs(1); // operator descriptors + // important EXCEPTION: operators with 4 quaternion components + if (std::is_same::value) { + // reset descriptors array + descrs.resize(0); + // iterate over quaternion components + for (int p = 0; p != 4; ++p) { + descrs.emplace_back(make_descr(p)); + } + } - // unroll only if max_am <= cparams->max_am_opt(task) - using std::max; + // unroll only if max_am <= cparams->max_am_opt(task) using std::max; const unsigned int max_am = max(max(la, lb), max(lc, ld)); const bool need_to_optimize = (max_am <= cparams->max_am_opt(task)); const bool need_to_unroll = @@ -1072,7 +1169,7 @@ void build_TwoPRep_2b_2k(std::ostream& os, //////////// // NB translational invariance is now handled by CR_DerivGauss CartesianDerivIterator<4> diter(deriv_level); - std::vector> targets; + bool last_deriv = false; do { CGShell a(la); @@ -1089,18 +1186,22 @@ void build_TwoPRep_2b_2k(std::ostream& os, } } - std::shared_ptr abcd = - TwoPRep_sh_11_11::Instance(a, b, c, d, mType(0u)); - targets.push_back(abcd); + // operator component loop + for (unsigned int op = 0; op != descrs.size(); ++op) { + OperType oper(descrs[op]); + + std::shared_ptr abcd = + TwoBody_sh_11_11::Instance(a, b, c, d, nullaux, oper); + targets.push_back(abcd); + } + last_deriv = diter.last(); if (!last_deriv) diter.next(); } while (!last_deriv); // append all derivatives as targets to the graph - for (std::vector>::const_iterator - t = targets.begin(); - t != targets.end(); ++t) { + for (auto it = targets.begin(); it != targets.end(); ++it) { std::shared_ptr t_ptr = - std::dynamic_pointer_cast(*t); + std::dynamic_pointer_cast(*it); dg_xxxx->append_target(t_ptr); } @@ -1112,23 +1213,35 @@ void build_TwoPRep_2b_2k(std::ostream& os, CGShell b(lb); CGShell c(lc); CGShell d(ld); - std::shared_ptr abcd = - TwoPRep_sh_11_11::Instance(a, b, c, d, mType(0u)); - abcd_label = abcd->label(); + + if constexpr (!std::is_same::value) { + OperType oper; + oper = OperType(descrs[0]); + std::shared_ptr abcd = +
TwoBody_sh_11_11::Instance(a, b, c, d, nullaux, oper); + abcd_label = abcd->label(); + } else { + std::ostringstream oss; + oss << cparams->api_prefix(); + oss << "_" << a.label() << "_" << b.label(); + oss << "_" << label; + oss << "_" << c.label() << "_" << d.label(); + abcd_label = oss.str(); + } } // + derivative level (if deriv_level > 0) - std::string label; + std::string eval_label; { - label = cparams->api_prefix(); + eval_label = cparams->api_prefix(); if (deriv_level != 0) { std::ostringstream oss; oss << "deriv" << deriv_level; - label += oss.str(); + eval_label += oss.str(); } - label += abcd_label; + eval_label += abcd_label; } - std::cout << "working on " << label << " ... "; + std::cout << "working on " << eval_label << " ... "; std::cout.flush(); std::string prefix(cparams->source_directory()); @@ -1138,7 +1251,8 @@ void build_TwoPRep_2b_2k(std::ostream& os, // this will generate code for these targets, and potentially generate // code for its prerequisites GenerateCode(dg_xxxx, context, cparams, strat, tactic, memman, - decl_filenames, def_filenames, prefix, label, false); + decl_filenames, def_filenames, prefix, eval_label, + false); // update max stack size and # of targets const std::shared_ptr& tparams = @@ -1153,7 +1267,7 @@ void build_TwoPRep_2b_2k(std::ostream& os, oss << context->label_to_name(cparams->api_prefix()) << "libint2_build_" << task << "[" << la << "][" << lb << "][" << lc << "][" << ld - << "] = " << context->label_to_name(label_to_funcname(label)) + << "] = " << context->label_to_name(label_to_funcname(eval_label)) << context->end_of_stat() << endl; iface->to_static_init(oss.str()); @@ -1369,10 +1483,9 @@ void build_TwoPRep_1b_2k(std::ostream& os, iface->to_static_init(oss.str()); // need to declare this function internally - for (std::deque::const_iterator i = decl_filenames.begin(); - i != decl_filenames.end(); ++i) { + for (auto& decl_filename : decl_filenames) { oss.str(""); - oss << "#include <" << *i << ">" << endl; + oss 
<< "#include <" << decl_filename << ">" << endl; iface->to_int_iface(oss.str()); } @@ -1381,7 +1494,7 @@ void build_TwoPRep_1b_2k(std::ostream& os, #endif dg_xxx->reset(); memman->reset(); - + std::cout << "done" << std::endl; } // end of d loop } // end of c loop } // end of bra loop @@ -1566,10 +1679,9 @@ void build_TwoPRep_1b_1k(std::ostream& os, iface->to_static_init(oss.str()); // need to declare this function internally - for (std::deque::const_iterator i = decl_filenames.begin(); - i != decl_filenames.end(); ++i) { + for (auto& decl_filename : decl_filenames) { oss.str(""); - oss << "#include <" << *i << ">" << endl; + oss << "#include <" << decl_filename << ">" << endl; iface->to_int_iface(oss.str()); } @@ -1578,7 +1690,7 @@ void build_TwoPRep_1b_1k(std::ostream& os, #endif dg_xxx->reset(); memman->reset(); - + std::cout << "done" << std::endl; } // end of ket loop } // end of bra loop } diff --git "a/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" "b/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" new file mode 100644 index 000000000..252eabf5d --- /dev/null +++ "b/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2004-2024 Edward F. Valeev + * + * This file is part of Libint compiler. + * + * Libint compiler is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Libint compiler is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Libint compiler. If not, see . 
+ * + */ + +#ifndef LIBINT_COMP_11_COULOMBΣPΣP_11_H +#define LIBINT_COMP_11_COULOMBΣPΣP_11_H + +#include +#include +#include + +namespace libint2 { + +/** + * this computes integral of + * \f$ \frac{1}{r_{ij}} \sigma \cdot \hat{p}_1 \sigma \cdot \hat{p}_2 \f$ over + * CGShell/CGF by rewriting it as a linear combination of integrals over + * derivatives of \f$ \frac{1}{r_{ij}} \f$ + * @tparam F basis function type. valid choices are CGShell or CGF + */ +template +class CR_11_Coulombσpσp_11 + : public GenericRecurrenceRelation< + CR_11_Coulombσpσp_11, F, + GenIntegralSet_11_11> { + public: + typedef CR_11_Coulombσpσp_11 ThisType; + typedef F BasisFunctionType; + typedef CoulombσpσpOper OperType; + typedef GenIntegralSet_11_11 TargetType; + typedef GenericRecurrenceRelation + ParentType; + friend class GenericRecurrenceRelation; + static const unsigned int max_nchildren = 100; // TODO figure out + + using ParentType::Instance; + + static bool directional() { return false; } + + private: + using ParentType::is_simple; + using ParentType::target_; + using ParentType::RecurrenceRelation::expr_; + using ParentType::RecurrenceRelation::nflops_; + + /// Constructor is private, used by ParentType::Instance that maintains + /// registry of these objects + CR_11_Coulombσpσp_11(const std::shared_ptr &, unsigned int = 0); + + static std::string descr() { return "CR"; } +}; + +template +CR_11_Coulombσpσp_11::CR_11_Coulombσpσp_11( + const std::shared_ptr &Tint, unsigned int) + : ParentType(Tint, 0) { + assert(Tint->num_func_bra(/* particle */ 0) == 1); + assert(Tint->num_func_bra(/* particle */ 1) == 1); + assert(Tint->num_func_ket(/* particle */ 0) == 1); + assert(Tint->num_func_ket(/* particle */ 1) == 1); + + F a(Tint->bra(0, 0)); + F b(Tint->ket(0, 0)); + F c(Tint->bra(1, 0)); + F d(Tint->ket(1, 0)); + + const auto &oper = Tint->oper(); + + // can express integrals of Coulombσpσp in terms of + // derivative integrals of 1/r12 for primitive Gaussians + // only + if (a.contracted() ||
b.contracted() || c.contracted() || d.contracted()) + return; + + using namespace libint2::algebra; + using namespace libint2::prefactor; + using libint2::algebra::operator*; + + const mType zero_m(0u); + + ChildFactory> + factory(this); + + constexpr auto x = 0; + constexpr auto y = 1; + constexpr auto z = 2; + + F c_x{c}; + c_x.deriv().inc(x); // d(c)/dx = c_x + F c_y{c}; + c_y.deriv().inc(y); // d(c)/dy = c_y + F c_z{c}; + c_z.deriv().inc(z); // d(c)/dz = c_z + + F d_x{d}; + d_x.deriv().inc(x); // d(d)/dx = d_x + F d_y{d}; + d_y.deriv().inc(y); // d(d)/dy = d_y + F d_z{d}; + d_z.deriv().inc(z); // d(d)/dz = d_z + + // Component wise generation for quaternion ( a b | 1/r12 | (σ.p) c (σ.p) d ) + switch (oper->descr().quaternion_index()) { + case 0: { + // zeroth component = (a b | c_x d_x) + (a b | c_y d_y) + (a b | c_z d_z) + auto a_b_cx_dx = factory.make_child(a, b, c_x, d_x, zero_m); + auto a_b_cy_dy = factory.make_child(a, b, c_y, d_y, zero_m); + auto a_b_cz_dz = factory.make_child(a, b, c_z, d_z, zero_m); + if (is_simple()) { + expr_ = a_b_cx_dx + a_b_cy_dy + a_b_cz_dz; + nflops_ += 2; + } + } break; + case 1: { + // x component = (a b | c_y d_z) - (a b | c_z d_y) + auto a_b_cy_dz = factory.make_child(a, b, c_y, d_z, zero_m); + auto a_b_cz_dy = factory.make_child(a, b, c_z, d_y, zero_m); + if (is_simple()) { + expr_ = a_b_cy_dz - a_b_cz_dy; + nflops_ += 1; + } + } break; + case 2: { + // y component = (a b | c_z d_x) - (a b | c_x d_z) + auto a_b_cz_dx = factory.make_child(a, b, c_z, d_x, zero_m); + auto a_b_cx_dz = factory.make_child(a, b, c_x, d_z, zero_m); + if (is_simple()) { + expr_ = a_b_cz_dx - a_b_cx_dz; + nflops_ += 1; + } + } break; + case 3: { + // z component = (a b | c_x d_y) - (a b | c_y d_x) + auto a_b_cx_dy = factory.make_child(a, b, c_x, d_y, zero_m); + auto a_b_cy_dx = factory.make_child(a, b, c_y, d_x, zero_m); + if (is_simple()) { + expr_ = a_b_cx_dy - a_b_cy_dx; + nflops_ += 1; + } + } break; + default: + throw std::runtime_error( + 
"CR_11_Coulombσpσp_11: invalid quaternionic index"); + } + +} // CR_11_Coulombσpσp_11::CR_11_Coulombσpσp_11 +}; // namespace libint2 + +#endif // LIBINT_COMP_11_COULOMBΣPΣP_11_H diff --git "a/src/bin/libint/comp_1_\317\203pV\317\203p_1.h" "b/src/bin/libint/comp_1_\317\203pV\317\203p_1.h" index cb131ebb8..9fbdef361 100644 --- "a/src/bin/libint/comp_1_\317\203pV\317\203p_1.h" +++ "b/src/bin/libint/comp_1_\317\203pV\317\203p_1.h" @@ -107,7 +107,7 @@ CR_1_σpVσp_1::CR_1_σpVσp_1(const std::shared_ptr &Tint, // (a|W0|b) = (d a/dAx | V | d b/dBx) + (d a/dAy | V | d b/dBy) + (d a/dAz | V // | d b/dBz) - switch (oper->descr().pauli_index()) { + switch (oper->descr().quaternion_index()) { case 0: { auto Dx_a_V_Dx_b = factory.make_child(Dx_a, Dx_b, zero_m); auto Dy_a_V_Dy_b = factory.make_child(Dy_a, Dy_b, zero_m); @@ -146,7 +146,7 @@ CR_1_σpVσp_1::CR_1_σpVσp_1(const std::shared_ptr &Tint, } } break; default: - throw std::runtime_error("CR_1_σpVσp_1: invalid Pauli index"); + throw std::runtime_error("CR_1_σpVσp_1: invalid quaternionic index"); } } // CR_1_σpVσp_1::CR_1_σpVσp_1 diff --git a/src/bin/libint/master_ints_list.h b/src/bin/libint/master_ints_list.h index 1aa8c3e64..c5a6f4655 100644 --- a/src/bin/libint/master_ints_list.h +++ b/src/bin/libint/master_ints_list.h @@ -106,6 +106,9 @@ typedef GenIntegralSet_1_1, CartesianMultipoleOper<1u>, ////////////////////////// typedef GenIntegralSet_11_11 TwoPRep_11_11_sq; typedef GenIntegralSet_11_11 TwoPRep_11_11_int; +typedef GenIntegralSet_11_11 + Coulombσpσp_11_11_sq; +typedef GenIntegralSet_11_11 Coulombσpσp_11_11_int; typedef GenIntegralSet_11_11 R12kG12_11_11_sq; typedef GenIntegralSet_11_11 R12kG12_11_11_int; typedef GenIntegralSet_11_11 @@ -144,7 +147,8 @@ typedef boost::mpl::list< CMultipole_1_1_int_y, CMultipole_1_1_int_z, SMultipole_1_1_sh, SMultipole_1_1_int, #endif - TwoPRep_11_11_sq, TwoPRep_11_11_int, R12kG12_11_11_sq, R12kG12_11_11_int, + TwoPRep_11_11_sq, TwoPRep_11_11_int, Coulombσpσp_11_11_sq, + 
Coulombσpσp_11_11_int, R12kG12_11_11_sq, R12kG12_11_11_int, R12kR12lG12_11_11_sq, R12kR12lG12_11_11_int, TiG12_11_11_sq, TiG12_11_11_int, G12TiG12_11_11_sq, G12TiG12_11_11_int, DivG12prime_xTx_11_11_sq, DivG12prime_xTx_11_11_int, diff --git a/src/bin/libint/master_rrs_list.h b/src/bin/libint/master_rrs_list.h index f3ec4e2d2..32e56878f 100644 --- a/src/bin/libint/master_rrs_list.h +++ b/src/bin/libint/master_rrs_list.h @@ -21,6 +21,7 @@ #ifndef _libint2_src_bin_libint_masterrrslist_h_ #define _libint2_src_bin_libint_masterrrslist_h_ +#include #include #include #include @@ -266,6 +267,8 @@ typedef CR_DerivGauss Deriv_d_11_TwoPRep_11_int; +typedef CR_11_Coulombσpσp_11 CR_11_Coulombσpσp_11_sh; +typedef CR_11_Coulombσpσp_11 CR_11_Coulombσpσp_11_int; }; // namespace libint2 #endif // header guard diff --git a/src/bin/libint/oper.h b/src/bin/libint/oper.h index 091df3ac0..daa6aa644 100644 --- a/src/bin/libint/oper.h +++ b/src/bin/libint/oper.h @@ -289,22 +289,22 @@ BOOST_PP_LIST_FOR_EACH(BOOST_PP_DECLARE_HERMITIAN_ONEBODY_DESCRIPTOR, struct σpVσp_Descr : public Contractable<σpVσp_Descr> { typedef MultiplicativeODep1Body_Props Properties; - σpVσp_Descr() : pauli_index_(0) {} - σpVσp_Descr(int pauli_index) : pauli_index_(pauli_index) { - assert(pauli_index <= 3); + σpVσp_Descr() : quaternion_index_(0) {} + σpVσp_Descr(int quaternion_index) : quaternion_index_(quaternion_index) { + assert(quaternion_index <= 3); } static const unsigned int max_key = 4; - unsigned int key() const { return pauli_index(); } + unsigned int key() const { return quaternion_index(); } std::string description() const { std::string descr("opVop["); - if (pauli_index() == 0) + if (quaternion_index() == 0) descr += "0"; - else if (pauli_index() == 1) + else if (quaternion_index() == 1) descr += "Z"; - else if (pauli_index() == 2) + else if (quaternion_index() == 2) descr += "X"; - else if (pauli_index() == 3) + else if (quaternion_index() == 3) descr += "Y"; else abort(); @@ -314,10 +314,10 @@ struct 
σpVσp_Descr : public Contractable<σpVσp_Descr> { int psymm(int i, int j) const { abort(); } int hermitian(int i) const { return +1; } - int pauli_index() const { return pauli_index_; } + int quaternion_index() const { return quaternion_index_; } private: - const int pauli_index_ = -1; + const int quaternion_index_ = -1; }; typedef GenOper<σpVσp_Descr> σpVσpOper; @@ -399,6 +399,44 @@ struct TwoPRep_Descr : public Contractable { }; typedef GenOper TwoPRep; +/** Coulombσpσp is the 2-body r12^{-1} (σ.p)(σ.p) operator (quaternion-valued). + */ +struct Coulombσpσp_Descr : public Contractable { + typedef MultiplicativeSymm2Body_Props Properties; + + Coulombσpσp_Descr() : quaternion_index_(0) {} + Coulombσpσp_Descr(int quaternion_index) + : quaternion_index_(quaternion_index) { + assert(quaternion_index <= 3); + } + + static const unsigned int max_key = 4; + unsigned int key() const { return quaternion_index(); } + std::string description() const { + std::string descr("coulomb_opop["); + if (quaternion_index() == 0) + descr += "0"; + else if (quaternion_index() == 1) + descr += "Z"; + else if (quaternion_index() == 2) + descr += "X"; + else if (quaternion_index() == 3) + descr += "Y"; + else + abort(); + return descr + "]"; + } + std::string label() const { return description(); } + int psymm(int i, int j) const { abort(); } + int hermitian(int i) const { return +1; } + + int quaternion_index() const { return quaternion_index_; } + + private: + const int quaternion_index_ = -1; +}; +typedef GenOper CoulombσpσpOper; + /** GTG_1d is the two-body 1-dimensional Gaussian geminal */ struct GTG_1d_Descr : public Contractable { diff --git a/src/bin/libint/strategy.cc b/src/bin/libint/strategy.cc index bbfc3fe21..bcab9dcad 100644 --- a/src/bin/libint/strategy.cc +++ b/src/bin/libint/strategy.cc @@ -115,6 +115,15 @@ struct MasterStrategy { }; #endif +template <> +struct MasterStrategy { + typedef boost::mpl::list value; +}; +template <> +struct MasterStrategy { + typedef boost::mpl::list value; +}; + #if
LIBINT_SHELLQUARTET_STRATEGY == LIBINT_SHELLQUARTET_STRATEGY_A0C0 template <> struct MasterStrategy { From 05e9ff5c31bc4f3670f6b165f4851ef9169a2112 Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Mon, 9 Feb 2026 11:24:33 -0500 Subject: [PATCH 03/22] =?UTF-8?q?bugfix:=20added=20the=20missing=20`make?= =?UTF-8?q?=5Fdescr`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmake/modules/int_am.cmake | 2 + include/libint2/cxxapi.h | 2 +- include/libint2/engine.h | 12 ++++++ include/libint2/engine.impl.h | 74 +++++++++++++++++++--------------- src/bin/libint/build_libint.cc | 6 +++ 5 files changed, 62 insertions(+), 34 deletions(-) diff --git a/cmake/modules/int_am.cmake b/cmake/modules/int_am.cmake index 782048aaf..cc86b7aa7 100644 --- a/cmake/modules/int_am.cmake +++ b/cmake/modules/int_am.cmake @@ -434,6 +434,8 @@ foreach(_cls ONEBODY;ERI;RKB_ERI;ERI3;ERI2;G12;G12DKH) list(APPEND _amlist "onebody_${_am${_l}}${_am${_l}}_d${_d}") elseif (_cls STREQUAL "G12") list(APPEND _amlist "g12_${_am${_l}}${_am${_l}}${_am${_l}}${_am${_l}}_d${_d}") + elseif (_cls STREQUAL "RKB_ERI") + list(APPEND _amlist "rkb_eri_${_am${_l}}${_am${_l}}${_am${_l}}${_am${_l}}_d${_d}") endif() endforeach() if (_cls STREQUAL "ERI3") diff --git a/include/libint2/cxxapi.h b/include/libint2/cxxapi.h index a54802817..22686f958 100644 --- a/include/libint2/cxxapi.h +++ b/include/libint2/cxxapi.h @@ -37,7 +37,7 @@ !(defined(LIBINT_INCLUDE_ERI) || defined(LIBINT_INCLUDE_ERI3) || \ defined(LIBINT_INCLUDE_ERI2) || defined(LIBINT_INCLUDE_RKB_ERI)) #error \ - "C++ API is only supported if both 1-body and some (eri, eri3, eri2) 2-body integrals are enabled" + "C++ API is only supported if both 1-body and some (eri, eri3, eri2, rkb_eri) 2-body integrals are enabled" #endif #include diff --git a/include/libint2/engine.h b/include/libint2/engine.h index 7e29eb710..81048b049 100644 --- a/include/libint2/engine.h +++ b/include/libint2/engine.h @@ -153,6 +153,9 @@ enum 
class Operator { coulomb, /// alias for Operator::coulomb r12_m1 = coulomb, + /// (2-body) \f$ r_{12}^{-1} (σ.p_{k1})(σ.p_{k2})\f$ where k1 & k2 are + /// centers of ket1 and ket2, respectively + coulomb_opop, /// contracted Gaussian geminal cgtg, /// contracted Gaussian geminal times Coulomb @@ -246,6 +249,7 @@ struct operator_traits typedef const libint2::FmEval_Reference core_eval_type; #endif }; + template <> struct operator_traits : public operator_traits { @@ -346,6 +350,14 @@ struct operator_traits typedef const libint2::FmEval_Reference core_eval_type; #endif }; + +template <> +struct operator_traits + : public operator_traits { + static constexpr auto nopers = 4; + static constexpr auto intrinsic_deriv_order = 2; +}; + namespace detail { template struct cgtg_operator_traits : public detail::default_operator_traits { diff --git a/include/libint2/engine.impl.h b/include/libint2/engine.impl.h index 05d5808ea..8e107449b 100644 --- a/include/libint2/engine.impl.h +++ b/include/libint2/engine.impl.h @@ -70,30 +70,31 @@ typename std::remove_all_extents::type* to_ptr1(T (&a)[N]) { /// These MUST appear in the same order as in Operator. 
/// You must also update BOOST_PP_NBODY_OPERATOR_LAST_ONEBODY_INDEX when you add /// one-body ints -#define BOOST_PP_NBODY_OPERATOR_LIST \ - (overlap, /* overlap */ \ - (kinetic, /* kinetic */ \ - (elecpot, /* nuclear */ \ - (elecpot, /* erf_nuclear */ \ - (elecpot, /* erfc_nuclear */ \ - (elecpot, /* erfx_nuclear */ \ - (1emultipole, /* emultipole1 */ \ - (2emultipole, /* emultipole2 */ \ - (3emultipole, /* emultipole3 */ \ - (sphemultipole, /* sphemultipole */ \ - (opVop, /* opVop */ \ - (eri, /* delta */ \ - (eri, /* coulomb */ \ - (eri, /* cgtg */ \ - (eri, /* cgtg_x_coulomb */ \ - (eri, /* delcgtg2 */ \ - (eri, /* r12 */ \ - (eri, /* erf_coulomb */ \ - (eri, /* erfc_coulomb */ \ - (eri, /* erfx_coulomb */ \ - (eri, /* stg */ \ - (eri, /* yukawa */ \ - BOOST_PP_NIL)))))))))))))))))))))) +#define BOOST_PP_NBODY_OPERATOR_LIST \ + (overlap, /* overlap */ \ + (kinetic, /* kinetic */ \ + (elecpot, /* nuclear */ \ + (elecpot, /* erf_nuclear */ \ + (elecpot, /* erfc_nuclear */ \ + (elecpot, /* erfx_nuclear */ \ + (1emultipole, /* emultipole1 */ \ + (2emultipole, /* emultipole2 */ \ + (3emultipole, /* emultipole3 */ \ + (sphemultipole, /* sphemultipole */ \ + (opVop, /* opVop */ \ + (eri, /* delta */ \ + (eri, /* coulomb */ \ + (eri, /* coulomb_opop */ \ + (eri, /* cgtg */ \ + (eri, /* cgtg_x_coulomb */ \ + (eri, /* delcgtg2 */ \ + (eri, /* r12 */ \ + (eri, /* erf_coulomb */ \ + (eri, /* erfc_coulomb */ \ + (eri, /* erfx_coulomb */ \ + (eri, /* stg */ \ + (eri, /* yukawa */ \ + BOOST_PP_NIL))))))))))))))))))))))) #define BOOST_PP_NBODY_OPERATOR_INDEX_TUPLE \ BOOST_PP_MAKE_TUPLE(BOOST_PP_LIST_SIZE(BOOST_PP_NBODY_OPERATOR_LIST)) @@ -663,23 +664,23 @@ __libint2_engine_inline void Engine::initialize(size_t max_nprim) { // validate braket #ifndef LIBINT_INCLUDE_ONEBODY assert(braket_ != BraKet::x_x && - "this braket type not supported by the library; give --enable-1body " - "to configure"); + "this braket type not supported by the library; configure with " + 
"-DLIBINT_INCLUDE_ONEBODY >= 0"); #endif #ifndef LIBINT_INCLUDE_ERI assert(braket_ != BraKet::xx_xx && - "this braket type not supported by the library; give --enable-eri to " - "configure"); + "this braket type not supported by the library; configure with " + "-DLIBINT_INCLUDE_ERI >= 0"); #endif #ifndef LIBINT_INCLUDE_ERI3 assert((braket_ != BraKet::xs_xx && braket_ != BraKet::xx_xs) && - "this braket type not supported by the library; give --enable-eri3 to " - "configure"); + "this braket type not supported by the library; configure with " + "-DLIBINT_INCLUDE_ERI3 >= 0"); #endif #ifndef LIBINT_INCLUDE_ERI2 assert(braket_ != BraKet::xs_xs && - "this braket type not supported by the library; give --enable-eri2 to " - "configure"); + "this braket type not supported by the library; configure with " + "-DLIBINT_INCLUDE_ERI2 >= 0"); #endif // make sure it's no default initialized @@ -1421,7 +1422,7 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( const scalar_type rho = gammap * gammaq * oogammapq; const scalar_type T = PQ2 * rho; auto* gm_ptr = &(primdata.LIBINT_T_SS_EREP_SS(0)[0]); - const auto mmax = l + deriv_order_; + const auto mmax = l + deriv_order_ + intrinsic_deriv_order(); if (!skip_core_ints) { switch (oper_) { @@ -1432,6 +1433,13 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( .first(); core_eval_ptr->eval(gm_ptr, T, mmax); } break; + case Operator::coulomb_opop: { + const auto& core_eval_ptr = + any_cast&>(core_eval_pack_) + .first(); + core_eval_ptr->eval(gm_ptr, T, mmax); + } break; case Operator::cgtg_x_coulomb: { const auto& core_eval_ptr = any_cast σpVσp_Descr make_descr<σpVσp_Descr>(int p, int, int) { return σpVσp_Descr(p); } + +template <> +Coulombσpσp_Descr make_descr(int p, int, int) { + return Coulombσpσp_Descr(p); +} + } // namespace template From f3274a99e927bfaa40837831d64056e3762c2e55 Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Tue, 10 Feb 2026 12:03:00 -0500 Subject: [PATCH 
04/22] `Engine` can initialize for coulomb_opop operator --- include/libint2/engine.impl.h | 4 ++-- src/bin/libint/build_libint.cc | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/include/libint2/engine.impl.h b/include/libint2/engine.impl.h index 8e107449b..40d50763b 100644 --- a/include/libint2/engine.impl.h +++ b/include/libint2/engine.impl.h @@ -84,7 +84,7 @@ typename std::remove_all_extents::type* to_ptr1(T (&a)[N]) { (opVop, /* opVop */ \ (eri, /* delta */ \ (eri, /* coulomb */ \ - (eri, /* coulomb_opop */ \ + (coulomb_opop, /* coulomb_opop */ \ (eri, /* cgtg */ \ (eri, /* cgtg_x_coulomb */ \ (eri, /* delcgtg2 */ \ @@ -1462,7 +1462,7 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( const detail::core_eval_pack_type&>( core_eval_pack_) .first(); - const auto& core_ints_params = + ` const auto& core_ints_params = any_cast::oper_params_type&>( core_ints_params_); diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index 1a3a93b95..8acb3814d 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -1071,6 +1071,9 @@ void print_config(std::ostream& os) { #ifdef LIBINT_INCLUDE_RKB_ERI os << "Will support restricted kinetically balance (RKB) 4-center ERIs " << std::endl; + if (LIBINT_INCLUDE_RKB_ERI > 0) + os << "(deriv order = " << LIBINT_INCLUDE_RKB_ERI << ")"; + os << endl; #endif } @@ -2262,6 +2265,12 @@ void config_to_api(const std::shared_ptr& cparams, iface->to_params(iface->macro_define("DERIV_ERI_ORDER", LIBINT_INCLUDE_ERI)); max_deriv_order = std::max(max_deriv_order, LIBINT_INCLUDE_ERI); #endif +#ifdef LIBINT_INCLUDE_RKB_ERI + iface->to_params(iface->macro_define("SUPPORT_RKB_ERI", 1)); + iface->to_params( + iface->macro_define("DERIV_RKB_ERI_ORDER", LIBINT_INCLUDE_RKB_ERI)); + max_deriv_order = std::max(max_deriv_order, LIBINT_INCLUDE_RKB_ERI); +#endif #ifdef LIBINT_INCLUDE_ERI3 iface->to_params(iface->macro_define("SUPPORT_ERI3", 1)); 
iface->to_params( @@ -2317,8 +2326,9 @@ void config_to_api(const std::shared_ptr& cparams, { // 2-body ints -#define BOOST_PP_TWOBODY_TASKOPER_TUPLE \ - ("eri", "r12kg12", "r12_0_g12", "r12_2_g12", "g12_T1_g12", "g12dkh") +#define BOOST_PP_TWOBODY_TASKOPER_TUPLE \ + ("eri", "coulomb_opop", "r12kg12", "r12_0_g12", "r12_2_g12", "g12_T1_g12", \ + "g12dkh") #define BOOST_PP_TWOBODY_TASKOPER_LIST \ BOOST_PP_TUPLE_TO_LIST(BOOST_PP_TWOBODY_TASKOPER_TUPLE) From 2bf62f641d175110b367c7ed0217cfc748b96fbb Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Tue, 10 Feb 2026 21:20:58 -0500 Subject: [PATCH 05/22] use and invoke correct `buildfnptr`s if engine is initialized with`Operator::coulomb_opop` --- include/libint2/engine.impl.h | 7 ++++--- "src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/libint2/engine.impl.h b/include/libint2/engine.impl.h index 40d50763b..3f5669d96 100644 --- a/include/libint2/engine.impl.h +++ b/include/libint2/engine.impl.h @@ -1462,7 +1462,7 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( const detail::core_eval_pack_type&>( core_eval_pack_) .first(); - ` const auto& core_ints_params = + const auto& core_ints_params = any_cast::oper_params_type&>( core_ints_params_); @@ -1754,7 +1754,7 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( #endif // prefactors for derivative ERI relations - if (deriv_order_ > 0) { + if (deriv_order_ + intrinsic_deriv_order() > 0) { #if LIBINT2_DEFINED(eri, alpha1_rho_over_zeta2) primdata.alpha1_rho_over_zeta2[0] = alpha0 * (oogammap * gammaq_o_gammapgammaq); @@ -1837,7 +1837,8 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( } // compute directly (ss|ss) - const auto compute_directly = lmax == 0 && deriv_order_ == 0; + const auto compute_directly = + lmax == 0 && deriv_order_ == 0 & intrinsic_deriv_order() == 0; if (compute_directly) { #ifdef 
LIBINT2_ENGINE_TIMERS diff --git "a/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" "b/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" index 252eabf5d..82ee512ef 100644 --- "a/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" +++ "b/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004-2024 Edward F. Valeev + * Copyright (C) 2004-2026 Edward F. Valeev * * This file is part of Libint compiler. * From b6c376be3812b1195b323c3c895c9230f40be1eb Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Wed, 11 Feb 2026 13:18:47 -0500 Subject: [PATCH 06/22] Apply @loriab 's review suggestions --- include/libint2/config.h.cmake.in | 4 ++-- include/libint2/engine.impl.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/libint2/config.h.cmake.in b/include/libint2/config.h.cmake.in index 6018873e5..3eb32e2f0 100644 --- a/include/libint2/config.h.cmake.in +++ b/include/libint2/config.h.cmake.in @@ -129,13 +129,13 @@ /* Max optimized AM for ERI and its derivatives */ #cmakedefine LIBINT_ERI_OPT_AM_LIST "@LIBINT_ERI_OPT_AM_LIST@" -/* Max AM for RKB_ERI (same for all derivatives; if not defined see LIBINT_ERI_MAX_AM_LIST) */ +/* Max AM for RKB_ERI (same for all derivatives; if not defined see LIBINT_RKB_ERI_MAX_AM_LIST) */ #cmakedefine LIBINT_RKB_ERI_MAX_AM @LIBINT_RKB_ERI_MAX_AM@ /* Max AM for RKB_ERI and its derivatives */ #cmakedefine LIBINT_RKB_ERI_MAX_AM_LIST "@LIBINT_RKB_ERI_MAX_AM_LIST@" -/* Max optimized AM for ERI (same for all derivatives; if not defined see LIBINT_ERI_OPT_AM_LIST) */ +/* Max optimized AM for RKB_ERI (same for all derivatives; if not defined see LIBINT_RKB_ERI_OPT_AM_LIST) */ #cmakedefine LIBINT_RKB_ERI_OPT_AM @LIBINT_RKB_ERI_OPT_AM@ /* Max optimized AM for ERI and its derivatives */ diff --git a/include/libint2/engine.impl.h b/include/libint2/engine.impl.h index 3f5669d96..9381b972a 100644 --- a/include/libint2/engine.impl.h +++ b/include/libint2/engine.impl.h @@
-665,22 +665,22 @@ __libint2_engine_inline void Engine::initialize(size_t max_nprim) { #ifndef LIBINT_INCLUDE_ONEBODY assert(braket_ != BraKet::x_x && "this braket type not supported by the library; configure with " - "-DLIBINT_INCLUDE_ONEBODY >= 0"); + "-DLIBINT2_ENABLE_ONEBODY >= 0"); #endif #ifndef LIBINT_INCLUDE_ERI assert(braket_ != BraKet::xx_xx && "this braket type not supported by the library; configure with " - "-DLIBINT_INCLUDE_ERI >= 0"); + "-DLIBINT2_ENABLE_ERI >= 0"); #endif #ifndef LIBINT_INCLUDE_ERI3 assert((braket_ != BraKet::xs_xx && braket_ != BraKet::xx_xs) && "this braket type not supported by the library; configure with " - "-DLIBINT_INCLUDE_ERI3 >= 0"); + "-DLIBINT2_ENABLE_ERI3 >= 0"); #endif #ifndef LIBINT_INCLUDE_ERI2 assert(braket_ != BraKet::xs_xs && "this braket type not supported by the library; configure with " - "-DLIBINT_INCLUDE_ERI2 >= 0"); + "-DLIBINT2_ENABLE_ERI2 >= 0"); #endif // make sure it's no default initialized From 0aa7b83f6116d8140563eace3d8119fa777dc5f5 Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Fri, 13 Feb 2026 19:16:53 -0500 Subject: [PATCH 07/22] Added unittest for `Operator::coulomb_opop` and fixed logic errors in unique am shell sets and phase change for this operator --- export/tests/unit/test-2body.cc | 167 ++++++++++++++++++++++++++++++-- include/libint2/engine.impl.h | 38 +++++--- src/bin/libint/build_libint.cc | 15 ++- src/bin/libint/oper.h | 12 +-- 4 files changed, 198 insertions(+), 34 deletions(-) diff --git a/export/tests/unit/test-2body.cc b/export/tests/unit/test-2body.cc index fd602a910..ee08ed972 100644 --- a/export/tests/unit/test-2body.cc +++ b/export/tests/unit/test-2body.cc @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004-2024 Edward F. Valeev + * Copyright (C) 2004-2026 Edward F. Valeev * * This file is part of Libint library. 
* @@ -344,6 +344,153 @@ TEST_CASE("eri geometric derivatives", "[engine][2-body]") { } } +TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { + std::vector obs{ + // pseudorandom s + Shell{{1.0, 0.3}, {{0, false, {0.9, 0.3}}}, {{0.0, 0.0, 0.0}}}, + // pseudorandom p + Shell{{2.0, 0.4}, {{1, false, {0.8, -0.2}}}, {{1.0, 1.0, 1.0}}}}; + + const auto max_nprim = libint2::max_nprim(obs); + const auto max_l = libint2::max_l(obs); + typedef std::array der_idx; + + SECTION("Coulombσpσp") { + Engine engine; + try { + engine = Engine(Operator::coulomb_opop, max_nprim, max_l, 0); + // TODO: need another unit test for derivatives of RKB ERIs + } catch ( + Engine::lmax_exceeded &) { // skip the test if lmax exceeded or libint2 + // not configured with RKB support + return; + } + + const auto nshell = obs.size(); + for (int s0 = 0; s0 != nshell; ++s0) { + for (int s1 = 0; s1 != nshell; ++s1) { + for (int s2 = 0; s2 != nshell; ++s2) { + for (int s3 = 0; s3 != nshell; ++s3) { + const auto &results = + engine.compute(obs[s0], obs[s1], obs[s2], obs[s3]); + assert(results.size() == + 4); // we get 4 buffers for each quaternion component + + LIBINT2_REF_REALTYPE Aref[3]; + for (int i = 0; i < 3; ++i) Aref[i] = obs[s0].O[i]; + LIBINT2_REF_REALTYPE Bref[3]; + for (int i = 0; i < 3; ++i) Bref[i] = obs[s1].O[i]; + LIBINT2_REF_REALTYPE Cref[3]; + for (int i = 0; i < 3; ++i) Cref[i] = obs[s2].O[i]; + LIBINT2_REF_REALTYPE Dref[3]; + for (int i = 0; i < 3; ++i) Dref[i] = obs[s3].O[i]; + + int ijkl = 0; + + int l0, m0, n0; + FOR_CART(l0, m0, n0, obs[s0].contr[0].l) + + int l1, m1, n1; + FOR_CART(l1, m1, n1, obs[s1].contr[0].l) + + int l2, m2, n2; + FOR_CART(l2, m2, n2, obs[s2].contr[0].l) + + int l3, m3, n3; + FOR_CART(l3, m3, n3, obs[s3].contr[0].l) + + std::array ref_coulomb_opop{0.0, 0.0, 0.0, + 0.0}; + uint p0123 = 0; + for (uint p0 = 0; p0 < obs[s0].nprim(); p0++) { + for (uint p1 = 0; p1 < obs[s1].nprim(); p1++) { + for (uint p2 = 0; p2 < obs[s2].nprim(); p2++) { + for (uint p3 = 
0; p3 < obs[s3].nprim(); p3++, p0123++) { + const LIBINT2_REF_REALTYPE alpha0 = obs[s0].alpha[p0]; + const LIBINT2_REF_REALTYPE alpha1 = obs[s1].alpha[p1]; + const LIBINT2_REF_REALTYPE alpha2 = obs[s2].alpha[p2]; + const LIBINT2_REF_REALTYPE alpha3 = obs[s3].alpha[p3]; + + const LIBINT2_REF_REALTYPE c0 = obs[s0].contr[0].coeff[p0]; + const LIBINT2_REF_REALTYPE c1 = obs[s1].contr[0].coeff[p1]; + const LIBINT2_REF_REALTYPE c2 = obs[s2].contr[0].coeff[p2]; + const LIBINT2_REF_REALTYPE c3 = obs[s3].contr[0].coeff[p3]; + const LIBINT2_REF_REALTYPE c0123 = c0 * c1 * c2 * c3; + + auto eri_drr = [&](der_idx d_rr) { + return eri(d_rr.data(), l0, m0, n0, alpha0, Aref, l1, m1, + n1, alpha1, Bref, l2, m2, n2, alpha2, Cref, l3, + m3, n3, alpha3, Dref, 0); + }; + + // e.g. d_xx maps the derivative index of derivative w.r.t x + // coord of ket1 and x coord of ket2 in Chemist notation. + der_idx d_xx = {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0}; + der_idx d_yy = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0}; + der_idx d_zz = {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1}; + ref_coulomb_opop[0] += + c0123 * (eri_drr(d_xx) + eri_drr(d_yy) + eri_drr(d_zz)); + + der_idx d_yz = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1}; + der_idx d_zy = {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0}; + ref_coulomb_opop[1] += + c0123 * (eri_drr(d_yz) - eri_drr(d_zy)); + + der_idx d_zx = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0}; + der_idx d_xz = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1}; + ref_coulomb_opop[2] += + c0123 * (eri_drr(d_zx) - eri_drr(d_xz)); + + der_idx d_xy = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0}; + der_idx d_yx = {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0}; + ref_coulomb_opop[3] += + c0123 * (eri_drr(d_xy) - eri_drr(d_yx)); + } + } + } + } + + const double ABSOLUTE_DEVIATION_THRESHOLD = 5.0E-14; + const double RELATIVE_DEVIATION_THRESHOLD = + 1.0E-9; // For more detail on choice of these thresholds, see + // the comments in the TEST_CASE "eri geometric + // derivatives" + + std::array abs_errs; + std::array rel_abs_errs; + + for (auto comp = 0; 
comp < 4; ++comp) { + abs_errs[comp] = + abs(ref_coulomb_opop[comp] - results[comp][ijkl]); + rel_abs_errs[comp] = abs(abs_errs[comp] / ref_coulomb_opop[comp]); + bool not_ok = rel_abs_errs[comp] > RELATIVE_DEVIATION_THRESHOLD && + abs_errs[comp] > ABSOLUTE_DEVIATION_THRESHOLD; + // no 3^n prefactor here since the intrinsic deriv order is 2 + if (not_ok) { + std::cout << "(l0 l1| l2 l3) = " + << "(" << s0 << " " << s1 << " | " << s2 << " " << s3 + << ") " + << "Elem " << ijkl << " comp= " << comp + << " : ref = " << ref_coulomb_opop[comp] + << " libint = " << results[comp][ijkl] + << " relabs_error = " << rel_abs_errs[comp] + << " abs_error = " << abs_errs[comp] << std::endl; + } + REQUIRE(!not_ok); + } + + ++ijkl; + END_FOR_CART + END_FOR_CART + END_FOR_CART + END_FOR_CART + } + } + } + } + } +} + TEST_CASE("Erfx_Coulomb integrals", "[engine][2-body]") { // pseudorandom s shells std::vector obs{ @@ -374,12 +521,12 @@ TEST_CASE("Erfx_Coulomb integrals", "[engine][2-body]") { REQUIRE(results[0] != nullptr); switch (k) { /* VALIDATION WOLFRAM CODE: -(* Integral of Coulomb kernel damped by (\[Lambda] Erf[\[Omega] r] + \ -\[Sigma] Erfc[\[Omega] r]), over unit-normalized s functions, \ -see Eq 52 in DOI 10.1039/b605188j *) -F0[T_] := If[T == 0, 1, Sqrt[\[Pi]/T]*Erf[Sqrt[T]]/2]; -sN[a_] := ((2 a)/\[Pi])^(3/4); -VVeeErfx[\[Alpha]1_, A1_List, \[Alpha]2_, A2_List, \[Beta]1_, + (* Integral of Coulomb kernel damped by (\[Lambda] Erf[\[Omega] r] + \ + \[Sigma] Erfc[\[Omega] r]), over unit-normalized s functions, \ + see Eq 52 in DOI 10.1039/b605188j *) + F0[T_] := If[T == 0, 1, Sqrt[\[Pi]/T]*Erf[Sqrt[T]]/2]; + sN[a_] := ((2 a)/\[Pi])^(3/4); + VVeeErfx[\[Alpha]1_, A1_List, \[Alpha]2_, A2_List, \[Beta]1_, B1_List, \[Beta]2_, B2_List, \[Omega]_, \[Lambda]_, \[Sigma]_] := Module[{\[Gamma]1, \[Gamma]2, P1, P2, K1, K2, T, result, \[Rho]}, \[Gamma]1 = \[Alpha]1 + \[Beta]1; @@ -397,13 +544,13 @@ VVeeErfx[\[Alpha]1_, A1_List, \[Alpha]2_, A2_List, \[Beta]1_, T]) sN[\[Alpha]1] 
sN[\[Alpha]2] sN[\[Beta]1] sN[\[Beta]2]; Return[result]; ]; -Print[CForm[ + Print[CForm[ N[VVeeErfx[1, {0, 0, 0}, 3, {2, 2, 2}, 2, {1, 1, 1}, 4, {3, 3, 3}, 1.1, 1, 0], 20]]] -Print[CForm[ + Print[CForm[ N[VVeeErfx[1, {0, 0, 0}, 3, {2, 2, 2}, 2, {1, 1, 1}, 4, {3, 3, 3}, 1.1, 0, 1], 20]]] -Print[CForm[ + Print[CForm[ N[VVeeErfx[1, {0, 0, 0}, 3, {2, 2, 2}, 2, {1, 1, 1}, 4, {3, 3, 3}, 1.1, 2, 3], 20]]] */ diff --git a/include/libint2/engine.impl.h b/include/libint2/engine.impl.h index 9381b972a..ec570c045 100644 --- a/include/libint2/engine.impl.h +++ b/include/libint2/engine.impl.h @@ -1215,17 +1215,21 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( LIBINT2_SHELLQUARTET_SET_STANDARD // standard angular momentum ordering const auto swap_tbra = (tbra1.contr[0].l < tbra2.contr[0].l); const auto swap_tket = (tket1.contr[0].l < tket2.contr[0].l); - const auto swap_braket = - ((braket_ == BraKet::xx_xx) && (tbra1.contr[0].l + tbra2.contr[0].l > - tket1.contr[0].l + tket2.contr[0].l)) || - braket_ == BraKet::xx_xs; + const auto swap_braket = ((braket_ == BraKet::xx_xx) && + (tbra1.contr[0].l + tbra2.contr[0].l < + tket1.contr[0].l + tket2.contr[0].l) && + (oper_ != Operator::coulomb_opop)) || + braket_ == BraKet::xx_xs; + // N.B. 
cannot swap bra and ket for coulomb_opop since the ket is mutated by + // this operator #else // orca angular momentum ordering const auto swap_tbra = (tbra1.contr[0].l > tbra2.contr[0].l); const auto swap_tket = (tket1.contr[0].l > tket2.contr[0].l); - const auto swap_braket = - ((braket_ == BraKet::xx_xx) && (tbra1.contr[0].l + tbra2.contr[0].l < - tket1.contr[0].l + tket2.contr[0].l)) || - braket_ == BraKet::xx_xs; + const auto swap_braket = ((braket_ == BraKet::xx_xx) && + (tbra1.contr[0].l + tbra2.contr[0].l < + tket1.contr[0].l + tket2.contr[0].l) && + (oper_ != Operator::coulomb_opop)) || + braket_ == BraKet::xx_xs; assert(false && "feature not implemented"); abort(); #endif @@ -1659,7 +1663,7 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( const auto Wz = (gammap_o_gammapgammaq * P[2] + gammaq_o_gammapgammaq * Q[2]); - if (deriv_order_ > 0 || lmax_bra > 0) { + if (deriv_order_ + intrinsic_deriv_order() > 0 || lmax_bra > 0) { #if LIBINT2_DEFINED(eri, WP_x) primdata.WP_x[0] = Wx - P[0]; #endif @@ -1670,7 +1674,7 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( primdata.WP_z[0] = Wz - P[2]; #endif } - if (deriv_order_ > 0 || lmax_ket > 0) { + if (deriv_order_ + intrinsic_deriv_order() > 0 || lmax_ket > 0) { #if LIBINT2_DEFINED(eri, WQ_x) primdata.WQ_x[0] = Wx - Q[0]; #endif @@ -1916,8 +1920,10 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( "the angular momentum limit is exceeded"); assert(ket2.contr[0].l <= ket_lmax && "the angular momentum limit is exceeded"); + buildfnidx = (bra1.contr[0].l * ket_lmax + ket1.contr[0].l) * ket_lmax + ket2.contr[0].l; + #ifdef LIBINT_ERI3_PURE_SH if (bra1.contr[0].l > 1) assert(bra1.contr[0].pure && @@ -2110,9 +2116,15 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( const auto tgt_row_idx = !swap_tbra ? 
r1 * nr2 + r2 : r2 * nr1 + r1; Map tgt_blk_mat(tgt_ptr + tgt_row_idx * ncol, nc1_tgt, nc2_tgt); - if (swap_tket) - tgt_blk_mat = src_blk_mat.transpose(); - else + if (swap_tket) { + Shell::real_t oper_cart_component_phase = 1.0; + if (oper_ == Operator::coulomb_opop && s > 0) + oper_cart_component_phase = + -1.0; // x,y,z quaternion components flip sign on + // swapping ket for coulomb_opop + tgt_blk_mat = + oper_cart_component_phase * src_blk_mat.transpose(); + } else tgt_blk_mat = src_blk_mat; } } // end of loop diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index 8acb3814d..dfb48fbd0 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -1118,10 +1118,13 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, for (unsigned int lb = 0; lb <= lmax; lb++) { for (unsigned int lc = 0; lc <= lmax; lc++) { for (unsigned int ld = 0; ld <= lmax; ld++) { - if (!ShellQuartetSetPredicate( - LIBINT_SHELL_SET)>::value(la, lb, lc, ld)) - continue; - + if (std::is_same::value) { + if (!(la >= lb && lc >= ld)) continue; + } else { + if (!ShellQuartetSetPredicate( + LIBINT_SHELL_SET)>::value(la, lb, lc, ld)) + continue; + } // std::shared_ptr tactic(new ParticleDirectionTactic(la+lb > // lc+ld ? 
false : true)); std::shared_ptr tactic( @@ -1223,7 +1226,7 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, CGShell c(lc); CGShell d(ld); - if constexpr (!std::is_same::value) { + if constexpr (std::is_same::value) { OperType oper; oper = OperType(descrs[0]); std::shared_ptr abcd = @@ -1236,6 +1239,8 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, oss << "_" << label; oss << "_" << c.label() << "_" << d.label(); abcd_label = oss.str(); + std::cout << "(" << a.label() << " " << b.label() << " | " + << c.label() << " " << d.label() << ") "; } } // + derivative level (if deriv_level > 0) diff --git a/src/bin/libint/oper.h b/src/bin/libint/oper.h index daa6aa644..36292613a 100644 --- a/src/bin/libint/oper.h +++ b/src/bin/libint/oper.h @@ -301,11 +301,11 @@ struct σpVσp_Descr : public Contractable<σpVσp_Descr> { if (quaternion_index() == 0) descr += "0"; else if (quaternion_index() == 1) - descr += "Z"; - else if (quaternion_index() == 2) descr += "X"; - else if (quaternion_index() == 3) + else if (quaternion_index() == 2) descr += "Y"; + else if (quaternion_index() == 3) + descr += "Z"; else abort(); return descr + "]"; @@ -417,11 +417,11 @@ struct Coulombσpσp_Descr : public Contractable { if (quaternion_index() == 0) descr += "0"; else if (quaternion_index() == 1) - descr += "Z"; - else if (quaternion_index() == 2) descr += "X"; - else if (quaternion_index() == 3) + else if (quaternion_index() == 2) descr += "Y"; + else if (quaternion_index() == 3) + descr += "Z"; else abort(); return descr + "]"; From 62142d9671e87c397e7938f6535eff0972e127ee Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Sat, 14 Feb 2026 10:54:02 -0500 Subject: [PATCH 08/22] bugfix: revert incorrect braket swapping criteria for `Operator::coulomb` --- include/libint2/engine.impl.h | 2 +- src/bin/libint/build_libint.cc | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/libint2/engine.impl.h b/include/libint2/engine.impl.h index 
ec570c045..834fd9c6d 100644 --- a/include/libint2/engine.impl.h +++ b/include/libint2/engine.impl.h @@ -1216,7 +1216,7 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( const auto swap_tbra = (tbra1.contr[0].l < tbra2.contr[0].l); const auto swap_tket = (tket1.contr[0].l < tket2.contr[0].l); const auto swap_braket = ((braket_ == BraKet::xx_xx) && - (tbra1.contr[0].l + tbra2.contr[0].l < + (tbra1.contr[0].l + tbra2.contr[0].l > tket1.contr[0].l + tket2.contr[0].l) && (oper_ != Operator::coulomb_opop)) || braket_ == BraKet::xx_xs; diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index dfb48fbd0..f70d16644 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -77,8 +77,9 @@ struct ShellQuartetSetPredicate { }; template <> struct ShellQuartetSetPredicate { - static bool value(int la, int lb, int lc, int ld) { - return la >= lb && lc >= ld && la + lb <= lc + ld; + static bool value(int la, int lb, int lc, int ld, + bool p1p2_swappable = true) { + return la >= lb && lc >= ld && (!p1p2_swappable || la + lb <= lc + ld); } }; template <> @@ -1118,7 +1119,7 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, for (unsigned int lb = 0; lb <= lmax; lb++) { for (unsigned int lc = 0; lc <= lmax; lc++) { for (unsigned int ld = 0; ld <= lmax; ld++) { - if (std::is_same::value) { + if constexpr (std::is_same::value) { if (!(la >= lb && lc >= ld)) continue; } else { if (!ShellQuartetSetPredicate( @@ -1239,8 +1240,6 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, oss << "_" << label; oss << "_" << c.label() << "_" << d.label(); abcd_label = oss.str(); - std::cout << "(" << a.label() << " " << b.label() << " | " - << c.label() << " " << d.label() << ") "; } } // + derivative level (if deriv_level > 0) From b2b79ab24c02f9405614e28d4262f86eaacdc0e0 Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Wed, 18 Feb 2026 17:19:34 -0500 Subject: [PATCH 09/22] generate code for 
missing contracted kernels for `deriv(>0)-eri` + cleanup --- src/bin/libint/build_libint.cc | 34 +++++++++---------- .../comp_11_Coulomb\317\203p\317\203p_11.h" | 8 ++--- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index f70d16644..c892dd618 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -73,7 +73,7 @@ enum ShellSetType { template struct ShellQuartetSetPredicate { // return true if this set of angular momenta is included - static bool value(int la, int lb, int lc, int ld); + static bool value(int la, int lb, int lc, int ld, bool p1p2_swappable = true); }; template <> struct ShellQuartetSetPredicate { @@ -84,8 +84,10 @@ struct ShellQuartetSetPredicate { }; template <> struct ShellQuartetSetPredicate { - static bool value(int la, int lb, int lc, int ld) { - return la <= lb && lc <= ld && (la < lc || (la == lc && lb <= ld)); + static bool value(int la, int lb, int lc, int ld, + bool p1p2_swappable = true) { + return la <= lb && lc <= ld && + (!p1p2_swappable || (la < lc || (la == lc && lb <= ld))); } }; template @@ -1115,17 +1117,16 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, std::shared_ptr context(new CppCodeContext(cparams)); std::shared_ptr memman(new WorstFitMemoryManager()); + bool p1_p2_swappable = !std::is_same::value; + for (unsigned int la = 0; la <= lmax; la++) { for (unsigned int lb = 0; lb <= lmax; lb++) { for (unsigned int lc = 0; lc <= lmax; lc++) { for (unsigned int ld = 0; ld <= lmax; ld++) { - if constexpr (std::is_same::value) { - if (!(la >= lb && lc >= ld)) continue; - } else { - if (!ShellQuartetSetPredicate( - LIBINT_SHELL_SET)>::value(la, lb, lc, ld)) - continue; - } + if (!ShellQuartetSetPredicate( + LIBINT_SHELL_SET)>::value(la, lb, lc, ld, p1_p2_swappable)) + continue; + // std::shared_ptr tactic(new ParticleDirectionTactic(la+lb > // lc+ld ? 
false : true)); std::shared_ptr tactic( @@ -1141,9 +1142,7 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, ///////////////////////////////// // loop over operator components ///////////////////////////////// - // most important operators have 1 component ... - std::vector descrs(1); // operator descriptors - // important EXCEPTION: multipole moments + std::vector descrs(1); if (std::is_same::value) { // reset descriptors array descrs.resize(0); @@ -1156,8 +1155,9 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, // unroll only if max_am <= cparams->max_am_opt(task) using std::max; const unsigned int max_am = max(max(la, lb), max(lc, ld)); const bool need_to_optimize = (max_am <= cparams->max_am_opt(task)); + const auto nopers = descrs.size(); const bool need_to_unroll = - l_to_cgshellsize(la) * l_to_cgshellsize(lb) * + nopers * l_to_cgshellsize(la) * l_to_cgshellsize(lb) * l_to_cgshellsize(lc) * l_to_cgshellsize(ld) <= cparams->unroll_threshold(); const unsigned int unroll_threshold = @@ -1285,11 +1285,9 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, iface->to_static_init(oss.str()); // need to declare this function internally - for (std::deque::const_iterator i = - decl_filenames.begin(); - i != decl_filenames.end(); ++i) { + for (auto& decl_filename : decl_filenames) { oss.str(""); - oss << "#include <" << *i << ">" << endl; + oss << "#include <" << decl_filename << ">" << endl; iface->to_int_iface(oss.str()); } diff --git "a/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" "b/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" index 82ee512ef..14621936b 100644 --- "a/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" +++ "b/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" @@ -83,11 +83,9 @@ CR_11_Coulombσpσp_11::CR_11_Coulombσpσp_11( const auto &oper = Tint->oper(); - // can express integrals of Coulombσpσp in terms of - // derivative integrals of 1/r12 for primitive Gaussians - // only - if 
(a.contracted() || b.contracted() || c.contracted() || d.contracted()) - return; + // TODO: need to do this only for uncontracted gaussians + // if (a.contracted() || b.contracted() || c.contracted() || d.contracted()) + // return; using namespace libint2::algebra; using namespace libint2::prefactor; From d4247b0563b541a2b4ffce48fb16578c550a3e5f Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Fri, 20 Feb 2026 16:56:32 -0500 Subject: [PATCH 10/22] =?UTF-8?q?reverted=20allowing=20contracted=20shells?= =?UTF-8?q?=20for=20Coulomb=CF=83p=CF=83p=20and=20use=20additional=20diffe?= =?UTF-8?q?rentiator=20when=20on=20MacOS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/libint2.h | 17 ++++++++------- .../comp_11_Coulomb\317\203p\317\203p_11.h" | 5 ++--- src/bin/libint/gauss.cc | 21 ++++++++++++++++--- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/include/libint2.h b/include/libint2.h index 59d17ef65..25a231be7 100644 --- a/include/libint2.h +++ b/include/libint2.h @@ -22,18 +22,19 @@ #define _libint2_header_ #define LIBINT_T_SS_EREP_SS(mValue) \ - _aB_s___0__s___1___TwoPRep_s___0__s___1___Ab__up_##mValue + _aB_s____0__s____1___TwoPRep_s____0__s____1___Ab__up_##mValue #define LIBINT_T_SS_Km1G12_SS(mValue) \ - _aB_s___0__s___1___r12_minus_1_g12_s___0__s___1___Ab__up_##mValue + _aB_s____0__s____1___r12_minus_1_g12_s____0__s____1___Ab__up_##mValue #define LIBINT_T_SS_K0G12_SS_0 \ - _aB_s___0__s___1___r12_0_g12_s___0__s___1___Ab__up_0 + _aB_s____0__s____1___r12_0_g12_s____0__s____1___Ab__up_0 #define LIBINT_T_SS_K2G12_SS_0 \ - _aB_s___0__s___1___r12_2_g12_s___0__s___1___Ab__up_0 + _aB_s____0__s____1___r12_2_g12_s____0__s____1___Ab__up_0 #define LIBINT_T_SS_K4G12_SS_0 \ - _aB_s___0__s___1___r12_4_g12_s___0__s___1___Ab__up_0 -#define LIBINT_T_S_OVERLAP_S _aB_s___0___Overlap_s___0___Ab__up_ -#define LIBINT_T_S_KINETIC_S _aB_s___0___Kinetic_s___0___Ab__up_ -#define LIBINT_T_S_ELECPOT_S(mValue) 
_aB_s___0___ElecPot_s___0___Ab__up_##mValue + _aB_s____0__s____1___r12_4_g12_s____0__s____1___Ab__up_0 +#define LIBINT_T_S_OVERLAP_S _aB_s____0___Overlap_s____0___Ab__up_ +#define LIBINT_T_S_KINETIC_S _aB_s____0___Kinetic_s____0___Ab__up_ +#define LIBINT_T_S_ELECPOT_S(mValue) \ + _aB_s____0___ElecPot_s____0___Ab__up_##mValue #include #include diff --git "a/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" "b/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" index 14621936b..315135994 100644 --- "a/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" +++ "b/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" @@ -83,9 +83,8 @@ CR_11_Coulombσpσp_11::CR_11_Coulombσpσp_11( const auto &oper = Tint->oper(); - // TODO: need to do this only for uncontracted gaussians - // if (a.contracted() || b.contracted() || c.contracted() || d.contracted()) - // return; + if (a.contracted() || b.contracted() || c.contracted() || d.contracted()) + return; using namespace libint2::algebra; using namespace libint2::prefactor; diff --git a/src/bin/libint/gauss.cc b/src/bin/libint/gauss.cc index 3899283ab..aa60b1de7 100644 --- a/src/bin/libint/gauss.cc +++ b/src/bin/libint/gauss.cc @@ -115,8 +115,15 @@ std::string CGF::label() const { unsigned int am = qn_[0] + qn_[1] + qn_[2]; std::string deriv_label; if (!deriv_.zero()) deriv_label = deriv_.label(); - const std::string am_string = am_to_symbol(am, contracted()); + std::string am_string = am_to_symbol(am, contracted()); std::ostringstream oss; + + // Some OSs can have case-insensitive filesystem e.g., MacOS. So here we add + // additional identifier for primitive function labels + if (!this->contracted()) { + am_string += "_"; + } + oss << (pure_sh_ && am > 0 ? "W" : "") << am_string << deriv_label << "_"; if (am == 0) return oss.str(); @@ -223,8 +230,16 @@ CGShell::~CGShell() {} std::string CGShell::label() const { if (is_unit()) return "unit"; - std::string result = std::string(pure_sh_ && qn_[0] > 0 ? 
"W" : "") + - am_to_symbol(qn_[0], contracted()); + std::string am_symbol = am_to_symbol(qn_[0], contracted()); + + // Some OSs can have case-insensitive filesystem e.g., MacOS. So here we add + // additional identifier for primitive shell labels + if (!this->contracted()) { + am_symbol += "_"; + } + + std::string result = + std::string(pure_sh_ && qn_[0] > 0 ? "W" : "") + am_symbol; if (!deriv_.zero()) result += deriv_.label(); return result; } From 054b8eba4a03140eeb3ceb66be90d4f3d355da5a Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Sat, 21 Feb 2026 11:51:43 -0500 Subject: [PATCH 11/22] can generate code for all small component RKB ERIs i.e.,`(SS|SS)` --- src/bin/libint/build_libint.cc | 15 +- ...3p\317\203pCoulomb\317\203p\317\203p_11.h" | 264 ++++++++++++++++++ src/bin/libint/master_ints_list.h | 12 +- src/bin/libint/master_rrs_list.h | 4 + src/bin/libint/oper.h | 36 +++ src/bin/libint/strategy.cc | 8 + 6 files changed, 330 insertions(+), 9 deletions(-) create mode 100644 "src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index c892dd618..8712580a5 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -287,6 +287,11 @@ Coulombσpσp_Descr make_descr(int p, int, int) { return Coulombσpσp_Descr(p); } +template <> +σpσpCoulombσpσp_Descr make_descr<σpσpCoulombσpσp_Descr>(int p, int, int) { + return σpσpCoulombσpσp_Descr(p); +} + } // namespace template @@ -582,8 +587,8 @@ void try_main(int argc, char* argv[]) { #endif #ifdef LIBINT_INCLUDE_RKB_ERI -#define BOOST_PP_RKB_ERI_TASK_TUPLE (coulomb_opop) -#define BOOST_PP_RKB_ERI_TASK_OPER_TUPLE (CoulombσpσpOper) +#define BOOST_PP_RKB_ERI_TASK_TUPLE (coulomb_opop, opop_coulomb_opop) +#define BOOST_PP_RKB_ERI_TASK_OPER_TUPLE (CoulombσpσpOper, σpσpCoulombσpσpOper) #define BOOST_PP_RKB_ERI_TASK_LIST \ BOOST_PP_TUPLE_TO_LIST(BOOST_PP_RKB_ERI_TASK_TUPLE) #define 
BOOST_PP_RKB_ERI_TASK_OPER_LIST \ @@ -2328,9 +2333,9 @@ void config_to_api(const std::shared_ptr& cparams, { // 2-body ints -#define BOOST_PP_TWOBODY_TASKOPER_TUPLE \ - ("eri", "coulomb_opop", "r12kg12", "r12_0_g12", "r12_2_g12", "g12_T1_g12", \ - "g12dkh") +#define BOOST_PP_TWOBODY_TASKOPER_TUPLE \ + ("eri", "coulomb_opop", "opop_coulomb_opop", "r12kg12", "r12_0_g12", \ + "r12_2_g12", "g12_T1_g12", "g12dkh") #define BOOST_PP_TWOBODY_TASKOPER_LIST \ BOOST_PP_TUPLE_TO_LIST(BOOST_PP_TWOBODY_TASKOPER_TUPLE) diff --git "a/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" "b/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" new file mode 100644 index 000000000..1cacc7b3a --- /dev/null +++ "b/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" @@ -0,0 +1,264 @@ +/* + * Copyright (C) 2004-2026 Edward F. Valeev + * + * This file is part of Libint compiler. + * + * Libint compiler is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Libint compiler is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Libint compiler. If not, see . 
+ * + */ + +#ifndef LIBINT_COMP_11_ΣPΣPCOULOMBΣPΣP_11_H +#define LIBINT_COMP_11_ΣPΣPCOULOMBΣPΣP_11_H + +#include +#include +#include + +namespace libint2 { + +/** + * this computes integral of + * \f$ \sigma \cdot \hat{p}_1 \sigma \cdot \hat{p}_2 \frac{1}{r_{ij}} \sigma + * \cdot \hat{p}_3 \sigma \cdot \hat{p}_4 \f$ over CGShell/CGF by rewriting it + * as a linear combination of integrals over derivatives of \f$ 1/r_{ij} \f$ + * @tparam F basis function type. valid choices are CGShell or CGF + */ +template +class CR_11_σpσpCoulombσpσp_11 + : public GenericRecurrenceRelation< + CR_11_σpσpCoulombσpσp_11, F, + GenIntegralSet_11_11> { + public: + typedef CR_11_σpσpCoulombσpσp_11 ThisType; + typedef F BasisFunctionType; + typedef σpσpCoulombσpσpOper OperType; + typedef GenIntegralSet_11_11 TargetType; + typedef GenericRecurrenceRelation + ParentType; + friend class GenericRecurrenceRelation; + static const unsigned int max_nchildren = 100; // TODO figure out + + using ParentType::Instance; + + static bool directional() { return false; } + + private: + using ParentType::is_simple; + using ParentType::target_; + using ParentType::RecurrenceRelation::expr_; + using ParentType::RecurrenceRelation::nflops_; + + /// Constructor is private, used by ParentType::Instance that maintains + /// registry of these objects + CR_11_σpσpCoulombσpσp_11(const std::shared_ptr &, + unsigned int = 0); + + static std::string descr() { return "CR"; } +}; + +template +CR_11_σpσpCoulombσpσp_11::CR_11_σpσpCoulombσpσp_11( + const std::shared_ptr &Tint, unsigned int) + : ParentType(Tint, 0) { + assert(Tint->num_func_bra(/* particle */ 0) == 1); + assert(Tint->num_func_bra(/* particle */ 1) == 1); + assert(Tint->num_func_ket(/* particle */ 0) == 1); + assert(Tint->num_func_ket(/* particle */ 1) == 1); + + F a(Tint->bra(0, 0)); + F b(Tint->ket(0, 0)); + F c(Tint->bra(1, 0)); + F d(Tint->ket(1, 0)); + + const auto &oper = Tint->oper(); + + if (a.contracted() || b.contracted() || c.contracted() || 
d.contracted()) + return; + + using namespace libint2::algebra; + using namespace libint2::prefactor; + using libint2::algebra::operator*; + + const mType zero_m(0u); + + ChildFactory> + factory(this); + + constexpr auto x = 0; + constexpr auto y = 1; + constexpr auto z = 2; + + auto mc = [&](const int r1, const int r2, const int r3, const int r4) { + F a_r1{a}; + a_r1.deriv().inc(r1); + F b_r2{b}; + b_r2.deriv().inc(r2); + F c_r3{c}; + c_r3.deriv().inc(r3); + F d_r4{d}; + d_r4.deriv().inc(r4); + return factory.make_child(a_r1, b_r2, c_r3, d_r4, zero_m); + }; + + // Component wise generation for quaternion : + // ( (σ.p) a (σ.p)b | 1/r12 | (σ.p) c (σ.p) d ) + switch (oper->descr().quaternion_index()) { + case 0: { + // zeroth component = + // x1 x2 x3 x4 + y1 y2 x3 x4 - y1 x2 y3 x4 + x1 y2 y3 x4 + y1 x2 x3 y4 - + // x1 y2 x3 y4 + x1 x2 y3 y4 + y1 y2 y3 y4 + z1 z2 x3 x4 + z1 z2 y3 y4 - + // z1 x2 z3 x4 - z1 y2 z3 y4 + x1 z2 z3 x4 + y1 z2 z3 y4 + z1 x2 x3 z4 + + // z1 y2 y3 z4 - x1 z2 x3 z4 - y1 z2 y3 z4 + x1 x2 z3 z4 + y1 y2 z3 z4 + + // z1 z2 z3 z4 + auto xxxx = mc(x, x, x, x); + auto yyxx = mc(y, y, x, x); + auto yxyx = mc(y, x, y, x); + auto xyyx = mc(x, y, y, x); + auto yxxy = mc(y, x, x, y); + auto xyxy = mc(x, y, x, y); + auto xxyy = mc(x, x, y, y); + auto yyyy = mc(y, y, y, y); + auto zzxx = mc(z, z, x, x); + auto zzyy = mc(z, z, y, y); + auto zxzx = mc(z, x, z, x); + auto zyzy = mc(z, y, z, y); + auto xzzx = mc(x, z, z, x); + auto yzzy = mc(y, z, z, y); + auto zxxz = mc(z, x, x, z); + auto zyyz = mc(z, y, y, z); + auto xzxz = mc(x, z, x, z); + auto yzyz = mc(y, z, y, z); + auto xxzz = mc(x, x, z, z); + auto yyzz = mc(y, y, z, z); + auto zzzz = mc(z, z, z, z); + if (is_simple()) { + expr_ = xxxx + yyxx - yxyx + xyyx + yxxy - xyxy + xxyy + yyyy + zzxx + + zzyy - zxzx - zyzy + xzzx + yzzy + zxxz + zyyz - xzxz - yzyz + + xxzz + yyzz + zzzz; + nflops_ += 20; + } + } break; + case 1: { + // x component = + // - z1 y2 x3 x4 + z1 x2 y3 x4 - z1 x2 x3 y4 - z1 y2 y3 y4 
+ y1 z2 x3 x4 - + // x1 z2 y3 x4 + x1 z2 x3 y4 + y1 z2 y3 y4 - y1 x2 z3 x4 + x1 y2 z3 x4 - + // x1 x2 z3 y4 - y1 y2 z3 y4 - z1 z2 z3 y4 + y1 x2 x3 z4 - x1 y2 x3 z4 + + // x1 x2 y3 z4 + y1 y2 y3 z4 + z1 z2 y3 z4 - z1 y2 z3 z4 + y1 z2 z3 z4 + auto zyxx = mc(z, y, x, x); + auto zxyx = mc(z, x, y, x); + auto zxxy = mc(z, x, x, y); + auto zyyy = mc(z, y, y, y); + auto yzxx = mc(y, z, x, x); + auto xzyx = mc(x, z, y, x); + auto xzxy = mc(x, z, x, y); + auto yzyy = mc(y, z, y, y); + auto yxzx = mc(y, x, z, x); + auto xyzx = mc(x, y, z, x); + auto xxzy = mc(x, x, z, y); + auto yyzy = mc(y, y, z, y); + auto zzzy = mc(z, z, z, y); + auto yxxz = mc(y, x, x, z); + auto xyxz = mc(x, y, x, z); + auto xxyz = mc(x, x, y, z); + auto yyyz = mc(y, y, y, z); + auto zzyz = mc(z, z, y, z); + auto zyzz = mc(z, y, z, z); + auto yzzz = mc(y, z, z, z); + if (is_simple()) { + // swapped order of first two terms compiler does not like negative sign + // in front of first term + expr_ = zxyx - zyxx - zxxy - zyyy + yzxx - xzyx + xzxy + yzyy - yxzx + + xyzx - xxzy - yyzy - zzzy + yxxz - xyxz + xxyz + yyyz + zzyz - + zyzz + yzzz; + nflops_ += 19; + } + } break; + case 2: { + // y component = + // z1 x2 x3 x4 + z1 y2 y3 x4 - z1 y2 x3 y4 + z1 x2 y3 y4 - x1 z2 x3 x4 - + // y1 z2 y3 x4 + y1 z2 x3 y4 - x1 z2 y3 y4 + x1 x2 z3 x4 + y1 y2 z3 x4 - + // y1 x2 z3 y4 + x1 y2 z3 y4 + z1 z2 z3 x4 - x1 x2 x3 z4 - y1 y2 x3 z4 + + // y1 x2 y3 z4 - x1 y2 y3 z4 - z1 z2 x3 z4 + z1 x2 z3 z4 - x1 z2 z3 z4 + auto zxxx = mc(z, x, x, x); + auto zyyx = mc(z, y, y, x); + auto zyxy = mc(z, y, x, y); + auto zxyy = mc(z, x, y, y); + auto xzxx = mc(x, z, x, x); + auto yzyx = mc(y, z, y, x); + auto yzxy = mc(y, z, x, y); + auto xzyy = mc(x, z, y, y); + auto xxzx = mc(x, x, z, x); + auto yyzx = mc(y, y, z, x); + auto yxzy = mc(y, x, z, y); + auto xyzy = mc(x, y, z, y); + auto zzzx = mc(z, z, z, x); + auto xxxz = mc(x, x, x, z); + auto yyxz = mc(y, y, x, z); + auto yxyz = mc(y, x, y, z); + auto xyyz = mc(x, y, y, z); + auto zzxz 
= mc(z, z, x, z); + auto zxzz = mc(z, x, z, z); + auto xzzz = mc(x, z, z, z); + + if (is_simple()) { + expr_ = zxxx + zyyx - zyxy + zxyy - xzxx - yzyx + yzxy - xzyy + xxzx + + yyzx - yxzy + xyzy + zzzx - xxxz - yyxz + yxyz - xyyz - zzxz + + zxzz - xzzz; + nflops_ += 19; + } + } break; + case 3: { + // z component = + // - y1 x2 x3 x4 + x1 y2 x3 x4 - x1 x2 y3 x4 - y1 y2 y3 x4 + x1 x2 x3 y4 + + // y1 y2 x3 y4 - y1 x2 y3 y4 + x1 y2 y3 y4 - z1 z2 y3 x4 + z1 z2 x3 y4 + + // z1 y2 z3 x4 - z1 x2 z3 y4 - y1 z2 z3 x4 + x1 z2 z3 y4 - z1 y2 x3 z4 + + // z1 x2 y3 z4 + y1 z2 x3 z4 - x1 z2 y3 z4 - y1 x2 z3 z4 + x1 y2 z3 z4 + auto yxxx = mc(y, x, x, x); + auto xyxx = mc(x, y, x, x); + auto xxyx = mc(x, x, y, x); + auto yyyx = mc(y, y, y, x); + auto xxxy = mc(x, x, x, y); + auto yyxy = mc(y, y, x, y); + auto yxyy = mc(y, x, y, y); + auto xyyy = mc(x, y, y, y); + auto zzyx = mc(z, z, y, x); + auto zzxy = mc(z, z, x, y); + auto zyzx = mc(z, y, z, x); + auto zxzy = mc(z, x, z, y); + auto yzzx = mc(y, z, z, x); + auto xzzy = mc(x, z, z, y); + auto zyxz = mc(z, y, x, z); + auto zxyz = mc(z, x, y, z); + auto yzxz = mc(y, z, x, z); + auto xzyz = mc(x, z, y, z); + auto yxzz = mc(y, x, z, z); + auto xyzz = mc(x, y, z, z); + if (is_simple()) { + expr_ = xyxx - yxxx - xxyx - yyyx + xxxy + yyxy - yxyy + xyyy - zzyx + + zzxy + zyzx - zxzy - yzzx + xzzy - zyxz + zxyz + yzxz - xzyz - + yxzz + xyzz; + nflops_ += 19; + } + } break; + default: + throw std::runtime_error( + "CR_11_σpσpCoulombσpσp_11: invalid quaternionic index"); + } + +} // CR_11_σpσpCoulombσpσp_11::CR_11_σpσpCoulombσpσp_11 +}; // namespace libint2 + +#endif // LIBINT_COMP_11_ΣPΣPCOULOMBΣPΣP_11_H diff --git a/src/bin/libint/master_ints_list.h b/src/bin/libint/master_ints_list.h index c5a6f4655..37bfa29a7 100644 --- a/src/bin/libint/master_ints_list.h +++ b/src/bin/libint/master_ints_list.h @@ -109,6 +109,10 @@ typedef GenIntegralSet_11_11 TwoPRep_11_11_int; typedef GenIntegralSet_11_11 Coulombσpσp_11_11_sq; typedef 
GenIntegralSet_11_11 Coulombσpσp_11_11_int; +typedef GenIntegralSet_11_11 + σpσpCoulombσpσp_11_11_sq; +typedef GenIntegralSet_11_11 + σpσpCoulombσpσp_11_11_int; typedef GenIntegralSet_11_11 R12kG12_11_11_sq; typedef GenIntegralSet_11_11 R12kG12_11_11_int; typedef GenIntegralSet_11_11 @@ -148,10 +152,10 @@ typedef boost::mpl::list< SMultipole_1_1_int, #endif TwoPRep_11_11_sq, TwoPRep_11_11_int, Coulombσpσp_11_11_sq, - Coulombσpσp_11_11_int, R12kG12_11_11_sq, R12kG12_11_11_int, - R12kR12lG12_11_11_sq, R12kR12lG12_11_11_int, TiG12_11_11_sq, - TiG12_11_11_int, G12TiG12_11_11_sq, G12TiG12_11_11_int, - DivG12prime_xTx_11_11_sq, DivG12prime_xTx_11_11_int, + Coulombσpσp_11_11_int, σpσpCoulombσpσp_11_11_sq, σpσpCoulombσpσp_11_11_int, + R12kG12_11_11_sq, R12kG12_11_11_int, R12kR12lG12_11_11_sq, + R12kR12lG12_11_11_int, TiG12_11_11_sq, TiG12_11_11_int, G12TiG12_11_11_sq, + G12TiG12_11_11_int, DivG12prime_xTx_11_11_sq, DivG12prime_xTx_11_11_int, DummySymmIntegral_11_11_sq, DummySymmIntegral_11_11_int> MasterIntegralTypeList; diff --git a/src/bin/libint/master_rrs_list.h b/src/bin/libint/master_rrs_list.h index 32e56878f..d55cfa301 100644 --- a/src/bin/libint/master_rrs_list.h +++ b/src/bin/libint/master_rrs_list.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -269,6 +270,9 @@ typedef CR_DerivGauss CR_11_Coulombσpσp_11_sh; typedef CR_11_Coulombσpσp_11 CR_11_Coulombσpσp_11_int; + +typedef CR_11_σpσpCoulombσpσp_11 CR_11_σpσpCoulombσpσp_11_sh; +typedef CR_11_σpσpCoulombσpσp_11 CR_11_σpσpCoulombσpσp_11_int; }; // namespace libint2 #endif // header guard diff --git a/src/bin/libint/oper.h b/src/bin/libint/oper.h index 36292613a..cecbda72b 100644 --- a/src/bin/libint/oper.h +++ b/src/bin/libint/oper.h @@ -437,6 +437,42 @@ struct Coulombσpσp_Descr : public Contractable { }; typedef GenOper CoulombσpσpOper; +struct σpσpCoulombσpσp_Descr : public Contractable<σpσpCoulombσpσp_Descr> { + typedef MultiplicativeSymm2Body_Props Properties; + + 
σpσpCoulombσpσp_Descr() : quaternion_index_(0) {} + σpσpCoulombσpσp_Descr(int quaternion_index) + : quaternion_index_(quaternion_index) { + assert(quaternion_index <= 3); + } + + static const unsigned int max_key = 4; + unsigned int key() const { return quaternion_index(); } + std::string description() const { + std::string descr("opop_coulomb_opop["); + if (quaternion_index() == 0) + descr += "0"; + else if (quaternion_index() == 1) + descr += "X"; + else if (quaternion_index() == 2) + descr += "Y"; + else if (quaternion_index() == 3) + descr += "Z"; + else + abort(); + return descr + "]"; + } + std::string label() const { return description(); } + int psymm(int i, int j) const { abort(); } + int hermitian(int i) const { return +1; } + + int quaternion_index() const { return quaternion_index_; } + + private: + const int quaternion_index_ = -1; +}; +typedef GenOper<σpσpCoulombσpσp_Descr> σpσpCoulombσpσpOper; + /** GTG_1d is the two-body 1-dimensional Gaussian geminal */ struct GTG_1d_Descr : public Contractable { diff --git a/src/bin/libint/strategy.cc b/src/bin/libint/strategy.cc index bcab9dcad..58fb8d2bd 100644 --- a/src/bin/libint/strategy.cc +++ b/src/bin/libint/strategy.cc @@ -123,6 +123,14 @@ template <> struct MasterStrategy { typedef boost::mpl::list value; }; +template <> +struct MasterStrategy<σpσpCoulombσpσp_11_11_sq> { + typedef boost::mpl::list value; +}; +template <> +struct MasterStrategy<σpσpCoulombσpσp_11_11_int> { + typedef boost::mpl::list value; +}; #if LIBINT_SHELLQUARTET_STRATEGY == LIBINT_SHELLQUARTET_STRATEGY_A0C0 template <> From 60e2117db25cbdc062a930c1dcb9e83837dfb21f Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Sat, 28 Feb 2026 21:38:37 -0500 Subject: [PATCH 12/22] Can evaluate `(SS|SS)` integrals. Only for `STANDARD` shell quartet for now. 
--- export/tests/unit/test-2body.cc | 346 ++++++++++++------ include/libint2/engine.h | 30 +- include/libint2/engine.impl.h | 75 ++-- src/bin/libint/build_libint.cc | 80 ++-- ...3p\317\203pCoulomb\317\203p\317\203p_11.h" | 132 ++----- 5 files changed, 383 insertions(+), 280 deletions(-) diff --git a/export/tests/unit/test-2body.cc b/export/tests/unit/test-2body.cc index ee08ed972..bf3b6a60d 100644 --- a/export/tests/unit/test-2body.cc +++ b/export/tests/unit/test-2body.cc @@ -345,20 +345,83 @@ TEST_CASE("eri geometric derivatives", "[engine][2-body]") { } TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { - std::vector obs{ - // pseudorandom s - Shell{{1.0, 0.3}, {{0, false, {0.9, 0.3}}}, {{0.0, 0.0, 0.0}}}, - // pseudorandom p - Shell{{2.0, 0.4}, {{1, false, {0.8, -0.2}}}, {{1.0, 1.0, 1.0}}}}; + std::vector obs{// pseudorandom s + Shell{{1.0}, {{0, false, {1.0}}}, {{0.0, 0.0, 0.0}}}, + // pseudorandom p + Shell{{2.0}, {{1, false, {1.0}}}, {{1.0, 1.0, 1.0}}}}; const auto max_nprim = libint2::max_nprim(obs); const auto max_l = libint2::max_l(obs); typedef std::array der_idx; - SECTION("Coulombσpσp") { - Engine engine; + // e.g. d_xx maps the derivative index of derivative w.r.t x + // coord of ket1 and x coord of ket2 in Chemist notation. 
+ // deriv indices for (LL|SS) + der_idx d_xx = {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0}; + der_idx d_yy = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0}; + der_idx d_zz = {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1}; + der_idx d_yz = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1}; + der_idx d_zy = {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0}; + der_idx d_zx = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0}; + der_idx d_xz = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1}; + der_idx d_xy = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0}; + der_idx d_yx = {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0}; + + // deriv indices for (SS|SS) + // 0th component + der_idx xxxx = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}; + der_idx yyxx = {0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}; + der_idx zzxx = {0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0}; + der_idx yxyx = {0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}; + der_idx xyyx = {1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}; + der_idx yxxy = {0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0}; + der_idx xyxy = {1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0}; + der_idx xxyy = {1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0}; + der_idx yyyy = {0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0}; + der_idx zzyy = {0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0}; + der_idx xxzz = {1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1}; + der_idx yyzz = {0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1}; + der_idx zzzz = {0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1}; + + // x-component + der_idx zxzx = {0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0}; + der_idx xzzx = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0}; + der_idx zyzy = {0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0}; + der_idx yzzy = {0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0}; + der_idx zxxz = {0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1}; + der_idx xzxz = {1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1}; + der_idx zyyz = {0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1}; + der_idx yzyz = {0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1}; + + // y-component + der_idx zyzx = {0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}; + der_idx yzzx = {0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0}; + der_idx zxzy = {0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0}; + der_idx xzzy = {1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0}; + 
der_idx zyxz = {0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1}; + der_idx yzxz = {0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1}; + der_idx zxyz = {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1}; + der_idx xzyz = {1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1}; + + // z-component + der_idx yxxx = {0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}; + der_idx xyxx = {1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}; + der_idx xxyx = {1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}; + der_idx yyyx = {0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}; + der_idx zzyx = {0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0}; + der_idx xxxy = {1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0}; + der_idx yyxy = {0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0}; + der_idx zzxy = {0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0}; + der_idx yxyy = {0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0}; + der_idx xyyy = {1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0}; + der_idx yxzz = {0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1}; + der_idx xyzz = {1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1}; + + SECTION("Coulombσpσp and σpσpCoulombσpσp") { + Engine engine_llss, engine_ssss; try { - engine = Engine(Operator::coulomb_opop, max_nprim, max_l, 0); + engine_llss = Engine(Operator::coulomb_opop, max_nprim, max_l, 0); + engine_ssss = Engine(Operator::opop_coulomb_opop, max_nprim, max_l, 0); // TODO: need another unit test for derivatives of RKB ERIs } catch ( Engine::lmax_exceeded &) { // skip the test if lmax exceeded or libint2 @@ -371,119 +434,172 @@ TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { for (int s1 = 0; s1 != nshell; ++s1) { for (int s2 = 0; s2 != nshell; ++s2) { for (int s3 = 0; s3 != nshell; ++s3) { - const auto &results = - engine.compute(obs[s0], obs[s1], obs[s2], obs[s3]); - assert(results.size() == - 4); // we get 4 buffers for each quaternion component - - LIBINT2_REF_REALTYPE Aref[3]; - for (int i = 0; i < 3; ++i) Aref[i] = obs[s0].O[i]; - LIBINT2_REF_REALTYPE Bref[3]; - for (int i = 0; i < 3; ++i) Bref[i] = obs[s1].O[i]; - LIBINT2_REF_REALTYPE Cref[3]; - for (int i = 0; i < 3; ++i) Cref[i] = obs[s2].O[i]; - LIBINT2_REF_REALTYPE Dref[3]; - for (int i 
= 0; i < 3; ++i) Dref[i] = obs[s3].O[i]; - - int ijkl = 0; - - int l0, m0, n0; - FOR_CART(l0, m0, n0, obs[s0].contr[0].l) - - int l1, m1, n1; - FOR_CART(l1, m1, n1, obs[s1].contr[0].l) - - int l2, m2, n2; - FOR_CART(l2, m2, n2, obs[s2].contr[0].l) - - int l3, m3, n3; - FOR_CART(l3, m3, n3, obs[s3].contr[0].l) - - std::array ref_coulomb_opop{0.0, 0.0, 0.0, - 0.0}; - uint p0123 = 0; - for (uint p0 = 0; p0 < obs[s0].nprim(); p0++) { - for (uint p1 = 0; p1 < obs[s1].nprim(); p1++) { - for (uint p2 = 0; p2 < obs[s2].nprim(); p2++) { - for (uint p3 = 0; p3 < obs[s3].nprim(); p3++, p0123++) { - const LIBINT2_REF_REALTYPE alpha0 = obs[s0].alpha[p0]; - const LIBINT2_REF_REALTYPE alpha1 = obs[s1].alpha[p1]; - const LIBINT2_REF_REALTYPE alpha2 = obs[s2].alpha[p2]; - const LIBINT2_REF_REALTYPE alpha3 = obs[s3].alpha[p3]; - - const LIBINT2_REF_REALTYPE c0 = obs[s0].contr[0].coeff[p0]; - const LIBINT2_REF_REALTYPE c1 = obs[s1].contr[0].coeff[p1]; - const LIBINT2_REF_REALTYPE c2 = obs[s2].contr[0].coeff[p2]; - const LIBINT2_REF_REALTYPE c3 = obs[s3].contr[0].coeff[p3]; - const LIBINT2_REF_REALTYPE c0123 = c0 * c1 * c2 * c3; - - auto eri_drr = [&](der_idx d_rr) { - return eri(d_rr.data(), l0, m0, n0, alpha0, Aref, l1, m1, - n1, alpha1, Bref, l2, m2, n2, alpha2, Cref, l3, - m3, n3, alpha3, Dref, 0); - }; - - // e.g. d_xx maps the derivative index of derivative w.r.t x - // coord of ket1 and x coord of ket2 in Chemist notation. 
- der_idx d_xx = {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0}; - der_idx d_yy = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0}; - der_idx d_zz = {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1}; - ref_coulomb_opop[0] += - c0123 * (eri_drr(d_xx) + eri_drr(d_yy) + eri_drr(d_zz)); - - der_idx d_yz = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1}; - der_idx d_zy = {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0}; - ref_coulomb_opop[1] += - c0123 * (eri_drr(d_yz) - eri_drr(d_zy)); - - der_idx d_zx = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0}; - der_idx d_xz = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1}; - ref_coulomb_opop[2] += - c0123 * (eri_drr(d_zx) - eri_drr(d_xz)); - - der_idx d_xy = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0}; - der_idx d_yx = {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0}; - ref_coulomb_opop[3] += - c0123 * (eri_drr(d_xy) - eri_drr(d_yx)); + if (s0 == 0 && s1 == 1 && s2 == 0 && s3 == 1) { + const auto &results_llss = + engine_llss.compute(obs[s0], obs[s1], obs[s2], obs[s3]); + const auto &results_ssss = + engine_ssss.compute(obs[s0], obs[s1], obs[s2], obs[s3]); + assert(results_llss.size() == + 4); // we get 4 buffers for each quaternion component + + LIBINT2_REF_REALTYPE Aref[3]; + for (int i = 0; i < 3; ++i) Aref[i] = obs[s0].O[i]; + LIBINT2_REF_REALTYPE Bref[3]; + for (int i = 0; i < 3; ++i) Bref[i] = obs[s1].O[i]; + LIBINT2_REF_REALTYPE Cref[3]; + for (int i = 0; i < 3; ++i) Cref[i] = obs[s2].O[i]; + LIBINT2_REF_REALTYPE Dref[3]; + for (int i = 0; i < 3; ++i) Dref[i] = obs[s3].O[i]; + + int ijkl = 0; + + int l0, m0, n0; + FOR_CART(l0, m0, n0, obs[s0].contr[0].l) + + int l1, m1, n1; + FOR_CART(l1, m1, n1, obs[s1].contr[0].l) + + int l2, m2, n2; + FOR_CART(l2, m2, n2, obs[s2].contr[0].l) + + int l3, m3, n3; + FOR_CART(l3, m3, n3, obs[s3].contr[0].l) + + std::array ref_coulomb_opop{0.0, 0.0, + 0.0, 0.0}; + std::array ref_opop_coulomb_opop{ + 0.0, 0.0, 0.0, 0.0}; + uint p0123 = 0; + for (uint p0 = 0; p0 < obs[s0].nprim(); p0++) { + for (uint p1 = 0; p1 < obs[s1].nprim(); p1++) { + for (uint p2 = 0; p2 < obs[s2].nprim(); p2++) 
{ + for (uint p3 = 0; p3 < obs[s3].nprim(); p3++, p0123++) { + const LIBINT2_REF_REALTYPE alpha0 = obs[s0].alpha[p0]; + const LIBINT2_REF_REALTYPE alpha1 = obs[s1].alpha[p1]; + const LIBINT2_REF_REALTYPE alpha2 = obs[s2].alpha[p2]; + const LIBINT2_REF_REALTYPE alpha3 = obs[s3].alpha[p3]; + + const LIBINT2_REF_REALTYPE c0 = + obs[s0].contr[0].coeff[p0]; + const LIBINT2_REF_REALTYPE c1 = + obs[s1].contr[0].coeff[p1]; + const LIBINT2_REF_REALTYPE c2 = + obs[s2].contr[0].coeff[p2]; + const LIBINT2_REF_REALTYPE c3 = + obs[s3].contr[0].coeff[p3]; + const LIBINT2_REF_REALTYPE c0123 = c0 * c1 * c2 * c3; + + auto eri_drrrr = [&](der_idx d_rrrr) { + return eri(d_rrrr.data(), l0, m0, n0, alpha0, Aref, l1, + m1, n1, alpha1, Bref, l2, m2, n2, alpha2, + Cref, l3, m3, n3, alpha3, Dref, 0); + }; + + // (LL|SS) + ref_coulomb_opop[0] += + c0123 * + (eri_drrrr(d_xx) + eri_drrrr(d_yy) + eri_drrrr(d_zz)); + ref_coulomb_opop[1] += + c0123 * (eri_drrrr(d_yz) - eri_drrrr(d_zy)); + ref_coulomb_opop[2] += + c0123 * (eri_drrrr(d_zx) - eri_drrrr(d_xz)); + ref_coulomb_opop[3] += + c0123 * (eri_drrrr(d_xy) - eri_drrrr(d_yx)); + + // (SS|SS) + ref_opop_coulomb_opop[0] += + c0123 * + (eri_drrrr(xxxx) + eri_drrrr(yyxx) + eri_drrrr(zzxx) - + eri_drrrr(yxyx) + eri_drrrr(xyyx) + eri_drrrr(yxxy) - + eri_drrrr(xyxy) + eri_drrrr(xxyy) + eri_drrrr(yyyy) + + eri_drrrr(zzyy) + eri_drrrr(xxzz) + eri_drrrr(yyzz) + + eri_drrrr(zzzz)); + ref_opop_coulomb_opop[1] += + c0123 * + (eri_drrrr(zxzx) - eri_drrrr(xzzx) - eri_drrrr(zyzy) + + eri_drrrr(yzzy) - eri_drrrr(zxxz) + eri_drrrr(xzxz) + + eri_drrrr(zyyz) - eri_drrrr(yzyz)); + ref_opop_coulomb_opop[2] += + c0123 * + (-eri_drrrr(zyzx) + eri_drrrr(yzzx) - + eri_drrrr(zxzy) + eri_drrrr(xzzy) + eri_drrrr(zyxz) - + eri_drrrr(yzxz) + eri_drrrr(zxyz) - eri_drrrr(xzyz)); + ref_opop_coulomb_opop[3] += + c0123 * + (-eri_drrrr(yxxx) + eri_drrrr(xyxx) - + eri_drrrr(xxyx) - eri_drrrr(yyyx) - eri_drrrr(zzyx) + + eri_drrrr(xxxy) + eri_drrrr(yyxy) + eri_drrrr(zzxy) - + 
eri_drrrr(yxyy) + eri_drrrr(xyyy) - eri_drrrr(yxzz) + + eri_drrrr(xyzz)); + } } } } - } - const double ABSOLUTE_DEVIATION_THRESHOLD = 5.0E-14; - const double RELATIVE_DEVIATION_THRESHOLD = - 1.0E-9; // For more detail on choice of these thresholds, see - // the comments in the TEST_CASE "eri geometric - // derivatives" - - std::array abs_errs; - std::array rel_abs_errs; - - for (auto comp = 0; comp < 4; ++comp) { - abs_errs[comp] = - abs(ref_coulomb_opop[comp] - results[comp][ijkl]); - rel_abs_errs[comp] = abs(abs_errs[comp] / ref_coulomb_opop[comp]); - bool not_ok = rel_abs_errs[comp] > RELATIVE_DEVIATION_THRESHOLD && - abs_errs[comp] > ABSOLUTE_DEVIATION_THRESHOLD; - // no 3^n prefactor here since the intrinsic deriv order is 2 - if (not_ok) { - std::cout << "(l0 l1| l2 l3) = " - << "(" << s0 << " " << s1 << " | " << s2 << " " << s3 - << ") " - << "Elem " << ijkl << " comp= " << comp - << " : ref = " << ref_coulomb_opop[comp] - << " libint = " << results[comp][ijkl] - << " relabs_error = " << rel_abs_errs[comp] - << " abs_error = " << abs_errs[comp] << std::endl; + const double ABSOLUTE_DEVIATION_THRESHOLD = 5.0E-14; + const double RELATIVE_DEVIATION_THRESHOLD = + 1.0E-9; // For more detail on choice of these thresholds, see + // the comments in the TEST_CASE "eri geometric + // derivatives" + + std::array abs_errs_llss; + std::array rel_abs_errs_llss; + + std::array abs_errs_ssss; + std::array rel_abs_errs_ssss; + + for (auto comp = 0; comp < 4; ++comp) { + abs_errs_llss[comp] = + abs(ref_coulomb_opop[comp] - results_llss[comp][ijkl]); + rel_abs_errs_llss[comp] = + abs(abs_errs_llss[comp] / ref_coulomb_opop[comp]); + + abs_errs_ssss[comp] = + abs(ref_opop_coulomb_opop[comp] - results_ssss[comp][ijkl]); + rel_abs_errs_ssss[comp] = + abs(abs_errs_ssss[comp] / ref_opop_coulomb_opop[comp]); + + bool llss_not_ok = + rel_abs_errs_llss[comp] > RELATIVE_DEVIATION_THRESHOLD && + abs_errs_llss[comp] > ABSOLUTE_DEVIATION_THRESHOLD; + + bool ssss_not_ok = + 
rel_abs_errs_ssss[comp] > RELATIVE_DEVIATION_THRESHOLD && + abs_errs_ssss[comp] > ABSOLUTE_DEVIATION_THRESHOLD; + + // no 3^n prefactor here since the intrinsic deriv order is 2 + if (llss_not_ok) { + std::cout << "(l0 l1| l2 l3) = " + << "(" << s0 << " " << s1 << " | " << s2 << " " + << s3 << ") " + << "Elem " << ijkl << " comp= " << comp + << " : ref = " << ref_coulomb_opop[comp] + << " libint = " << results_llss[comp][ijkl] + << " relabs_error = " << rel_abs_errs_llss[comp] + << " abs_error = " << abs_errs_llss[comp] + << std::endl; + } + if (ssss_not_ok) { + std::cout << "(l0 l1| l2 l3) = " + << "(" << s0 << " " << s1 << " | " << s2 << " " + << s3 << ") " + << "Elem " << ijkl << " comp= " << comp + << " : ref = " << ref_opop_coulomb_opop[comp] + << " libint = " << results_ssss[comp][ijkl] + << " relabs_error = " << rel_abs_errs_ssss[comp] + << " abs_error = " << abs_errs_ssss[comp] + << std::endl; + } + REQUIRE(!llss_not_ok); + REQUIRE(!ssss_not_ok); } - REQUIRE(!not_ok); - } - ++ijkl; - END_FOR_CART - END_FOR_CART - END_FOR_CART - END_FOR_CART + ++ijkl; + END_FOR_CART + END_FOR_CART + END_FOR_CART + END_FOR_CART + } } } } diff --git a/include/libint2/engine.h b/include/libint2/engine.h index 81048b049..7253ce417 100644 --- a/include/libint2/engine.h +++ b/include/libint2/engine.h @@ -156,6 +156,10 @@ enum class Operator { /// (2-body) \f$ r_{12}^{-1} (σ.p_{k1})(σ.p_{k2})\f$ where k1 & k2 are /// centers of ket1 and ket2, respectively coulomb_opop, + /// (2-body) \f$ (σ.p_{b1})(σ.p_{b2}) r_{12}^{-1} (σ.p_{k1})(σ.p_{k2})\f$ + /// where b1 & b2 are centers of bra1 and bra2 and k1 & k2 are centers of + /// ket1 and ket2, respectively + opop_coulomb_opop, /// contracted Gaussian geminal cgtg, /// contracted Gaussian geminal times Coulomb @@ -357,6 +361,12 @@ struct operator_traits static constexpr auto nopers = 4; static constexpr auto intrinsic_deriv_order = 2; }; +template <> +struct operator_traits + : public operator_traits { + static constexpr auto nopers = 4; 
+ static constexpr auto intrinsic_deriv_order = 4; +}; namespace detail { template @@ -851,16 +861,16 @@ class Engine { const Shell& ket2, const ShellPair* spbra, const ShellPair* spket); // clang-format off - /** this specifies target precision for computing the integrals, i.e. - * the target absolute (i.e., not relative) error of the integrals. - * It is used to screen out primitive integrals. For some screening - * methods precision can be almost guaranteed (due to finite precision - * of the precomputed interpolation tables used to evaluate the core integrals - * it is not in general possible to guarantee precision rigorously). - * - * @param[in] prec the target precision - * @sa ScreeningMethod - */ + /** this specifies target precision for computing the integrals, i.e. + * the target absolute (i.e., not relative) error of the integrals. + * It is used to screen out primitive integrals. For some screening + * methods precision can be almost guaranteed (due to finite precision + * of the precomputed interpolation tables used to evaluate the core integrals + * it is not in general possible to guarantee precision rigorously). + * + * @param[in] prec the target precision + * @sa ScreeningMethod + */ // clang-format on Engine& set_precision(scalar_type prec) { if (prec <= 0.) { diff --git a/include/libint2/engine.impl.h b/include/libint2/engine.impl.h index 834fd9c6d..da5275c4c 100644 --- a/include/libint2/engine.impl.h +++ b/include/libint2/engine.impl.h @@ -70,31 +70,32 @@ typename std::remove_all_extents::type* to_ptr1(T (&a)[N]) { /// These MUST appear in the same order as in Operator. 
/// You must also update BOOST_PP_NBODY_OPERATOR_LAST_ONEBODY_INDEX when you add /// one-body ints -#define BOOST_PP_NBODY_OPERATOR_LIST \ - (overlap, /* overlap */ \ - (kinetic, /* kinetic */ \ - (elecpot, /* nuclear */ \ - (elecpot, /* erf_nuclear */ \ - (elecpot, /* erfc_nuclear */ \ - (elecpot, /* erfx_nuclear */ \ - (1emultipole, /* emultipole1 */ \ - (2emultipole, /* emultipole2 */ \ - (3emultipole, /* emultipole3 */ \ - (sphemultipole, /* sphemultipole */ \ - (opVop, /* opVop */ \ - (eri, /* delta */ \ - (eri, /* coulomb */ \ - (coulomb_opop, /* coulomb_opop */ \ - (eri, /* cgtg */ \ - (eri, /* cgtg_x_coulomb */ \ - (eri, /* delcgtg2 */ \ - (eri, /* r12 */ \ - (eri, /* erf_coulomb */ \ - (eri, /* erfc_coulomb */ \ - (eri, /* erfx_coulomb */ \ - (eri, /* stg */ \ - (eri, /* yukawa */ \ - BOOST_PP_NIL))))))))))))))))))))))) +#define BOOST_PP_NBODY_OPERATOR_LIST \ + (overlap, /* overlap */ \ + (kinetic, /* kinetic */ \ + (elecpot, /* nuclear */ \ + (elecpot, /* erf_nuclear */ \ + (elecpot, /* erfc_nuclear */ \ + (elecpot, /* erfx_nuclear */ \ + (1emultipole, /* emultipole1 */ \ + (2emultipole, /* emultipole2 */ \ + (3emultipole, /* emultipole3 */ \ + (sphemultipole, /* sphemultipole */ \ + (opVop, /* opVop */ \ + (eri, /* delta */ \ + (eri, /* coulomb */ \ + (coulomb_opop, /* coulomb_opop */ \ + (opop_coulomb_opop, /* opop_coulomb_opop */ \ + (eri, /* cgtg */ \ + (eri, /* cgtg_x_coulomb */ \ + (eri, /* delcgtg2 */ \ + (eri, /* r12 */ \ + (eri, /* erf_coulomb */ \ + (eri, /* erfc_coulomb */ \ + (eri, /* erfx_coulomb */ \ + (eri, /* stg */ \ + (eri, /* yukawa */ \ + BOOST_PP_NIL)))))))))))))))))))))))) #define BOOST_PP_NBODY_OPERATOR_INDEX_TUPLE \ BOOST_PP_MAKE_TUPLE(BOOST_PP_LIST_SIZE(BOOST_PP_NBODY_OPERATOR_LIST)) @@ -702,6 +703,7 @@ __libint2_engine_inline void Engine::initialize(size_t max_nprim) { // target indices.
const auto permutable_targets = deriv_order_ > 0 && + (braket_ == BraKet::xx_xx || braket_ == BraKet::xs_xx || braket_ == BraKet::xx_xs); if (permutable_targets) @@ -1213,13 +1215,21 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( #if LIBINT2_SHELLQUARTET_SET == \ LIBINT2_SHELLQUARTET_SET_STANDARD // standard angular momentum ordering - const auto swap_tbra = (tbra1.contr[0].l < tbra2.contr[0].l); - const auto swap_tket = (tket1.contr[0].l < tket2.contr[0].l); const auto swap_braket = ((braket_ == BraKet::xx_xx) && (tbra1.contr[0].l + tbra2.contr[0].l > tket1.contr[0].l + tket2.contr[0].l) && (oper_ != Operator::coulomb_opop)) || braket_ == BraKet::xx_xs; + bool swap_tbra, swap_tket; + if (oper_ == Operator::opop_coulomb_opop) { + bool swap_p1p2 = swap_braket ? (tbra1.contr[0].l < tbra2.contr[0].l) + : (tket1.contr[0].l < tket2.contr[0].l); + swap_tbra = swap_tket = swap_p1p2; + } else { + swap_tbra = (tbra1.contr[0].l < tbra2.contr[0].l); + swap_tket = (tket1.contr[0].l < tket2.contr[0].l); + } + // N.B. 
cannot swap bra and ket for coulomb_opop since the ket is mutated by // this operator #else // orca angular momentum ordering @@ -1444,6 +1454,13 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( .first(); core_eval_ptr->eval(gm_ptr, T, mmax); } break; + case Operator::opop_coulomb_opop: { + const auto& core_eval_ptr = + any_cast&>(core_eval_pack_) + .first(); + core_eval_ptr->eval(gm_ptr, T, mmax); + } break; case Operator::cgtg_x_coulomb: { const auto& core_eval_ptr = any_cast 0) oper_cart_component_phase = -1.0; // x,y,z quaternion components flip sign on diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index 8712580a5..278e2668a 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -75,19 +75,35 @@ struct ShellQuartetSetPredicate { // return true if this set of angular momenta is included static bool value(int la, int lb, int lc, int ld, bool p1p2_swappable = true); }; + +/** + * standard ordering for angular momenta la, lb, lc, ld + * @param p1p2_swappable whether operator allows swaps of particle 1 and 2 + * functions (e.g., not allowed for Coulombσpσp but allowed + * for Coulomb (TwoPRep)). + * @param bra_ket_coswappable whether need to swap within both bra and ket. 
+ * Swapping within only the bra or only the ket is not allowed + * (e.g., for σpσpCoulombσpσp) + */ template <> struct ShellQuartetSetPredicate { - static bool value(int la, int lb, int lc, int ld, - bool p1p2_swappable = true) { - return la >= lb && lc >= ld && (!p1p2_swappable || la + lb <= lc + ld); + static bool value(int la, int lb, int lc, int ld, bool p1p2_swappable = true, + bool bra_ket_coswappable = false) { + if (bra_ket_coswappable) + return (la + lb <= lc + ld) && lc >= ld; + else + return la >= lb && lc >= ld && (!p1p2_swappable || la + lb <= lc + ld); } }; template <> struct ShellQuartetSetPredicate { - static bool value(int la, int lb, int lc, int ld, - bool p1p2_swappable = true) { - return la <= lb && lc <= ld && - (!p1p2_swappable || (la < lc || (la == lc && lb <= ld))); + static bool value(int la, int lb, int lc, int ld, bool p1p2_swappable = true, + bool bra_ket_coswappable = false) { + if (bra_ket_coswappable) + return (la < lc || (la == lc && lb <= ld)); + else + return la <= lb && lc <= ld && + (!p1p2_swappable || (la < lc || (la == lc && lb <= ld))); } }; template @@ -1123,13 +1139,17 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, std::shared_ptr memman(new WorstFitMemoryManager()); bool p1_p2_swappable = !std::is_same::value; + bool bra_ket_coswappable = std::is_same::value; + // Note: la, lb, lc, ld generate code for chemist notation (ab|O|cd), where O + // is a two-body operator.
for (unsigned int la = 0; la <= lmax; la++) { for (unsigned int lb = 0; lb <= lmax; lb++) { for (unsigned int lc = 0; lc <= lmax; lc++) { for (unsigned int ld = 0; ld <= lmax; ld++) { if (!ShellQuartetSetPredicate( - LIBINT_SHELL_SET)>::value(la, lb, lc, ld, p1_p2_swappable)) + LIBINT_SHELL_SET)>::value(la, lb, lc, ld, p1_p2_swappable, + bra_ket_coswappable)) continue; // std::shared_ptr tactic(new ParticleDirectionTactic(la+lb > @@ -1148,7 +1168,8 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, // loop over operator components ///////////////////////////////// std::vector descrs(1); - if (std::is_same::value) { + if (std::is_same::value || + std::is_same::value) { // reset descriptors array descrs.resize(0); // iterate over quaternion components @@ -1157,7 +1178,8 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, } } - // unroll only if max_am <= cparams->max_am_opt(task) using std::max; + // unroll only if max_am <= cparams->max_am_opt(task) using + // std::max; const unsigned int max_am = max(max(la, lb), max(lc, ld)); const bool need_to_optimize = (max_am <= cparams->max_am_opt(task)); const auto nopers = descrs.size(); @@ -1266,8 +1288,8 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, std::deque decl_filenames; std::deque def_filenames; - // this will generate code for these targets, and potentially generate - // code for its prerequisites + // this will generate code for these targets, and potentially + // generate code for its prerequisites GenerateCode(dg_xxxx, context, cparams, strat, tactic, memman, decl_filenames, def_filenames, prefix, eval_label, false); @@ -1359,9 +1381,9 @@ void build_TwoPRep_1b_2k(std::ostream& os, LIBINT_SHELL_SET)>::value(lbra, lc, ld)) continue; - // I will use 4-center recurrence relations and integrals, and have one - // center carry an s function unfortunately, depending on the direction - // in which the build goes it must be A(0) or B(1) + // I will use 4-center recurrence 
relations and integrals, and have + // one center carry an s function unfortunately, depending on the + // direction in which the build goes it must be A(0) or B(1) const unsigned int dummy_center = (LIBINT_SHELL_SET == LIBINT_SHELL_SET_ORCA) ? 0 : 1; @@ -1560,8 +1582,8 @@ void build_TwoPRep_1b_1k(std::ostream& os, const unsigned int dummy_center2 = (LIBINT_SHELL_SET == LIBINT_SHELL_SET_ORCA) ? 2 : 3; - // std::shared_ptr tactic(new ParticleDirectionTactic(lbra > lket - // ? false : true)); + // std::shared_ptr tactic(new ParticleDirectionTactic(lbra > + // lket ? false : true)); std::shared_ptr tactic(new FourCenter_OS_Tactic( dummy_center1 == 0 ? 0 : lbra, dummy_center1 == 1 ? 0 : lbra, dummy_center2 == 2 ? 0 : lket, dummy_center2 == 3 ? 0 : lket)); @@ -1674,8 +1696,8 @@ void build_TwoPRep_1b_1k(std::ostream& os, std::deque decl_filenames; std::deque def_filenames; - // this will generate code for this targets, and potentially generate code - // for its prerequisites + // this will generate code for this targets, and potentially generate + // code for its prerequisites GenerateCode(dg_xxx, context, cparams, strat, tactic, memman, decl_filenames, def_filenames, prefix, label, false); @@ -1684,7 +1706,8 @@ void build_TwoPRep_1b_1k(std::ostream& os, taskmgr.current().params(); tparams->max_stack_size(max_am, memman->max_memory_used()); tparams->max_ntarget(targets.size()); - // os << " Max memory used = " << memman->max_memory_used() << std::endl; + // os << " Max memory used = " << memman->max_memory_used() << + // std::endl; // set pointer to the top-level evaluator function ostringstream oss; @@ -1875,8 +1898,8 @@ void build_R12kG12_2b_2k(std::ostream& os, std::deque decl_filenames; std::deque def_filenames; - // this will generate code for this targets, and potentially generate - // code for its prerequisites + // this will generate code for this targets, and potentially + // generate code for its prerequisites GenerateCode(dg_xxxx, context, cparams, strat, 
tactic, memman, decl_filenames, def_filenames, prefix, label, false); @@ -2230,11 +2253,11 @@ void build_G12DKH_2b_2k(std::ostream& os, oss << "#include <" << decl_filename << ">" << endl; iface->to_int_iface(oss.str()); - // For the most expensive (i.e. presumably complete) graph extract all - // precomputed quantities -- these will be members of the evaluator - // structure also extract all RRs -- need to keep track of these to - // figure out which external symbols appearing in RR code belong to - // this task also + // For the most expensive (i.e. presumably complete) graph extract + // all precomputed quantities -- these will be members of the + // evaluator structure also extract all RRs -- need to keep track of + // these to figure out which external symbols appearing in RR code + // belong to this task also if (la == lmax && lb == lmax && lc == lmax && ld == lmax) extract_symbols(dg_xxxx); @@ -2307,7 +2330,8 @@ void config_to_api(const std::shared_ptr& cparams, // generated tasks declare all tasks in a range of valid tasks as defined or // not LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); - // the range is defined by max # of centers, max deriv order, and operator set + // the range is defined by max # of centers, max deriv order, and operator + // set const size_t max_ncenter = 4; for (unsigned int ncenter = 0; ncenter <= max_ncenter; ++ncenter) { std::stringstream oss; diff --git "a/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" "b/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" index 1cacc7b3a..7bf0a4b9a 100644 --- "a/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" +++ "b/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" @@ -114,143 +114,75 @@ CR_11_σpσpCoulombσpσp_11::CR_11_σpσpCoulombσpσp_11( }; // Component wise generation for quaternion : - // ( (σ.p) a (σ.p)b | 1/r12 | (σ.p) c (σ.p) d ) + // ( (σ.p) a (σ.p) b | 1/r12 | (σ.p) c (σ.p) d ) 
switch (oper->descr().quaternion_index()) { case 0: { - // zeroth component = - // x1 x2 x3 x4 + y1 y2 x3 x4 - y1 x2 y3 x4 + x1 y2 y3 x4 + y1 x2 x3 y4 - - // x1 y2 x3 y4 + x1 x2 y3 y4 + y1 y2 y3 y4 + z1 z2 x3 x4 + z1 z2 y3 y4 - - // z1 x2 z3 x4 - z1 y2 z3 y4 + x1 z2 z3 x4 + y1 z2 z3 y4 + z1 x2 x3 z4 + - // z1 y2 y3 z4 - x1 z2 x3 z4 - y1 z2 y3 z4 + x1 x2 z3 z4 + y1 y2 z3 z4 + - // z1 z2 z3 z4 auto xxxx = mc(x, x, x, x); auto yyxx = mc(y, y, x, x); + auto zzxx = mc(z, z, x, x); auto yxyx = mc(y, x, y, x); auto xyyx = mc(x, y, y, x); auto yxxy = mc(y, x, x, y); auto xyxy = mc(x, y, x, y); auto xxyy = mc(x, x, y, y); auto yyyy = mc(y, y, y, y); - auto zzxx = mc(z, z, x, x); auto zzyy = mc(z, z, y, y); - auto zxzx = mc(z, x, z, x); - auto zyzy = mc(z, y, z, y); - auto xzzx = mc(x, z, z, x); - auto yzzy = mc(y, z, z, y); - auto zxxz = mc(z, x, x, z); - auto zyyz = mc(z, y, y, z); - auto xzxz = mc(x, z, x, z); - auto yzyz = mc(y, z, y, z); auto xxzz = mc(x, x, z, z); auto yyzz = mc(y, y, z, z); auto zzzz = mc(z, z, z, z); if (is_simple()) { - expr_ = xxxx + yyxx - yxyx + xyyx + yxxy - xyxy + xxyy + yyyy + zzxx + - zzyy - zxzx - zyzy + xzzx + yzzy + zxxz + zyyz - xzxz - yzyz + - xxzz + yyzz + zzzz; - nflops_ += 20; + expr_ = xxxx + yyxx + zzxx - yxyx + xyyx + yxxy - xyxy + xxyy + yyyy + + zzyy + xxzz + yyzz + zzzz; + nflops_ += 12; } } break; case 1: { - // x component = - // - z1 y2 x3 x4 + z1 x2 y3 x4 - z1 x2 x3 y4 - z1 y2 y3 y4 + y1 z2 x3 x4 - - // x1 z2 y3 x4 + x1 z2 x3 y4 + y1 z2 y3 y4 - y1 x2 z3 x4 + x1 y2 z3 x4 - - // x1 x2 z3 y4 - y1 y2 z3 y4 - z1 z2 z3 y4 + y1 x2 x3 z4 - x1 y2 x3 z4 + - // x1 x2 y3 z4 + y1 y2 y3 z4 + z1 z2 y3 z4 - z1 y2 z3 z4 + y1 z2 z3 z4 - auto zyxx = mc(z, y, x, x); - auto zxyx = mc(z, x, y, x); - auto zxxy = mc(z, x, x, y); - auto zyyy = mc(z, y, y, y); - auto yzxx = mc(y, z, x, x); - auto xzyx = mc(x, z, y, x); - auto xzxy = mc(x, z, x, y); - auto yzyy = mc(y, z, y, y); - auto yxzx = mc(y, x, z, x); - auto xyzx = mc(x, y, z, x); - auto xxzy = 
mc(x, x, z, y); - auto yyzy = mc(y, y, z, y); - auto zzzy = mc(z, z, z, y); - auto yxxz = mc(y, x, x, z); - auto xyxz = mc(x, y, x, z); - auto xxyz = mc(x, x, y, z); - auto yyyz = mc(y, y, y, z); - auto zzyz = mc(z, z, y, z); - auto zyzz = mc(z, y, z, z); - auto yzzz = mc(y, z, z, z); + auto zxzx = mc(z, x, z, x); + auto xzzx = mc(x, z, z, x); + auto zyzy = mc(z, y, z, y); + auto yzzy = mc(y, z, z, y); + auto zxxz = mc(z, x, x, z); + auto xzxz = mc(x, z, x, z); + auto zyyz = mc(z, y, y, z); + auto yzyz = mc(y, z, y, z); if (is_simple()) { - // swapped order of first two terms compiler does not like negative sign - // in front of first term - expr_ = zxyx - zyxx - zxxy - zyyy + yzxx - xzyx + xzxy + yzyy - yxzx + - xyzx - xxzy - yyzy - zzzy + yxxz - xyxz + xxyz + yyyz + zzyz - - zyzz + yzzz; - nflops_ += 19; + expr_ = zxzx - xzzx - zyzy + yzzy - zxxz + xzxz + zyyz - yzyz; + nflops_ += 7; } } break; case 2: { - // y component = - // z1 x2 x3 x4 + z1 y2 y3 x4 - z1 y2 x3 y4 + z1 x2 y3 y4 - x1 z2 x3 x4 - - // y1 z2 y3 x4 + y1 z2 x3 y4 - x1 z2 y3 y4 + x1 x2 z3 x4 + y1 y2 z3 x4 - - // y1 x2 z3 y4 + x1 y2 z3 y4 + z1 z2 z3 x4 - x1 x2 x3 z4 - y1 y2 x3 z4 + - // y1 x2 y3 z4 - x1 y2 y3 z4 - z1 z2 x3 z4 + z1 x2 z3 z4 - x1 z2 z3 z4 - auto zxxx = mc(z, x, x, x); - auto zyyx = mc(z, y, y, x); - auto zyxy = mc(z, y, x, y); - auto zxyy = mc(z, x, y, y); - auto xzxx = mc(x, z, x, x); - auto yzyx = mc(y, z, y, x); - auto yzxy = mc(y, z, x, y); - auto xzyy = mc(x, z, y, y); - auto xxzx = mc(x, x, z, x); - auto yyzx = mc(y, y, z, x); - auto yxzy = mc(y, x, z, y); - auto xyzy = mc(x, y, z, y); - auto zzzx = mc(z, z, z, x); - auto xxxz = mc(x, x, x, z); - auto yyxz = mc(y, y, x, z); - auto yxyz = mc(y, x, y, z); - auto xyyz = mc(x, y, y, z); - auto zzxz = mc(z, z, x, z); - auto zxzz = mc(z, x, z, z); - auto xzzz = mc(x, z, z, z); - + auto zyzx = mc(z, y, z, x); + auto yzzx = mc(y, z, z, x); + auto zxzy = mc(z, x, z, y); + auto xzzy = mc(x, z, z, y); + auto zyxz = mc(z, y, x, z); + auto 
yzxz = mc(y, z, x, z); + auto zxyz = mc(z, x, y, z); + auto xzyz = mc(x, z, y, z); if (is_simple()) { - expr_ = zxxx + zyyx - zyxy + zxyy - xzxx - yzyx + yzxy - xzyy + xxzx + - yyzx - yxzy + xyzy + zzzx - xxxz - yyxz + yxyz - xyyz - zzxz + - zxzz - xzzz; - nflops_ += 19; + // swapped order of first two terms compiler does not like negative sign + // in front of first term + expr_ = yzzx - zyzx - zxzy + xzzy + zyxz - yzxz + zxyz - xzyz; + nflops_ += 7; } } break; case 3: { - // z component = - // - y1 x2 x3 x4 + x1 y2 x3 x4 - x1 x2 y3 x4 - y1 y2 y3 x4 + x1 x2 x3 y4 + - // y1 y2 x3 y4 - y1 x2 y3 y4 + x1 y2 y3 y4 - z1 z2 y3 x4 + z1 z2 x3 y4 + - // z1 y2 z3 x4 - z1 x2 z3 y4 - y1 z2 z3 x4 + x1 z2 z3 y4 - z1 y2 x3 z4 + - // z1 x2 y3 z4 + y1 z2 x3 z4 - x1 z2 y3 z4 - y1 x2 z3 z4 + x1 y2 z3 z4 auto yxxx = mc(y, x, x, x); auto xyxx = mc(x, y, x, x); auto xxyx = mc(x, x, y, x); auto yyyx = mc(y, y, y, x); + auto zzyx = mc(z, z, y, x); auto xxxy = mc(x, x, x, y); auto yyxy = mc(y, y, x, y); + auto zzxy = mc(z, z, x, y); auto yxyy = mc(y, x, y, y); auto xyyy = mc(x, y, y, y); - auto zzyx = mc(z, z, y, x); - auto zzxy = mc(z, z, x, y); - auto zyzx = mc(z, y, z, x); - auto zxzy = mc(z, x, z, y); - auto yzzx = mc(y, z, z, x); - auto xzzy = mc(x, z, z, y); - auto zyxz = mc(z, y, x, z); - auto zxyz = mc(z, x, y, z); - auto yzxz = mc(y, z, x, z); - auto xzyz = mc(x, z, y, z); auto yxzz = mc(y, x, z, z); auto xyzz = mc(x, y, z, z); if (is_simple()) { - expr_ = xyxx - yxxx - xxyx - yyyx + xxxy + yyxy - yxyy + xyyy - zzyx + - zzxy + zyzx - zxzy - yzzx + xzzy - zyxz + zxyz + yzxz - xzyz - - yxzz + xyzz; - nflops_ += 19; + expr_ = xyxx - yxxx - xxyx - yyyx - zzyx + xxxy + yyxy + zzxy - yxyy + + xyyy - yxzz + xyzz; + nflops_ += 11; } } break; default: From e29428ab8005654eb789860c1be9b666b946bba1 Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Mon, 9 Mar 2026 17:29:27 -0400 Subject: [PATCH 13/22] cleanup: Remove debugging statements from `RKB Coulomb integrals` test and more cleanup 
--- export/tests/unit/test-2body.cc | 313 ++++++++++++++++---------------- src/bin/libint/build_libint.cc | 17 +- 2 files changed, 161 insertions(+), 169 deletions(-) diff --git a/export/tests/unit/test-2body.cc b/export/tests/unit/test-2body.cc index bf3b6a60d..2f0383fc1 100644 --- a/export/tests/unit/test-2body.cc +++ b/export/tests/unit/test-2body.cc @@ -434,172 +434,165 @@ TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { for (int s1 = 0; s1 != nshell; ++s1) { for (int s2 = 0; s2 != nshell; ++s2) { for (int s3 = 0; s3 != nshell; ++s3) { - if (s0 == 0 && s1 == 1 && s2 == 0 && s3 == 1) { - const auto &results_llss = - engine_llss.compute(obs[s0], obs[s1], obs[s2], obs[s3]); - const auto &results_ssss = - engine_ssss.compute(obs[s0], obs[s1], obs[s2], obs[s3]); - assert(results_llss.size() == - 4); // we get 4 buffers for each quaternion component - - LIBINT2_REF_REALTYPE Aref[3]; - for (int i = 0; i < 3; ++i) Aref[i] = obs[s0].O[i]; - LIBINT2_REF_REALTYPE Bref[3]; - for (int i = 0; i < 3; ++i) Bref[i] = obs[s1].O[i]; - LIBINT2_REF_REALTYPE Cref[3]; - for (int i = 0; i < 3; ++i) Cref[i] = obs[s2].O[i]; - LIBINT2_REF_REALTYPE Dref[3]; - for (int i = 0; i < 3; ++i) Dref[i] = obs[s3].O[i]; - - int ijkl = 0; - - int l0, m0, n0; - FOR_CART(l0, m0, n0, obs[s0].contr[0].l) - - int l1, m1, n1; - FOR_CART(l1, m1, n1, obs[s1].contr[0].l) - - int l2, m2, n2; - FOR_CART(l2, m2, n2, obs[s2].contr[0].l) - - int l3, m3, n3; - FOR_CART(l3, m3, n3, obs[s3].contr[0].l) - - std::array ref_coulomb_opop{0.0, 0.0, - 0.0, 0.0}; - std::array ref_opop_coulomb_opop{ - 0.0, 0.0, 0.0, 0.0}; - uint p0123 = 0; - for (uint p0 = 0; p0 < obs[s0].nprim(); p0++) { - for (uint p1 = 0; p1 < obs[s1].nprim(); p1++) { - for (uint p2 = 0; p2 < obs[s2].nprim(); p2++) { - for (uint p3 = 0; p3 < obs[s3].nprim(); p3++, p0123++) { - const LIBINT2_REF_REALTYPE alpha0 = obs[s0].alpha[p0]; - const LIBINT2_REF_REALTYPE alpha1 = obs[s1].alpha[p1]; - const LIBINT2_REF_REALTYPE alpha2 = obs[s2].alpha[p2]; - 
const LIBINT2_REF_REALTYPE alpha3 = obs[s3].alpha[p3]; - - const LIBINT2_REF_REALTYPE c0 = - obs[s0].contr[0].coeff[p0]; - const LIBINT2_REF_REALTYPE c1 = - obs[s1].contr[0].coeff[p1]; - const LIBINT2_REF_REALTYPE c2 = - obs[s2].contr[0].coeff[p2]; - const LIBINT2_REF_REALTYPE c3 = - obs[s3].contr[0].coeff[p3]; - const LIBINT2_REF_REALTYPE c0123 = c0 * c1 * c2 * c3; - - auto eri_drrrr = [&](der_idx d_rrrr) { - return eri(d_rrrr.data(), l0, m0, n0, alpha0, Aref, l1, - m1, n1, alpha1, Bref, l2, m2, n2, alpha2, - Cref, l3, m3, n3, alpha3, Dref, 0); - }; - - // (LL|SS) - ref_coulomb_opop[0] += - c0123 * - (eri_drrrr(d_xx) + eri_drrrr(d_yy) + eri_drrrr(d_zz)); - ref_coulomb_opop[1] += - c0123 * (eri_drrrr(d_yz) - eri_drrrr(d_zy)); - ref_coulomb_opop[2] += - c0123 * (eri_drrrr(d_zx) - eri_drrrr(d_xz)); - ref_coulomb_opop[3] += - c0123 * (eri_drrrr(d_xy) - eri_drrrr(d_yx)); - - // (SS|SS) - ref_opop_coulomb_opop[0] += - c0123 * - (eri_drrrr(xxxx) + eri_drrrr(yyxx) + eri_drrrr(zzxx) - - eri_drrrr(yxyx) + eri_drrrr(xyyx) + eri_drrrr(yxxy) - - eri_drrrr(xyxy) + eri_drrrr(xxyy) + eri_drrrr(yyyy) + - eri_drrrr(zzyy) + eri_drrrr(xxzz) + eri_drrrr(yyzz) + - eri_drrrr(zzzz)); - ref_opop_coulomb_opop[1] += - c0123 * - (eri_drrrr(zxzx) - eri_drrrr(xzzx) - eri_drrrr(zyzy) + - eri_drrrr(yzzy) - eri_drrrr(zxxz) + eri_drrrr(xzxz) + - eri_drrrr(zyyz) - eri_drrrr(yzyz)); - ref_opop_coulomb_opop[2] += - c0123 * - (-eri_drrrr(zyzx) + eri_drrrr(yzzx) - - eri_drrrr(zxzy) + eri_drrrr(xzzy) + eri_drrrr(zyxz) - - eri_drrrr(yzxz) + eri_drrrr(zxyz) - eri_drrrr(xzyz)); - ref_opop_coulomb_opop[3] += - c0123 * - (-eri_drrrr(yxxx) + eri_drrrr(xyxx) - - eri_drrrr(xxyx) - eri_drrrr(yyyx) - eri_drrrr(zzyx) + - eri_drrrr(xxxy) + eri_drrrr(yyxy) + eri_drrrr(zzxy) - - eri_drrrr(yxyy) + eri_drrrr(xyyy) - eri_drrrr(yxzz) + - eri_drrrr(xyzz)); - } + const auto &results_llss = + engine_llss.compute(obs[s0], obs[s1], obs[s2], obs[s3]); + const auto &results_ssss = + engine_ssss.compute(obs[s0], obs[s1], 
obs[s2], obs[s3]); + assert(results_llss.size() == + 4); // we get 4 buffers for each quaternion component + + LIBINT2_REF_REALTYPE Aref[3]; + for (int i = 0; i < 3; ++i) Aref[i] = obs[s0].O[i]; + LIBINT2_REF_REALTYPE Bref[3]; + for (int i = 0; i < 3; ++i) Bref[i] = obs[s1].O[i]; + LIBINT2_REF_REALTYPE Cref[3]; + for (int i = 0; i < 3; ++i) Cref[i] = obs[s2].O[i]; + LIBINT2_REF_REALTYPE Dref[3]; + for (int i = 0; i < 3; ++i) Dref[i] = obs[s3].O[i]; + + int ijkl = 0; + + int l0, m0, n0; + FOR_CART(l0, m0, n0, obs[s0].contr[0].l) + + int l1, m1, n1; + FOR_CART(l1, m1, n1, obs[s1].contr[0].l) + + int l2, m2, n2; + FOR_CART(l2, m2, n2, obs[s2].contr[0].l) + + int l3, m3, n3; + FOR_CART(l3, m3, n3, obs[s3].contr[0].l) + + std::array ref_coulomb_opop{0.0, 0.0, 0.0, + 0.0}; + std::array ref_opop_coulomb_opop{0.0, 0.0, + 0.0, 0.0}; + uint p0123 = 0; + for (uint p0 = 0; p0 < obs[s0].nprim(); p0++) { + for (uint p1 = 0; p1 < obs[s1].nprim(); p1++) { + for (uint p2 = 0; p2 < obs[s2].nprim(); p2++) { + for (uint p3 = 0; p3 < obs[s3].nprim(); p3++, p0123++) { + const LIBINT2_REF_REALTYPE alpha0 = obs[s0].alpha[p0]; + const LIBINT2_REF_REALTYPE alpha1 = obs[s1].alpha[p1]; + const LIBINT2_REF_REALTYPE alpha2 = obs[s2].alpha[p2]; + const LIBINT2_REF_REALTYPE alpha3 = obs[s3].alpha[p3]; + + const LIBINT2_REF_REALTYPE c0 = obs[s0].contr[0].coeff[p0]; + const LIBINT2_REF_REALTYPE c1 = obs[s1].contr[0].coeff[p1]; + const LIBINT2_REF_REALTYPE c2 = obs[s2].contr[0].coeff[p2]; + const LIBINT2_REF_REALTYPE c3 = obs[s3].contr[0].coeff[p3]; + const LIBINT2_REF_REALTYPE c0123 = c0 * c1 * c2 * c3; + + auto eri_drrrr = [&](der_idx d_rrrr) { + return eri(d_rrrr.data(), l0, m0, n0, alpha0, Aref, l1, + m1, n1, alpha1, Bref, l2, m2, n2, alpha2, Cref, + l3, m3, n3, alpha3, Dref, 0); + }; + + // (LL|SS) + ref_coulomb_opop[0] += + c0123 * + (eri_drrrr(d_xx) + eri_drrrr(d_yy) + eri_drrrr(d_zz)); + ref_coulomb_opop[1] += + c0123 * (eri_drrrr(d_yz) - eri_drrrr(d_zy)); + ref_coulomb_opop[2] += + c0123 * 
(eri_drrrr(d_zx) - eri_drrrr(d_xz)); + ref_coulomb_opop[3] += + c0123 * (eri_drrrr(d_xy) - eri_drrrr(d_yx)); + + // (SS|SS) + ref_opop_coulomb_opop[0] += + c0123 * + (eri_drrrr(xxxx) + eri_drrrr(yyxx) + eri_drrrr(zzxx) - + eri_drrrr(yxyx) + eri_drrrr(xyyx) + eri_drrrr(yxxy) - + eri_drrrr(xyxy) + eri_drrrr(xxyy) + eri_drrrr(yyyy) + + eri_drrrr(zzyy) + eri_drrrr(xxzz) + eri_drrrr(yyzz) + + eri_drrrr(zzzz)); + ref_opop_coulomb_opop[1] += + c0123 * + (eri_drrrr(zxzx) - eri_drrrr(xzzx) - eri_drrrr(zyzy) + + eri_drrrr(yzzy) - eri_drrrr(zxxz) + eri_drrrr(xzxz) + + eri_drrrr(zyyz) - eri_drrrr(yzyz)); + ref_opop_coulomb_opop[2] += + c0123 * + (-eri_drrrr(zyzx) + eri_drrrr(yzzx) - eri_drrrr(zxzy) + + eri_drrrr(xzzy) + eri_drrrr(zyxz) - eri_drrrr(yzxz) + + eri_drrrr(zxyz) - eri_drrrr(xzyz)); + ref_opop_coulomb_opop[3] += + c0123 * + (-eri_drrrr(yxxx) + eri_drrrr(xyxx) - eri_drrrr(xxyx) - + eri_drrrr(yyyx) - eri_drrrr(zzyx) + eri_drrrr(xxxy) + + eri_drrrr(yyxy) + eri_drrrr(zzxy) - eri_drrrr(yxyy) + + eri_drrrr(xyyy) - eri_drrrr(yxzz) + eri_drrrr(xyzz)); } } } + } - const double ABSOLUTE_DEVIATION_THRESHOLD = 5.0E-14; - const double RELATIVE_DEVIATION_THRESHOLD = - 1.0E-9; // For more detail on choice of these thresholds, see - // the comments in the TEST_CASE "eri geometric - // derivatives" - - std::array abs_errs_llss; - std::array rel_abs_errs_llss; - - std::array abs_errs_ssss; - std::array rel_abs_errs_ssss; - - for (auto comp = 0; comp < 4; ++comp) { - abs_errs_llss[comp] = - abs(ref_coulomb_opop[comp] - results_llss[comp][ijkl]); - rel_abs_errs_llss[comp] = - abs(abs_errs_llss[comp] / ref_coulomb_opop[comp]); - - abs_errs_ssss[comp] = - abs(ref_opop_coulomb_opop[comp] - results_ssss[comp][ijkl]); - rel_abs_errs_ssss[comp] = - abs(abs_errs_ssss[comp] / ref_opop_coulomb_opop[comp]); - - bool llss_not_ok = - rel_abs_errs_llss[comp] > RELATIVE_DEVIATION_THRESHOLD && - abs_errs_llss[comp] > ABSOLUTE_DEVIATION_THRESHOLD; - - bool ssss_not_ok = - rel_abs_errs_ssss[comp] > 
RELATIVE_DEVIATION_THRESHOLD && - abs_errs_ssss[comp] > ABSOLUTE_DEVIATION_THRESHOLD; - - // no 3^n prefactor here since the intrinsic deriv order is 2 - if (llss_not_ok) { - std::cout << "(l0 l1| l2 l3) = " - << "(" << s0 << " " << s1 << " | " << s2 << " " - << s3 << ") " - << "Elem " << ijkl << " comp= " << comp - << " : ref = " << ref_coulomb_opop[comp] - << " libint = " << results_llss[comp][ijkl] - << " relabs_error = " << rel_abs_errs_llss[comp] - << " abs_error = " << abs_errs_llss[comp] - << std::endl; - } - if (ssss_not_ok) { - std::cout << "(l0 l1| l2 l3) = " - << "(" << s0 << " " << s1 << " | " << s2 << " " - << s3 << ") " - << "Elem " << ijkl << " comp= " << comp - << " : ref = " << ref_opop_coulomb_opop[comp] - << " libint = " << results_ssss[comp][ijkl] - << " relabs_error = " << rel_abs_errs_ssss[comp] - << " abs_error = " << abs_errs_ssss[comp] - << std::endl; - } - REQUIRE(!llss_not_ok); - REQUIRE(!ssss_not_ok); + const double ABSOLUTE_DEVIATION_THRESHOLD = 5.0E-14; + const double RELATIVE_DEVIATION_THRESHOLD = + 1.0E-9; // For more detail on choice of these thresholds, see + // the comments in the TEST_CASE "eri geometric + // derivatives" + + std::array abs_errs_llss; + std::array rel_abs_errs_llss; + + std::array abs_errs_ssss; + std::array rel_abs_errs_ssss; + + for (auto comp = 0; comp < 4; ++comp) { + abs_errs_llss[comp] = + abs(ref_coulomb_opop[comp] - results_llss[comp][ijkl]); + rel_abs_errs_llss[comp] = + abs(abs_errs_llss[comp] / ref_coulomb_opop[comp]); + + abs_errs_ssss[comp] = + abs(ref_opop_coulomb_opop[comp] - results_ssss[comp][ijkl]); + rel_abs_errs_ssss[comp] = + abs(abs_errs_ssss[comp] / ref_opop_coulomb_opop[comp]); + + bool llss_not_ok = + rel_abs_errs_llss[comp] > RELATIVE_DEVIATION_THRESHOLD && + abs_errs_llss[comp] > ABSOLUTE_DEVIATION_THRESHOLD; + + bool ssss_not_ok = + rel_abs_errs_ssss[comp] > RELATIVE_DEVIATION_THRESHOLD && + abs_errs_ssss[comp] > ABSOLUTE_DEVIATION_THRESHOLD; + + // no 3^n prefactor here since the 
intrinsic deriv order is 2 + if (llss_not_ok) { + std::cout << "(l0 l1| l2 l3) = " + << "(" << s0 << " " << s1 << " | " << s2 << " " << s3 + << ") " + << "Elem " << ijkl << " comp= " << comp + << " : ref = " << ref_coulomb_opop[comp] + << " libint = " << results_llss[comp][ijkl] + << " relabs_error = " << rel_abs_errs_llss[comp] + << " abs_error = " << abs_errs_llss[comp] + << std::endl; } - - ++ijkl; - END_FOR_CART - END_FOR_CART - END_FOR_CART - END_FOR_CART + if (ssss_not_ok) { + std::cout << "(l0 l1| l2 l3) = " + << "(" << s0 << " " << s1 << " | " << s2 << " " << s3 + << ") " + << "Elem " << ijkl << " comp= " << comp + << " : ref = " << ref_opop_coulomb_opop[comp] + << " libint = " << results_ssss[comp][ijkl] + << " relabs_error = " << rel_abs_errs_ssss[comp] + << " abs_error = " << abs_errs_ssss[comp] + << std::endl; + } + REQUIRE(!llss_not_ok); + REQUIRE(!ssss_not_ok); } + + ++ijkl; + END_FOR_CART + END_FOR_CART + END_FOR_CART + END_FOR_CART } } } diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index 4bf4bf383..c2a9de6cd 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -1178,8 +1178,8 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, } } - // unroll only if max_am <= cparams->max_am_opt(task) using - // std::max; + // unroll only if max_am <= cparams->max_am_opt(task) + using std::max; const unsigned int max_am = max(max(la, lb), max(lc, ld)); const bool need_to_optimize = (max_am <= cparams->max_am_opt(task)); const auto nopers = descrs.size(); @@ -1262,7 +1262,6 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, abcd_label = abcd->label(); } else { std::ostringstream oss; - oss << cparams->api_prefix(); oss << "_" << a.label() << "_" << b.label(); oss << "_" << label; oss << "_" << c.label() << "_" << d.label(); @@ -1272,7 +1271,7 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, // + derivative level (if deriv_level > 0) std::string eval_label; { - 
eval_label = cparams->api_prefix(); + eval_label = ""; if (deriv_level != 0) { std::ostringstream oss; oss << "deriv" << deriv_level; @@ -1380,9 +1379,9 @@ void build_TwoPRep_1b_2k(std::ostream& os, LIBINT_SHELL_SET)>::value(lbra, lc, ld)) continue; - // I will use 4-center recurrence relations and integrals, and have - // one center carry an s function unfortunately, depending on the - // direction in which the build goes it must be A(0) or B(1) + // I will use 4-center recurrence relations and integrals, and have one + // center carry an s function unfortunately, depending on the direction + // in which the build goes it must be A(0) or B(1) const unsigned int dummy_center = (LIBINT_SHELL_SET == LIBINT_SHELL_SET_ORCA) ? 0 : 1; @@ -1581,8 +1580,8 @@ void build_TwoPRep_1b_1k(std::ostream& os, const unsigned int dummy_center2 = (LIBINT_SHELL_SET == LIBINT_SHELL_SET_ORCA) ? 2 : 3; - // std::shared_ptr tactic(new ParticleDirectionTactic(lbra > - // lket ? false : true)); + // std::shared_ptr tactic(new ParticleDirectionTactic(lbra > lket + // ? false : true)); std::shared_ptr tactic(new FourCenter_OS_Tactic( dummy_center1 == 0 ? 0 : lbra, dummy_center1 == 1 ? 0 : lbra, dummy_center2 == 2 ? 0 : lket, dummy_center2 == 3 ? 0 : lket)); From 921da582fe6c830bcd142f778d313ad5132a2c04 Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Sat, 21 Mar 2026 14:00:30 -0400 Subject: [PATCH 14/22] Optimize RKB integral code generation: braket symmetry + disable CSE + progress bar + sign fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ShellQuartetSetPredicate: add braket-swap tiebreaker for bra_ket_coswappable operators (σpσpCoulombσpσp). When la+lb == lc+ld, use max(la,lb) <= lc to pick one canonical representative, reducing duplicate quartet generation. - Engine (engine.impl.h): update swap_braket logic for opop_coulomb_opop to match the new predicate tiebreaker. 
Add coupled-swap sign correction in the swap_braket branch (was missing — exposed by d-shell testing). - build_libint.cc: disable CSE (do_cse/condense_expr) for multi-component operators since their 16 components share no intermediates at the expression level. This eliminates the superlinear optimize_rr_out bottleneck (e.g., 8.8s → 71ms for (ss|ds) prerequisite DAG). - build_libint.cc: fix compilation when only LIBINT_INCLUDE_RKB_ERI is defined (without LIBINT_INCLUDE_ERI): extend #ifdef guards for build_TwoPRep_2b_2k, add forward declaration, move make_descr to detail namespace, use if constexpr for component descriptor construction. - buildtest.h: add CodeGenProgress spinner showing elapsed time, function count, and current task name on stderr during code generation. - int_am.cmake: fix typo in OPT_AM variable reference. --- cmake/modules/int_am.cmake | 2 +- export/tests/unit/test-2body.cc | 126 ++++++--- include/libint2/engine.h | 4 +- include/libint2/engine.impl.h | 117 ++++++-- src/bin/libint/build_libint.cc | 101 ++++--- src/bin/libint/buildtest.h | 50 +++- ...3p\317\203pCoulomb\317\203p\317\203p_11.h" | 259 ++++++++++++++---- src/bin/libint/oper.h | 31 ++- 8 files changed, 522 insertions(+), 168 deletions(-) diff --git a/cmake/modules/int_am.cmake b/cmake/modules/int_am.cmake index cc86b7aa7..350924f49 100644 --- a/cmake/modules/int_am.cmake +++ b/cmake/modules/int_am.cmake @@ -262,7 +262,7 @@ macro(process_integrals_class class) if (LIBINT2_${class}_OPT_AM EQUAL -1) set(LIBINT_${class}_OPT_AM "") else() - set($LIBINT_{class}_OPT_AM ${LIBINT2_${class}_OPT_AM}) + set(LIBINT_${class}_OPT_AM ${LIBINT2_${class}_OPT_AM}) endif() endif() if (LIBINT_OPT_AM_LIST) diff --git a/export/tests/unit/test-2body.cc b/export/tests/unit/test-2body.cc index 2f0383fc1..708190b90 100644 --- a/export/tests/unit/test-2body.cc +++ b/export/tests/unit/test-2body.cc @@ -439,7 +439,7 @@ TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { const auto &results_ssss = 
engine_ssss.compute(obs[s0], obs[s1], obs[s2], obs[s3]); assert(results_llss.size() == - 4); // we get 4 buffers for each quaternion component + 4); // 4 buffers for single-spin quaternion components LIBINT2_REF_REALTYPE Aref[3]; for (int i = 0; i < 3; ++i) Aref[i] = obs[s0].O[i]; @@ -466,8 +466,8 @@ TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { std::array ref_coulomb_opop{0.0, 0.0, 0.0, 0.0}; - std::array ref_opop_coulomb_opop{0.0, 0.0, - 0.0, 0.0}; + std::array ref_opop_coulomb_opop{}; + ref_opop_coulomb_opop.fill(0.0); uint p0123 = 0; for (uint p0 = 0; p0 < obs[s0].nprim(); p0++) { for (uint p1 = 0; p1 < obs[s1].nprim(); p1++) { @@ -490,6 +490,22 @@ TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { l3, m3, n3, alpha3, Dref, 0); }; + // helper: build der_idx from 4 derivative directions + // (0=x, 1=y, 2=z) for centers A, B, C, D + constexpr int X = 0, Y = 1, Z = 2; + auto didx = [](int a, int b, int c, int d) -> der_idx { + der_idx r = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + r[a] = 1; + r[3 + b] = 1; + r[6 + c] = 1; + r[9 + d] = 1; + return r; + }; + // shorthand: evaluate derivative ERI from 4 directions + auto D = [&](int a, int b, int c, int d) { + return eri_drrrr(didx(a, b, c, d)); + }; + // (LL|SS) ref_coulomb_opop[0] += c0123 * @@ -501,30 +517,60 @@ TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { ref_coulomb_opop[3] += c0123 * (eri_drrrr(d_xy) - eri_drrrr(d_yx)); - // (SS|SS) + // (SS|SS) — 16 components, Option A: index = 4*bra + ket + // 0:SS 1:SX 2:SY 3:SZ ref_opop_coulomb_opop[0] += - c0123 * - (eri_drrrr(xxxx) + eri_drrrr(yyxx) + eri_drrrr(zzxx) - - eri_drrrr(yxyx) + eri_drrrr(xyyx) + eri_drrrr(yxxy) - - eri_drrrr(xyxy) + eri_drrrr(xxyy) + eri_drrrr(yyyy) + - eri_drrrr(zzyy) + eri_drrrr(xxzz) + eri_drrrr(yyzz) + - eri_drrrr(zzzz)); + c0123 * (D(X, X, X, X) + D(X, X, Y, Y) + D(X, X, Z, Z) + + D(Y, Y, X, X) + D(Y, Y, Y, Y) + D(Y, Y, Z, Z) + + D(Z, Z, X, X) + D(Z, Z, Y, Y) + D(Z, Z, Z, Z)); ref_opop_coulomb_opop[1] += - c0123 
* - (eri_drrrr(zxzx) - eri_drrrr(xzzx) - eri_drrrr(zyzy) + - eri_drrrr(yzzy) - eri_drrrr(zxxz) + eri_drrrr(xzxz) + - eri_drrrr(zyyz) - eri_drrrr(yzyz)); + c0123 * (D(X, X, Y, Z) - D(X, X, Z, Y) + D(Y, Y, Y, Z) - + D(Y, Y, Z, Y) + D(Z, Z, Y, Z) - D(Z, Z, Z, Y)); ref_opop_coulomb_opop[2] += - c0123 * - (-eri_drrrr(zyzx) + eri_drrrr(yzzx) - eri_drrrr(zxzy) + - eri_drrrr(xzzy) + eri_drrrr(zyxz) - eri_drrrr(yzxz) + - eri_drrrr(zxyz) - eri_drrrr(xzyz)); + c0123 * (D(X, X, Z, X) - D(X, X, X, Z) + D(Y, Y, Z, X) - + D(Y, Y, X, Z) + D(Z, Z, Z, X) - D(Z, Z, X, Z)); ref_opop_coulomb_opop[3] += - c0123 * - (-eri_drrrr(yxxx) + eri_drrrr(xyxx) - eri_drrrr(xxyx) - - eri_drrrr(yyyx) - eri_drrrr(zzyx) + eri_drrrr(xxxy) + - eri_drrrr(yyxy) + eri_drrrr(zzxy) - eri_drrrr(yxyy) + - eri_drrrr(xyyy) - eri_drrrr(yxzz) + eri_drrrr(xyzz)); + c0123 * (D(X, X, X, Y) - D(X, X, Y, X) + D(Y, Y, X, Y) - + D(Y, Y, Y, X) + D(Z, Z, X, Y) - D(Z, Z, Y, X)); + // 4:XS 5:XX 6:XY 7:XZ + ref_opop_coulomb_opop[4] += + c0123 * (D(Y, Z, X, X) - D(Z, Y, X, X) + D(Y, Z, Y, Y) - + D(Z, Y, Y, Y) + D(Y, Z, Z, Z) - D(Z, Y, Z, Z)); + ref_opop_coulomb_opop[5] += + c0123 * (-D(Y, Z, Y, Z) + D(Y, Z, Z, Y) + + D(Z, Y, Y, Z) - D(Z, Y, Z, Y)); + ref_opop_coulomb_opop[6] += + c0123 * (-D(Y, Z, Z, X) + D(Y, Z, X, Z) + + D(Z, Y, Z, X) - D(Z, Y, X, Z)); + ref_opop_coulomb_opop[7] += + c0123 * (-D(Y, Z, X, Y) + D(Y, Z, Y, X) + + D(Z, Y, X, Y) - D(Z, Y, Y, X)); + // 8:YS 9:YX 10:YY 11:YZ + ref_opop_coulomb_opop[8] += + c0123 * (D(Z, X, X, X) - D(X, Z, X, X) + D(Z, X, Y, Y) - + D(X, Z, Y, Y) + D(Z, X, Z, Z) - D(X, Z, Z, Z)); + ref_opop_coulomb_opop[9] += + c0123 * (-D(Z, X, Y, Z) + D(Z, X, Z, Y) + + D(X, Z, Y, Z) - D(X, Z, Z, Y)); + ref_opop_coulomb_opop[10] += + c0123 * (-D(Z, X, Z, X) + D(Z, X, X, Z) + + D(X, Z, Z, X) - D(X, Z, X, Z)); + ref_opop_coulomb_opop[11] += + c0123 * (-D(Z, X, X, Y) + D(Z, X, Y, X) + + D(X, Z, X, Y) - D(X, Z, Y, X)); + // 12:ZS 13:ZX 14:ZY 15:ZZ + ref_opop_coulomb_opop[12] += + c0123 * (D(X, Y, X, X) 
- D(Y, X, X, X) + D(X, Y, Y, Y) - + D(Y, X, Y, Y) + D(X, Y, Z, Z) - D(Y, X, Z, Z)); + ref_opop_coulomb_opop[13] += + c0123 * (-D(X, Y, Y, Z) + D(X, Y, Z, Y) + + D(Y, X, Y, Z) - D(Y, X, Z, Y)); + ref_opop_coulomb_opop[14] += + c0123 * (-D(X, Y, Z, X) + D(X, Y, X, Z) + + D(Y, X, Z, X) - D(Y, X, X, Z)); + ref_opop_coulomb_opop[15] += + c0123 * (-D(X, Y, X, Y) + D(X, Y, Y, X) + + D(Y, X, X, Y) - D(Y, X, Y, X)); } } } @@ -539,29 +585,17 @@ TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { std::array abs_errs_llss; std::array rel_abs_errs_llss; - std::array abs_errs_ssss; - std::array rel_abs_errs_ssss; - + // (LL|SS) has 4 components for (auto comp = 0; comp < 4; ++comp) { abs_errs_llss[comp] = abs(ref_coulomb_opop[comp] - results_llss[comp][ijkl]); rel_abs_errs_llss[comp] = abs(abs_errs_llss[comp] / ref_coulomb_opop[comp]); - abs_errs_ssss[comp] = - abs(ref_opop_coulomb_opop[comp] - results_ssss[comp][ijkl]); - rel_abs_errs_ssss[comp] = - abs(abs_errs_ssss[comp] / ref_opop_coulomb_opop[comp]); - bool llss_not_ok = rel_abs_errs_llss[comp] > RELATIVE_DEVIATION_THRESHOLD && abs_errs_llss[comp] > ABSOLUTE_DEVIATION_THRESHOLD; - bool ssss_not_ok = - rel_abs_errs_ssss[comp] > RELATIVE_DEVIATION_THRESHOLD && - abs_errs_ssss[comp] > ABSOLUTE_DEVIATION_THRESHOLD; - - // no 3^n prefactor here since the intrinsic deriv order is 2 if (llss_not_ok) { std::cout << "(l0 l1| l2 l3) = " << "(" << s0 << " " << s1 << " | " << s2 << " " << s3 @@ -573,6 +607,20 @@ TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { << " abs_error = " << abs_errs_llss[comp] << std::endl; } + REQUIRE(!llss_not_ok); + } + + // (SS|SS) has 16 components (two independent spin spaces) + for (auto comp = 0; comp < 16; ++comp) { + auto abs_err_ssss = + abs(ref_opop_coulomb_opop[comp] - results_ssss[comp][ijkl]); + auto rel_abs_err_ssss = + abs(abs_err_ssss / ref_opop_coulomb_opop[comp]); + + bool ssss_not_ok = + rel_abs_err_ssss > RELATIVE_DEVIATION_THRESHOLD && + abs_err_ssss > 
ABSOLUTE_DEVIATION_THRESHOLD; + if (ssss_not_ok) { std::cout << "(l0 l1| l2 l3) = " << "(" << s0 << " " << s1 << " | " << s2 << " " << s3 @@ -580,11 +628,9 @@ TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { << "Elem " << ijkl << " comp= " << comp << " : ref = " << ref_opop_coulomb_opop[comp] << " libint = " << results_ssss[comp][ijkl] - << " relabs_error = " << rel_abs_errs_ssss[comp] - << " abs_error = " << abs_errs_ssss[comp] - << std::endl; + << " relabs_error = " << rel_abs_err_ssss + << " abs_error = " << abs_err_ssss << std::endl; } - REQUIRE(!llss_not_ok); REQUIRE(!ssss_not_ok); } diff --git a/include/libint2/engine.h b/include/libint2/engine.h index 94077da63..fae7d1785 100644 --- a/include/libint2/engine.h +++ b/include/libint2/engine.h @@ -364,7 +364,9 @@ struct operator_traits template <> struct operator_traits : public operator_traits { - static constexpr auto nopers = 4; + /// 16 components: tensor product of two independent spin-space quaternions + /// index = 4 * bra_spin + ket_spin, where spin in {S=0, X=1, Y=2, Z=3} + static constexpr auto nopers = 16; static constexpr auto intrinsic_deriv_order = 4; }; diff --git a/include/libint2/engine.impl.h b/include/libint2/engine.impl.h index a39244e7e..ff143d254 100644 --- a/include/libint2/engine.impl.h +++ b/include/libint2/engine.impl.h @@ -1216,17 +1216,39 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( #if LIBINT2_SHELLQUARTET_SET == \ LIBINT2_SHELLQUARTET_SET_STANDARD // standard angular momentum ordering - const auto swap_braket = ((braket_ == BraKet::xx_xx) && - (tbra1.contr[0].l + tbra2.contr[0].l > - tket1.contr[0].l + tket2.contr[0].l) && - (oper_ != Operator::coulomb_opop)) || - braket_ == BraKet::xx_xs; + bool swap_braket; bool swap_tbra, swap_tket; if (oper_ == Operator::opop_coulomb_opop) { - bool swap_p1p2 = swap_braket ? 
(tbra1.contr[0].l < tbra2.contr[0].l) - : (tket1.contr[0].l < tket2.contr[0].l); - swap_tbra = swap_tket = swap_p1p2; + // For σpσpCoulombσpσp: (ab|cd) = (cd|ab) = (ba|dc)* = (dc|ba)* + // Canonical form: lc >= ld (or la >= lb when lc == ld), + // la+lb <= lc+ld (or max(la,lb) <= lc when sums equal) + const auto bra_total = tbra1.contr[0].l + tbra2.contr[0].l; + const auto ket_total = tket1.contr[0].l + tket2.contr[0].l; + const auto bra_max = std::max(tbra1.contr[0].l, tbra2.contr[0].l); + const auto ket_max = std::max(tket1.contr[0].l, tket2.contr[0].l); + swap_braket = ((braket_ == BraKet::xx_xx) && + (bra_total > ket_total || + (bra_total == ket_total && bra_max > ket_max))) || + braket_ == BraKet::xx_xs; + // Coupled swap: after braket swap, sort the pair that ends up in ket + // position to ensure lc >= ld; when lc == ld, also sort bra (la >= lb) + if (swap_braket) { + // After braket swap: new ket = original bra, new bra = original ket. + // Coupled swap sorts new ket (ensure lc >= ld). + const bool swap_p1p2 = (tbra1.contr[0].l < tbra2.contr[0].l); + swap_tbra = swap_tket = swap_p1p2; + } else { + // No braket swap: ket stays as original ket. + // Coupled swap sorts ket (ensure lc >= ld). + const bool swap_p1p2 = (tket1.contr[0].l < tket2.contr[0].l); + swap_tbra = swap_tket = swap_p1p2; + } } else { + swap_braket = ((braket_ == BraKet::xx_xx) && + (tbra1.contr[0].l + tbra2.contr[0].l > + tket1.contr[0].l + tket2.contr[0].l) && + (oper_ != Operator::coulomb_opop)) || + braket_ == BraKet::xx_xs; swap_tbra = (tbra1.contr[0].l < tbra2.contr[0].l); swap_tket = (tket1.contr[0].l < tket2.contr[0].l); } @@ -1234,15 +1256,35 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( // N.B. 
cannot swap bra and ket for coulomb_opop since the ket is mutated by // this operator #else // orca angular momentum ordering - const auto swap_tbra = (tbra1.contr[0].l > tbra2.contr[0].l); - const auto swap_tket = (tket1.contr[0].l > tket2.contr[0].l); - const auto swap_braket = ((braket_ == BraKet::xx_xx) && - (tbra1.contr[0].l + tbra2.contr[0].l < - tket1.contr[0].l + tket2.contr[0].l) && - (oper_ != Operator::coulomb_opop)) || - braket_ == BraKet::xx_xs; - assert(false && "feature not implemented"); - abort(); + bool swap_braket; + bool swap_tbra, swap_tket; + if (oper_ == Operator::opop_coulomb_opop) { + // ORCA canonical for σpσpCoulombσpσp: lc <= ld (or la <= lb when lc == ld), + // la+lb >= lc+ld (or min(la,lb) >= lc when sums equal) + const auto bra_total = tbra1.contr[0].l + tbra2.contr[0].l; + const auto ket_total = tket1.contr[0].l + tket2.contr[0].l; + const auto bra_min = std::min(tbra1.contr[0].l, tbra2.contr[0].l); + const auto ket_min = std::min(tket1.contr[0].l, tket2.contr[0].l); + swap_braket = ((braket_ == BraKet::xx_xx) && + (bra_total < ket_total || + (bra_total == ket_total && bra_min < ket_min))) || + braket_ == BraKet::xx_xs; + if (swap_braket) { + const bool swap_p1p2 = (tbra1.contr[0].l > tbra2.contr[0].l); + swap_tbra = swap_tket = swap_p1p2; + } else { + const bool swap_p1p2 = (tket1.contr[0].l > tket2.contr[0].l); + swap_tbra = swap_tket = swap_p1p2; + } + } else { + swap_tbra = (tbra1.contr[0].l > tbra2.contr[0].l); + swap_tket = (tket1.contr[0].l > tket2.contr[0].l); + swap_braket = ((braket_ == BraKet::xx_xx) && + (tbra1.contr[0].l + tbra2.contr[0].l < + tket1.contr[0].l + tket2.contr[0].l) && + (oper_ != Operator::coulomb_opop)) || + braket_ == BraKet::xx_xs; + } #endif const auto& bra1 = swap_braket ? (swap_tket ? tket2 : tket1) : (swap_tbra ? 
tbra2 : tbra1); @@ -2124,10 +2166,21 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( tgt_ptr + tgt_col_idx, nr1_tgt, nr2_tgt, Eigen::Stride( nr2_tgt * ncol_tgt, ncol_tgt)); + // Coupled swap sign correction for multi-component operators + Shell::real_t oper_cart_component_phase = 1.0; + if (swap_tket && oper_ == Operator::opop_coulomb_opop) { + const bool bra_is_spin = (s / 4) > 0; + const bool ket_is_spin = (s % 4) > 0; + if (bra_is_spin != ket_is_spin) + oper_cart_component_phase = -1.0; + } + if (swap_tket && oper_ == Operator::coulomb_opop && s > 0) + oper_cart_component_phase = -1.0; if (swap_tbra) - tgt_blk_mat = src_blk_mat.transpose(); + tgt_blk_mat = + oper_cart_component_phase * src_blk_mat.transpose(); else - tgt_blk_mat = src_blk_mat; + tgt_blk_mat = oper_cart_component_phase * src_blk_mat; } else { // source row {r1,r2} is mapped to target row {r1,r2} if // !swap_tbra, else to {r2,r1} @@ -2136,10 +2189,16 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( Map tgt_blk_mat(tgt_ptr + tgt_row_idx * ncol, nc1_tgt, nc2_tgt); if (swap_tket) { Shell::real_t oper_cart_component_phase = 1.0; - if (oper_ == Operator::opop_coulomb_opop && s == 3) - oper_cart_component_phase = - -1.0; // z quaternion components flip sign on - // swapping ket + if (oper_ == Operator::opop_coulomb_opop) { + // Option A ordering: index = 4*bra + ket + // Coupled swap (a<->b AND c<->d) flips sign when exactly + // one of bra/ket is a cross product (spin != S): + // bra_spin = s/4, ket_spin = s%4 (0=S, 1-3=X/Y/Z) + const bool bra_is_spin = (s / 4) > 0; + const bool ket_is_spin = (s % 4) > 0; + if (bra_is_spin != ket_is_spin) + oper_cart_component_phase = -1.0; + } if (oper_ == Operator::coulomb_opop && s > 0) oper_cart_component_phase = -1.0; // x,y,z quaternion components flip sign on @@ -2171,7 +2230,17 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( // to primdata_[0].targets targets_[s_target] = source; } 
- } // loop over shellsets + } // loop over shellsets + + // For opop_coulomb_opop with swap_braket: swapping particles remaps + // component (α,β) → (β,α). With Option A ordering (index=4*bra+ket), + // this is a matrix transpose: s_new = 4*(s%4) + (s/4). + if (permute && oper_ == Operator::opop_coulomb_opop && swap_braket) { + std::array temp; + for (auto s = 0; s != ntargets; ++s) temp[s] = targets_[s]; + for (auto s = 0; s != ntargets; ++s) + targets_[4 * (s % 4) + (s / 4)] = temp[s]; + } } // if need_scratch => needed to transpose and/or tform else { // did not use scratch? may still need to update targets_ if (set_targets_) { diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index c2a9de6cd..4e9b10152 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -31,7 +31,9 @@ */ #include +#include #include +#include #include #include #include @@ -66,6 +68,8 @@ using namespace std; using namespace libint2; +CodeGenProgress g_progress; + enum ShellSetType { ShellSetType_Standard = LIBINT_SHELL_SET_STANDARD, ShellSetType_ORCA = LIBINT_SHELL_SET_ORCA @@ -90,7 +94,8 @@ struct ShellQuartetSetPredicate { static bool value(int la, int lb, int lc, int ld, bool p1p2_swappable = true, bool bra_ket_coswappable = false) { if (bra_ket_coswappable) - return (la + lb <= lc + ld) && lc >= ld; + return lc >= ld && (la + lb < lc + ld || + (la + lb == lc + ld && std::max(la, lb) <= lc)); else return la >= lb && lc >= ld && (!p1p2_swappable || la + lb <= lc + ld); } @@ -100,7 +105,8 @@ struct ShellQuartetSetPredicate { static bool value(int la, int lb, int lc, int ld, bool p1p2_swappable = true, bool bra_ket_coswappable = false) { if (bra_ket_coswappable) - return (la < lc || (la == lc && lb <= ld)); + return lc <= ld && (la + lb > lc + ld || + (la + lb == lc + ld && std::min(la, lb) >= lc)); else return la <= lb && lc <= ld && (!p1p2_swappable || (la < lc || (la == lc && lb <= ld))); @@ -211,7 +217,9 @@ static void 
config_to_api(const std::shared_ptr& cparams, #ifdef LIBINT_INCLUDE_ERI #define USE_GENERIC_ERI_BUILD 1 -#if !USE_GENERIC_ERI_BUILD +#endif +#if defined(LIBINT_INCLUDE_ERI) || defined(LIBINT_INCLUDE_RKB_ERI) +#if defined(USE_GENERIC_ERI_BUILD) && !USE_GENERIC_ERI_BUILD template static void build_TwoPRep_2b_2k( std::ostream& os, std::string label, @@ -222,7 +230,7 @@ template static void build_TwoPRep_2b_2k( std::ostream& os, std::string label, const std::shared_ptr& cparams, - std::shared_ptr& iface, unsigned int deriv_level); + std::shared_ptr& iface, unsigned int deriv_level = 0); #endif #endif @@ -272,6 +280,8 @@ struct AuxQuantaType { typedef EmptySet type; }; +} // namespace + template OperDescrType make_descr(int, int = 0, int = 0) { return OperDescrType(); @@ -308,8 +318,6 @@ template <> return σpσpCoulombσpσp_Descr(p); } -} // namespace - template void build_onebody_1b_1k(std::ostream& os, std::string label, const std::shared_ptr& cparams, @@ -514,8 +522,8 @@ void build_onebody_1b_1k(std::ostream& os, std::string label, eval_label = oss.str(); } - std::cout << "working on " << eval_label << " ... 
"; - std::cout.flush(); + g_progress.current_task = eval_label; + g_progress.print(); std::string prefix(cparams->source_directory()); std::deque decl_filenames; @@ -555,8 +563,6 @@ void build_onebody_1b_1k(std::ostream& os, std::string label, dg->reset(); memman->reset(); - std::cout << "done" << std::endl; - } // end of b loop } // end of a loop } @@ -962,6 +968,8 @@ void try_main(int argc, char* argv[]) { #endif cparams->print(os); + g_progress.start(); + #ifdef LIBINT_INCLUDE_ONEBODY for (unsigned int d = 0; d <= LIBINT_INCLUDE_ONEBODY; ++d) { #define BOOST_PP_ONEBODY_MCR7(r, data, i, elem) \ @@ -1020,6 +1028,8 @@ void try_main(int argc, char* argv[]) { build_G12DKH_2b_2k(os, cparams, iface); #endif + g_progress.finish(); + // Generate code for the set-level RRs std::deque decl_filenames, def_filenames; generate_rr_code(os, cparams, decl_filenames, def_filenames); @@ -1101,12 +1111,12 @@ void print_config(std::ostream& os) { #endif } -#ifdef LIBINT_INCLUDE_ERI +#if defined(LIBINT_INCLUDE_ERI) || defined(LIBINT_INCLUDE_RKB_ERI) template -void build_TwoPRep_2b_2k(std::ostream& os, std::string label, - const std::shared_ptr& cparams, - std::shared_ptr& iface, - unsigned int deriv_level) { +static void build_TwoPRep_2b_2k( + std::ostream& os, std::string label, + const std::shared_ptr& cparams, + std::shared_ptr& iface, unsigned int deriv_level) { typedef GenIntegralSet_11_11 TwoBody_sh_11_11; typedef typename OperType::Descriptor OperDescrType; @@ -1168,13 +1178,20 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, // loop over operator components ///////////////////////////////// std::vector descrs(1); - if (std::is_same::value || - std::is_same::value) { + if constexpr (std::is_same::value) { // reset descriptors array descrs.resize(0); - // iterate over quaternion components + // iterate over 4 quaternion components (single spin space) for (int p = 0; p != 4; ++p) { - descrs.emplace_back(make_descr(p)); + descrs.emplace_back(OperDescrType(p)); + } 
+ } + if constexpr (std::is_same::value) { + // reset descriptors array + descrs.resize(0); + // iterate over 16 components (tensor product of two spin spaces) + for (int p = 0; p != 16; ++p) { + descrs.emplace_back(OperDescrType(p)); } } @@ -1192,10 +1209,14 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, ? std::numeric_limits::max() : 0; dg_xxxx->registry()->unroll_threshold(unroll_threshold); - dg_xxxx->registry()->do_cse(need_to_optimize); - dg_xxxx->registry()->condense_expr(condense_expr( - cparams->unroll_threshold(), cparams->max_vector_length() > 1)); - // dg_xxxx->registry()->condense_expr(true); + // For multi-component operators (RKB), components share no + // intermediates, so CSE/condense_expr is pure overhead — disable. + const bool do_optimize = (nopers > 1) ? false : need_to_optimize; + dg_xxxx->registry()->do_cse(do_optimize); + dg_xxxx->registry()->condense_expr( + do_optimize ? condense_expr(cparams->unroll_threshold(), + cparams->max_vector_length() > 1) + : false); // Need to accumulate integrals? dg_xxxx->registry()->accumulate_targets( cparams->accumulate_targets()); @@ -1238,13 +1259,6 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, last_deriv = diter.last(); if (!last_deriv) diter.next(); } while (!last_deriv); - // append all derivatives as targets to the graph - for (auto it = targets.begin(); it != targets.end(); ++it) { - std::shared_ptr t_ptr = - std::dynamic_pointer_cast(*it); - dg_xxxx->append_target(t_ptr); - } - // make label that characterizes this set of targets // use the label of the nondifferentiated integral as a base std::string abcd_label; @@ -1280,13 +1294,20 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, eval_label += abcd_label; } - std::cout << "working on " << eval_label << " ... 
"; - std::cout.flush(); + g_progress.current_task = eval_label; + g_progress.print(); std::string prefix(cparams->source_directory()); std::deque decl_filenames; std::deque def_filenames; + // append all targets to the graph + for (auto it = targets.begin(); it != targets.end(); ++it) { + std::shared_ptr t_ptr = + std::dynamic_pointer_cast(*it); + dg_xxxx->append_target(t_ptr); + } + // this will generate code for these targets, and potentially // generate code for its prerequisites GenerateCode(dg_xxxx, context, cparams, strat, tactic, memman, @@ -1322,7 +1343,8 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, dg_xxxx->reset(); memman->reset(); - std::cout << "done" << std::endl; + ++g_progress.done; + g_progress.print(); } // end of d loop } // end of c loop @@ -1330,7 +1352,7 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::string label, } // end of a loop } -#endif // LIBINT_INCLUDE_ERI +#endif // LIBINT_INCLUDE_ERI || LIBINT_INCLUDE_RKB_ERI #ifdef LIBINT_INCLUDE_ERI3 @@ -1490,8 +1512,8 @@ void build_TwoPRep_1b_2k(std::ostream& os, label += abcd_label; } - std::cout << "working on " << label << " ... "; - std::cout.flush(); + g_progress.current_task = label; + g_progress.print(); std::string prefix(cparams->source_directory()); std::deque decl_filenames; @@ -1530,7 +1552,8 @@ void build_TwoPRep_1b_2k(std::ostream& os, #endif dg_xxx->reset(); memman->reset(); - std::cout << "done" << std::endl; + ++g_progress.done; + g_progress.print(); } // end of d loop } // end of c loop } // end of bra loop @@ -1687,7 +1710,8 @@ void build_TwoPRep_1b_1k(std::ostream& os, label += abcd_label; } - std::cout << "working on " << label << " ... 
"; + g_progress.current_task = label; + g_progress.print(); std::cout.flush(); std::string prefix(cparams->source_directory()); @@ -1727,7 +1751,6 @@ void build_TwoPRep_1b_1k(std::ostream& os, #endif dg_xxx->reset(); memman->reset(); - std::cout << "done" << std::endl; } // end of ket loop } // end of bra loop } diff --git a/src/bin/libint/buildtest.h b/src/bin/libint/buildtest.h index a4923c022..0ea5b571e 100644 --- a/src/bin/libint/buildtest.h +++ b/src/bin/libint/buildtest.h @@ -30,13 +30,57 @@ #include #include +#include #include #include +#include #include #include #include #include +/// Progress tracker for code generation. +struct CodeGenProgress { + unsigned int done = 0; + std::string current_task; + std::chrono::steady_clock::time_point start_time; + bool started = false; + + void start() { + start_time = std::chrono::steady_clock::now(); + started = true; + } + + void print() const { + if (!started) return; + static const char spinner[] = "|/-\\"; + const auto elapsed = std::chrono::duration_cast( + std::chrono::steady_clock::now() - start_time) + .count(); + const auto mins = elapsed / 60; + const auto secs = elapsed % 60; + std::cerr << "\r " << spinner[done % 4] << " " << std::setfill('0') + << std::setw(2) << mins << ":" << std::setw(2) << secs + << std::setfill(' ') << " [" << done << " functions generated] " + << current_task << " " << std::flush; + } + + void finish() { + if (!started) return; + const auto elapsed = std::chrono::duration_cast( + std::chrono::steady_clock::now() - start_time) + .count(); + const auto mins = elapsed / 60; + const auto secs = elapsed % 60; + std::cerr << "\r done " << std::setfill('0') << std::setw(2) << mins + << ":" << std::setw(2) << secs << std::setfill(' ') << " [" + << done << " functions generated]" + << " " << std::endl; + started = false; + } +}; +extern CodeGenProgress g_progress; + namespace libint2 { // defined in buildtest.cc @@ -263,7 +307,6 @@ void GenerateCode(const std::shared_ptr& dg, // if there 
are missing prerequisites -- make a list of them PrerequisitesExtractor pe; if (dg->missing_prerequisites()) { - // std::cout << "missing some prerequisites!" << std::endl; dg->foreach (pe); } std::deque > prereq_list = pe.vertices; @@ -296,6 +339,11 @@ void GenerateCode(const std::shared_ptr& dg, // extract all external symbols extract_symbols(dg); + // Update progress + ++g_progress.done; + g_progress.current_task = label; + g_progress.print(); + #if PRINT_DAG_GRAPHVIZ { std::basic_ofstream dotfile(dg->label() + ".symb.dot"); diff --git "a/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" "b/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" index 7bf0a4b9a..606418d0b 100644 --- "a/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" +++ "b/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" @@ -28,10 +28,38 @@ namespace libint2 { /** - * this computes integral of - * \sigma \cdot \hat{p}_1 \sigma \cdot \hat{p}_2 \f$ \frac{1}{r_{ij}} \sigma - * \cdot \hat{p}_3 \sigma \cdot \hat{p}_4 \f$ over CGShell/CGF by rewriting it - * as a linear combination of integrals over derivatives of \frac{1}{r_{ij}} + * Computes integral of + * \f$ (\sigma_1 \cdot \hat{p}_a)(\sigma_1 \cdot \hat{p}_b) + * \frac{1}{r_{12}} + * (\sigma_2 \cdot \hat{p}_c)(\sigma_2 \cdot \hat{p}_d) \f$ + * over CGShell/CGF by rewriting it as a linear combination of integrals + * over derivatives of \f$ \frac{1}{r_{12}} \f$. + * + * The two sigma operators act on independent spin spaces (electron 1 and + * electron 2). Using the Dirac identity (see e.g. Eq. 1.27 of I. P. 
Grant, + * "Relativistic Quantum Theory of Atoms and Molecules", Springer, 2007): + * \f$ (\sigma \cdot a)(\sigma \cdot b) = (a \cdot b)I + * + i\sigma \cdot (a \times b) \f$ + * applied independently to each particle's spin space gives a tensor product + * of two quaternions with \f$ 4 \times 4 = 16 \f$ components: + * + * index = 4 * bra_spin_index + ket_spin_index + * + * where spin indices are: 0=S (scalar/dot product), 1=X, 2=Y, 3=Z + * (cross product components). + * + * The 16 components map to: + * T1 (index 0): SS = (a.b)(c.d) [scalar x scalar] + * T2 (indices 1-3): SX,SY,SZ = (a.b)(cxd)_{x,y,z} [scalar x spin] + * T3 (indices 4,8,12): XS,YS,ZS = (axb)_{x,y,z}(c.d) [spin x scalar] + * T4 (indices 5-7,9-11,13-15): XX..ZZ = -(axb)_i(cxd)_j [spin x spin] + * + * Sign convention: T4 components include the minus sign from \f$ i^2 = -1 \f$, + * arising from the product of two \f$ i \f$ factors in the Dirac identity: + * \f$ [i\sigma_1 \cdot (a \times b)] \otimes [i\sigma_2 \cdot (c \times d)] + * = -\sigma_{1,i} \otimes \sigma_{2,j}\; (a \times b)_i\, (c \times d)_j + * \f$ + * * @tparam F basis function type. valid choices are CGShell or CGF */ template @@ -113,81 +141,216 @@ CR_11_σpσpCoulombσpσp_11::CR_11_σpσpCoulombσpσp_11( return factory.make_child(a_r1, b_r2, c_r3, d_r4, zero_m); }; - // Component wise generation for quaternion : - // ( (σ.p) a (σ.p) b | 1/r12 | (σ.p) c (σ.p) d ) + // 16-component generation for two independent spin spaces: + // ( (σ₁.p) a (σ₁.p) b | 1/r12 | (σ₂.p) c (σ₂.p) d ) + // + // Option A (tensor product) ordering: index = 4 * bra_spin + ket_spin + // bra_spin = index / 4, ket_spin = index % 4 + // spin indices: S=0, X=1, Y=2, Z=3 + // + // Row bra=S: 0=SS, 1=SX, 2=SY, 3=SZ (T1 + T2) + // Row bra=X: 4=XS, 5=XX, 6=XY, 7=XZ (T3 + T4) + // Row bra=Y: 8=YS, 9=YX, 10=YY, 11=YZ (T3 + T4) + // Row bra=Z: 12=ZS, 13=ZX, 14=ZY, 15=ZZ (T3 + T4) + // + // T4 components include minus sign from i^2 = -1. 
switch (oper->descr().quaternion_index()) { + // ===== 0: SS = (a.b)(c.d) ===== case 0: { auto xxxx = mc(x, x, x, x); - auto yyxx = mc(y, y, x, x); - auto zzxx = mc(z, z, x, x); - auto yxyx = mc(y, x, y, x); - auto xyyx = mc(x, y, y, x); - auto yxxy = mc(y, x, x, y); - auto xyxy = mc(x, y, x, y); auto xxyy = mc(x, x, y, y); - auto yyyy = mc(y, y, y, y); - auto zzyy = mc(z, z, y, y); auto xxzz = mc(x, x, z, z); + auto yyxx = mc(y, y, x, x); + auto yyyy = mc(y, y, y, y); auto yyzz = mc(y, y, z, z); + auto zzxx = mc(z, z, x, x); + auto zzyy = mc(z, z, y, y); auto zzzz = mc(z, z, z, z); if (is_simple()) { - expr_ = xxxx + yyxx + zzxx - yxyx + xyyx + yxxy - xyxy + xxyy + yyyy + - zzyy + xxzz + yyzz + zzzz; - nflops_ += 12; + expr_ = xxxx + xxyy + xxzz + yyxx + yyyy + yyzz + zzxx + zzyy + zzzz; + nflops_ += 8; } } break; + // ===== 1: SX = (a.b)(c×d)_x ===== case 1: { - auto zxzx = mc(z, x, z, x); - auto xzzx = mc(x, z, z, x); - auto zyzy = mc(z, y, z, y); + auto xxyz = mc(x, x, y, z); + auto xxzy = mc(x, x, z, y); + auto yyyz = mc(y, y, y, z); + auto yyzy = mc(y, y, z, y); + auto zzyz = mc(z, z, y, z); + auto zzzy = mc(z, z, z, y); + if (is_simple()) { + expr_ = xxyz - xxzy + yyyz - yyzy + zzyz - zzzy; + nflops_ += 5; + } + } break; + // ===== 2: SY = (a.b)(c×d)_y ===== + case 2: { + auto xxzx = mc(x, x, z, x); + auto xxxz = mc(x, x, x, z); + auto yyzx = mc(y, y, z, x); + auto yyxz = mc(y, y, x, z); + auto zzzx = mc(z, z, z, x); + auto zzxz = mc(z, z, x, z); + if (is_simple()) { + expr_ = xxzx - xxxz + yyzx - yyxz + zzzx - zzxz; + nflops_ += 5; + } + } break; + // ===== 3: SZ = (a.b)(c×d)_z ===== + case 3: { + auto xxxy = mc(x, x, x, y); + auto xxyx = mc(x, x, y, x); + auto yyxy = mc(y, y, x, y); + auto yyyx = mc(y, y, y, x); + auto zzxy = mc(z, z, x, y); + auto zzyx = mc(z, z, y, x); + if (is_simple()) { + expr_ = xxxy - xxyx + yyxy - yyyx + zzxy - zzyx; + nflops_ += 5; + } + } break; + // ===== 4: XS = (a×b)_x(c.d) ===== + case 4: { + auto yzxx = mc(y, z, x, x); + 
auto zyxx = mc(z, y, x, x); + auto yzyy = mc(y, z, y, y); + auto zyyy = mc(z, y, y, y); + auto yzzz = mc(y, z, z, z); + auto zyzz = mc(z, y, z, z); + if (is_simple()) { + expr_ = yzxx - zyxx + yzyy - zyyy + yzzz - zyzz; + nflops_ += 5; + } + } break; + // ===== 5: XX = -(a×b)_x(c×d)_x (minus from i²=-1) ===== + case 5: { + auto yzyz = mc(y, z, y, z); auto yzzy = mc(y, z, z, y); - auto zxxz = mc(z, x, x, z); - auto xzxz = mc(x, z, x, z); auto zyyz = mc(z, y, y, z); - auto yzyz = mc(y, z, y, z); + auto zyzy = mc(z, y, z, y); if (is_simple()) { - expr_ = zxzx - xzzx - zyzy + yzzy - zxxz + xzxz + zyyz - yzyz; - nflops_ += 7; + expr_ = yzzy - yzyz + zyyz - zyzy; + nflops_ += 3; } } break; - case 2: { - auto zyzx = mc(z, y, z, x); + // ===== 6: XY = -(a×b)_x(c×d)_y ===== + case 6: { auto yzzx = mc(y, z, z, x); - auto zxzy = mc(z, x, z, y); - auto xzzy = mc(x, z, z, y); - auto zyxz = mc(z, y, x, z); auto yzxz = mc(y, z, x, z); + auto zyzx = mc(z, y, z, x); + auto zyxz = mc(z, y, x, z); + if (is_simple()) { + expr_ = yzxz - yzzx + zyzx - zyxz; + nflops_ += 3; + } + } break; + // ===== 7: XZ = -(a×b)_x(c×d)_z ===== + case 7: { + auto yzxy = mc(y, z, x, y); + auto yzyx = mc(y, z, y, x); + auto zyxy = mc(z, y, x, y); + auto zyyx = mc(z, y, y, x); + if (is_simple()) { + expr_ = yzyx - yzxy + zyxy - zyyx; + nflops_ += 3; + } + } break; + // ===== 8: YS = (a×b)_y(c.d) ===== + case 8: { + auto zxxx = mc(z, x, x, x); + auto xzxx = mc(x, z, x, x); + auto zxyy = mc(z, x, y, y); + auto xzyy = mc(x, z, y, y); + auto zxzz = mc(z, x, z, z); + auto xzzz = mc(x, z, z, z); + if (is_simple()) { + expr_ = zxxx - xzxx + zxyy - xzyy + zxzz - xzzz; + nflops_ += 5; + } + } break; + // ===== 9: YX = -(a×b)_y(c×d)_x ===== + case 9: { auto zxyz = mc(z, x, y, z); + auto zxzy = mc(z, x, z, y); auto xzyz = mc(x, z, y, z); + auto xzzy = mc(x, z, z, y); if (is_simple()) { - // swapped order of first two terms compiler does not like negative sign - // in front of first term - expr_ = yzzx - zyzx - zxzy + 
xzzy + zyxz - yzxz + zxyz - xzyz; - nflops_ += 7; + expr_ = zxzy - zxyz + xzyz - xzzy; + nflops_ += 3; } } break; - case 3: { - auto yxxx = mc(y, x, x, x); + // ===== 10: YY = -(a×b)_y(c×d)_y ===== + case 10: { + auto zxzx = mc(z, x, z, x); + auto zxxz = mc(z, x, x, z); + auto xzzx = mc(x, z, z, x); + auto xzxz = mc(x, z, x, z); + if (is_simple()) { + expr_ = zxxz - zxzx + xzzx - xzxz; + nflops_ += 3; + } + } break; + // ===== 11: YZ = -(a×b)_y(c×d)_z ===== + case 11: { + auto zxxy = mc(z, x, x, y); + auto zxyx = mc(z, x, y, x); + auto xzxy = mc(x, z, x, y); + auto xzyx = mc(x, z, y, x); + if (is_simple()) { + expr_ = zxyx - zxxy + xzxy - xzyx; + nflops_ += 3; + } + } break; + // ===== 12: ZS = (a×b)_z(c.d) ===== + case 12: { auto xyxx = mc(x, y, x, x); - auto xxyx = mc(x, x, y, x); - auto yyyx = mc(y, y, y, x); - auto zzyx = mc(z, z, y, x); - auto xxxy = mc(x, x, x, y); - auto yyxy = mc(y, y, x, y); - auto zzxy = mc(z, z, x, y); - auto yxyy = mc(y, x, y, y); + auto yxxx = mc(y, x, x, x); auto xyyy = mc(x, y, y, y); - auto yxzz = mc(y, x, z, z); + auto yxyy = mc(y, x, y, y); auto xyzz = mc(x, y, z, z); + auto yxzz = mc(y, x, z, z); + if (is_simple()) { + expr_ = xyxx - yxxx + xyyy - yxyy + xyzz - yxzz; + nflops_ += 5; + } + } break; + // ===== 13: ZX = -(a×b)_z(c×d)_x ===== + case 13: { + auto xyyz = mc(x, y, y, z); + auto xyzy = mc(x, y, z, y); + auto yxyz = mc(y, x, y, z); + auto yxzy = mc(y, x, z, y); + if (is_simple()) { + expr_ = xyzy - xyyz + yxyz - yxzy; + nflops_ += 3; + } + } break; + // ===== 14: ZY = -(a×b)_z(c×d)_y ===== + case 14: { + auto xyzx = mc(x, y, z, x); + auto xyxz = mc(x, y, x, z); + auto yxzx = mc(y, x, z, x); + auto yxxz = mc(y, x, x, z); + if (is_simple()) { + expr_ = xyxz - xyzx + yxzx - yxxz; + nflops_ += 3; + } + } break; + // ===== 15: ZZ = -(a×b)_z(c×d)_z ===== + case 15: { + auto xyxy = mc(x, y, x, y); + auto xyyx = mc(x, y, y, x); + auto yxxy = mc(y, x, x, y); + auto yxyx = mc(y, x, y, x); if (is_simple()) { - expr_ = xyxx - yxxx - 
xxyx - yyyx - zzyx + xxxy + yyxy + zzxy - yxyy + - xyyy - yxzz + xyzz; - nflops_ += 11; + expr_ = xyyx - xyxy + yxxy - yxyx; + nflops_ += 3; } } break; default: throw std::runtime_error( - "CR_11_σpσpCoulombσpσp_11: invalid quaternionic index"); + "CR_11_σpσpCoulombσpσp_11: invalid component index (expected 0-15)"); } } // CR_11_σpσpCoulombσpσp_11::CR_11_σpσpCoulombσpσp_11 diff --git a/src/bin/libint/oper.h b/src/bin/libint/oper.h index cecbda72b..678180c28 100644 --- a/src/bin/libint/oper.h +++ b/src/bin/libint/oper.h @@ -443,24 +443,27 @@ struct σpσpCoulombσpσp_Descr : public Contractable<σpσpCoulombσpσp_Descr σpσpCoulombσpσp_Descr() : quaternion_index_(0) {} σpσpCoulombσpσp_Descr(int quaternion_index) : quaternion_index_(quaternion_index) { - assert(quaternion_index <= 3); + assert(quaternion_index >= 0 && quaternion_index <= 15); } - static const unsigned int max_key = 4; + /// 16 components from tensor product of two independent spin spaces: + /// index = 4 * bra_spin_index + ket_spin_index + /// where spin indices are: 0=S (scalar), 1=X, 2=Y, 3=Z (cross product) + static const unsigned int max_key = 16; unsigned int key() const { return quaternion_index(); } std::string description() const { - std::string descr("opop_coulomb_opop["); - if (quaternion_index() == 0) - descr += "0"; - else if (quaternion_index() == 1) - descr += "X"; - else if (quaternion_index() == 2) - descr += "Y"; - else if (quaternion_index() == 3) - descr += "Z"; - else - abort(); - return descr + "]"; + // clang-format off + // Option A (tensor product order): index = 4 * bra_spin + ket_spin + static const char* labels[] = { + "SS", "SX", "SY", "SZ", + "XS", "XX", "XY", "XZ", + "YS", "YX", "YY", "YZ", + "ZS", "ZX", "ZY", "ZZ" + }; + // clang-format on + const auto qi = quaternion_index(); + if (qi > 15) abort(); + return std::string("opop_coulomb_opop[") + labels[qi] + "]"; } std::string label() const { return description(); } int psymm(int i, int j) const { abort(); } From 
98d7d375a0e9bd040a14236f3e9a79a798bd42e1 Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Thu, 2 Apr 2026 19:28:27 -0400 Subject: [PATCH 15/22] DerivGaussV2: HRR-like code-sharing for derivative Gaussians Apply the same optimization as HRR: since differentiation of a Gaussian depends only on that shell's quanta (not spectators), generate code once per unique differentiated shell and pass spectator dims at the call site. Verified: 295,944 assertions pass (Coulomb + ERI derivs + RKB integrals). --- src/bin/libint/comp_deriv_gauss_v2.h | 547 +++++++++++++++++++++++++++ src/bin/libint/master_rrs_list.h | 46 +++ src/bin/libint/strategy.cc | 62 +-- 3 files changed, 631 insertions(+), 24 deletions(-) create mode 100644 src/bin/libint/comp_deriv_gauss_v2.h diff --git a/src/bin/libint/comp_deriv_gauss_v2.h b/src/bin/libint/comp_deriv_gauss_v2.h new file mode 100644 index 000000000..970bab0ae --- /dev/null +++ b/src/bin/libint/comp_deriv_gauss_v2.h @@ -0,0 +1,547 @@ +/* + * Copyright (C) 2004-2026 Edward F. Valeev + * + * This file is part of Libint compiler. + * + * Libint compiler is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Libint compiler is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Libint compiler. If not, see . 
+ * + */ + +#ifndef _libint2_src_bin_libint_compderivgaussv2_h_ +#define _libint2_src_bin_libint_compderivgaussv2_h_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace libint2 { + +/** Optimized compute relation for (geometric) derivative Gaussian integrals. + * + * Like CR_DerivGauss, this expands derivative Gaussians via: + * d/dr G(a) = 2*alpha * G(a+1) - a_i * G(a-1) + * + * Unlike CR_DerivGauss, this uses the HRR-like code-sharing optimization: + * since differentiation of a Gaussian at position (part, where) depends only + * on that shell's quanta (not spectator shells), we generate code once per + * unique differentiated shell and pass spectator dimensions at the call site. + * + * @tparam IntType integral type + * @tparam part particle index of the function to be differentiated + * @tparam where position of the function to be differentiated (InBra/InKet) + * @tparam trans_inv_part if non-negative, specifies the particle index for + * translational invariance + * @tparam trans_inv_where position for translational invariance + */ +template +class DerivGaussV2 : public RecurrenceRelation { + private: + static constexpr auto trans_inv_oper = + not IntType::OperType::Properties::odep; + static constexpr auto using_trans_inv = + trans_inv_oper && (part == trans_inv_part) && (where == trans_inv_where); + + public: + typedef RecurrenceRelation ParentType; + typedef typename IntType::BasisFunctionType BasisFunctionType; + typedef DerivGaussV2 ThisType; + typedef IntType TargetType; + typedef IntType ChildType; + typedef RecurrenceRelation::ExprType ExprType; + + static const unsigned int max_nchildren_ = + using_trans_inv ? 
(IntType::num_bf - 1) : 2u; + + static std::shared_ptr Instance( + const std::shared_ptr& Tint, unsigned int dir = 0); + virtual ~DerivGaussV2() {} + + /// always directional + static bool directional() { return true; } + + unsigned int num_children() const override { return nchildren_; } + std::shared_ptr rr_target() const override { + return std::static_pointer_cast(target_); + } + std::shared_ptr rr_child(unsigned int i) const override { + return children_.at(i); + } + bool is_simple() const override { + return TrivialBFSet::result; + } + + std::string spfunction_call( + const std::shared_ptr& context, + const std::shared_ptr& dims) const override; + + private: + DerivGaussV2(const std::shared_ptr& Tint, unsigned int dir); + + unsigned int dir_; + std::shared_ptr target_; + std::vector> children_; + unsigned int nchildren_; + + std::string generate_label() const override; + std::shared_ptr adapt_dims_( + const std::shared_ptr& dims) const override; + bool register_with_rrstack() const; + bool expl_high_dim() const; + bool expl_low_dim() const; + + /// add child, deduplicating + const std::shared_ptr& add_child( + const std::shared_ptr& child) { + for (auto& c : children_) { + if (c == child) return c; + } + children_.push_back(child); + ++nchildren_; + return children_.back(); + } +}; + +// +// Implementation +// + +template +std::shared_ptr< + DerivGaussV2> +DerivGaussV2::Instance( + const std::shared_ptr& Tint, unsigned int dir) { + std::shared_ptr this_ptr(new ThisType(Tint, dir)); + if (this_ptr->num_children() != 0) { + this_ptr->register_with_rrstack(); + return this_ptr; + } + return std::shared_ptr(); +} + +template +DerivGaussV2:: + DerivGaussV2(const std::shared_ptr& Tint, unsigned int dir) + : dir_(dir), target_(Tint), nchildren_(0) { + using namespace libint2::algebra; + using namespace libint2::prefactor; + using namespace libint2::braket; + typedef BasisFunctionType F; + const F& _1 = unit(is_simple() ? 
dir : 0); + + const typename IntType::AuxQuantaType& aux = Tint->aux(); + const typename IntType::OperType& oper = Tint->oper(); + + children_.reserve(max_nchildren_); + + // the Gaussian must be differentiated in direction dir + { + if (where == InBra && Tint->bra(part, 0).deriv().d(dir) == 0) return; + if (where == InKet && Tint->ket(part, 0).deriv().d(dir) == 0) return; + } + + // if not using translational invariance, can only expand primitives + if (not using_trans_inv) { + if (where == InBra && Tint->bra(part, 0).contracted()) return; + if (where == InKet && Tint->ket(part, 0).contracted()) return; + } + + typedef typename IntType::BraType IBraType; + typedef typename IntType::KetType IKetType; + IBraType* bra = new IBraType(Tint->bra()); + IKetType* ket = new IKetType(Tint->ket()); + + if (not using_trans_inv) { // differentiate + + if (where == InBra) { + F a(bra->member(part, 0)); + + // add a+1 + F ap1(bra->member(part, 0) + _1); + ap1.deriv().dec(dir); + bra->set_member(ap1, part, 0); + auto int_ap1 = add_child(IntType::Instance(*bra, *ket, aux, oper)); + bra->set_member(a, part, 0); + if (is_simple()) { + std::ostringstream oss; + oss << "two_alpha" << part << "_bra"; + expr_ = Scalar(oss.str()) * int_ap1; + nflops_ += 1; + } + + // See if a-1 exists + F am1(bra->member(part, 0) - _1); + if (exists(am1)) { + am1.deriv().dec(dir); + bra->set_member(am1, part, 0); + auto int_am1 = add_child(IntType::Instance(*bra, *ket, aux, oper)); + bra->set_member(a, part, 0); + if (is_simple()) { + expr_ -= Scalar(a[dir]) * int_am1; + nflops_ += 2; + } + } + delete bra; + delete ket; + return; + } + + if (where == InKet) { + F a(ket->member(part, 0)); + + // add a+1 + F ap1(ket->member(part, 0) + _1); + ap1.deriv().dec(dir); + ket->set_member(ap1, part, 0); + auto int_ap1 = add_child(IntType::Instance(*bra, *ket, aux, oper)); + ket->set_member(a, part, 0); + if (is_simple()) { + std::ostringstream oss; + oss << "two_alpha" << part << "_ket"; + expr_ = 
Scalar(oss.str()) * int_ap1; + nflops_ += 1; + } + + // See if a-1 exists + F am1(ket->member(part, 0) - _1); + if (exists(am1)) { + am1.deriv().dec(dir); + ket->set_member(am1, part, 0); + auto int_am1 = add_child(IntType::Instance(*bra, *ket, aux, oper)); + ket->set_member(a, part, 0); + if (is_simple()) { + expr_ -= Scalar(a[dir]) * int_am1; + nflops_ += 2; + } + } + delete bra; + delete ket; + return; + } + + } else { // use translational invariance + + // remove one deriv quantum from the target function + if (where == InBra) bra->member(part, 0).deriv().dec(dir); + if (where == InKet) ket->member(part, 0).deriv().dec(dir); + + int term_count = 0; + for (int p = 0; p != IntType::num_particles; ++p) { + typedef BasisFunctionType F; + if (p != trans_inv_part || trans_inv_where != InBra) { + F a(bra->member(p, 0)); + if (not a.is_unit()) { + F da(a); + da.deriv().inc(dir); + bra->set_member(da, p, 0); + auto int_da = add_child(IntType::Instance(*bra, *ket, aux, oper)); + bra->set_member(a, p, 0); + if (is_simple()) { + if (term_count == 0) + expr_ = Scalar(-1) * int_da; + else + expr_ -= int_da; + ++term_count; + nflops_ += 1; + } + } + } + if (p != trans_inv_part || trans_inv_where != InKet) { + F a(ket->member(p, 0)); + if (not a.is_unit()) { + F da(a); + da.deriv().inc(dir); + ket->set_member(da, p, 0); + auto int_da = add_child(IntType::Instance(*bra, *ket, aux, oper)); + ket->set_member(a, p, 0); + if (is_simple()) { + if (term_count == 0) + expr_ = Scalar(-1) * int_da; + else + expr_ -= int_da; + ++term_count; + nflops_ += 1; + } + } + } + } + } + + delete bra; + delete ket; +} + +template +bool DerivGaussV2::register_with_rrstack() const { + using std::swap; + + // only register RRs for shell sets (not individual integrals) + if (TrivialBFSet::result) return false; + + // translational invariance path not optimized yet — register as-is + if (using_trans_inv) { + std::shared_ptr rrstack = RRStack::Instance(); + std::shared_ptr this_ptr = + 
std::const_pointer_cast( + std::static_pointer_cast( + std::enable_shared_from_this::shared_from_this())); + rrstack->find(this_ptr); + return true; + } + + typedef typename IntType::BraType IBraType; + typedef typename IntType::KetType IKetType; + const IBraType& bra = target_->bra(); + const IKetType& ket = target_->ket(); + + // check if all spectator shells already have zero quanta + bool nonzero_quanta = false; + unsigned const int npart = IntType::OperatorType::Properties::np; + for (unsigned int p = 0; p < npart; p++) { + int nfbra = bra.num_members(p); + for (int f = 0; f < nfbra; f++) { + // skip the differentiated position + if (static_cast(p) == part && where == InBra) continue; + if (!bra.member(p, f).zero() || !bra.member(p, f).deriv().zero()) + nonzero_quanta = true; + } + int nfket = ket.num_members(p); + for (int f = 0; f < nfket; f++) { + if (static_cast(p) == part && where == InKet) continue; + if (!ket.member(p, f).zero() || !ket.member(p, f).deriv().zero()) + nonzero_quanta = true; + } + } + + // if all spectators are zero, register this instance directly + if (!nonzero_quanta) { + std::shared_ptr rrstack = RRStack::Instance(); + std::shared_ptr this_ptr = + std::const_pointer_cast( + std::static_pointer_cast( + std::enable_shared_from_this::shared_from_this())); + rrstack->find(this_ptr); + return true; + } + + // Otherwise, zero out all spectator shells and register a dummy + IBraType bra_zero(bra); + IKetType ket_zero(ket); + for (unsigned int p = 0; p < npart; p++) { + int nfbra = bra_zero.num_members(p); + for (int f = 0; f < nfbra; f++) { + if (static_cast(p) == part && where == InBra) continue; + typedef typename IBraType::bfs_type bfs_type; + typedef typename IBraType::bfs_ref bfs_ref; + bfs_ref bfs = bra_zero.member(p, f); + if (!bfs.zero() || !bfs.deriv().zero()) { + bfs_type null_bfs; + swap(bfs, null_bfs); + } + } + int nfket = ket_zero.num_members(p); + for (int f = 0; f < nfket; f++) { + if (static_cast(p) == part && where == 
InKet) continue; + typedef typename IKetType::bfs_type bfs_type; + typedef typename IKetType::bfs_ref bfs_ref; + bfs_ref bfs = ket_zero.member(p, f); + if (!bfs.zero() || !bfs.deriv().zero()) { + bfs_type null_bfs; + swap(bfs, null_bfs); + } + } + } + + // create a generic integral with a dummy operator + typedef GenOper> + DummyOper; + typedef EmptySet DummyQuanta; + typedef GenIntegralSet + DummyIntegral; + DummyOper dummy_oper; + DummyQuanta dummy_quanta(std::vector(0, 0)); + std::shared_ptr dummy_integral = + DummyIntegral::Instance(bra_zero, ket_zero, dummy_quanta, dummy_oper); + + // construct a DerivGaussV2 over the dummy integral and register it + typedef DerivGaussV2 DummyDerivGaussV2; + std::shared_ptr dummy_rr = + DummyDerivGaussV2::Instance(dummy_integral, dir_); + std::shared_ptr rrstack = RRStack::Instance(); + rrstack->find(dummy_rr); + return true; +} + +template +std::string DerivGaussV2::generate_label() const { + std::ostringstream os; + + // For translational invariance, children depend on ALL shells, so + // the label must include full integral info (no code sharing). + // For direct differentiation, only the differentiated shell matters. 
+ if constexpr (using_trans_inv) { + typedef typename TargetType::AuxIndexType mType; + static std::shared_ptr aux0(new mType(0u)); + os << "CR_DerivGauss" + << "P" << part << to_string(where) + << genintegralset_label(target_->bra(), target_->ket(), aux0, + target_->oper()); + return os.str(); + } + + os << "DerivGaussV2 P" << part << " " << to_string(where) << " "; + + // Only encode the differentiated shell — not spectators + if (where == InBra) { + BasisFunctionType sh(target_->bra(part, 0)); + sh.uncontract(); + os << sh.label(); + } else { + BasisFunctionType sh(target_->ket(part, 0)); + sh.uncontract(); + os << sh.label(); + } + + return os.str(); +} + +template +std::string +DerivGaussV2:: + spfunction_call(const std::shared_ptr& context, + const std::shared_ptr& dims) const { + std::ostringstream os; + os << context->label_to_function_name(label()) << "(inteval, " + << context->value_to_pointer(rr_target()->symbol()); + + const unsigned int nc = num_children(); + for (unsigned int c = 0; c < nc; c++) { + os << ", " << context->value_to_pointer(rr_child(c)->symbol()); + } + + // compute hsr and lsr — dimensions of spectator shells + // canonical order: for each particle p, bra then ket + // hsr = product of dims before (part, where) + // lsr = product of dims after (part, where) + unsigned int hsr = 1; + unsigned int lsr = 1; + const unsigned int np = IntType::OperType::Properties::np; + for (int p = 0; p < static_cast(np); p++) { + unsigned int nbra = target_->bra().num_members(p); + assert(nbra == 1); + for (unsigned int i = 0; i < nbra; i++) { + SubIterator* iter = target_->bra().member_subiter(p, i); + if (p < part || (p == part && where == InKet)) hsr *= iter->num_iter(); + // skip p == part && where == InBra (the differentiated shell) + if (p > part) lsr *= iter->num_iter(); + delete iter; + } + unsigned int nket = target_->ket().num_members(p); + assert(nket == 1); + for (unsigned int i = 0; i < nket; i++) { + SubIterator* iter = 
target_->ket().member_subiter(p, i); + if (p < part) hsr *= iter->num_iter(); + // skip p == part && where == InKet (the differentiated shell) + if (p > part || (p == part && where == InBra)) lsr *= iter->num_iter(); + delete iter; + } + } + + // Use TaskParameters to keep track of maximum ranks + LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); + taskmgr.current().params()->max_hrr_hsrank(hsr); + + if (expl_high_dim()) os << "," << hsr; + if (expl_low_dim()) os << "," << lsr; + os << ")" << context->end_of_stat() << std::endl; + return os.str(); +} + +template +bool DerivGaussV2::expl_high_dim() const { + // translational invariance: no code sharing, no explicit dims + if (using_trans_inv) return false; + // need explicit high dim unless this is the first position + if (part == 0 && where == InBra) return false; + return true; +} + +template +bool DerivGaussV2::expl_low_dim() const { + // translational invariance: no code sharing, no explicit dims + if (using_trans_inv) return false; + unsigned int np = IntType::OperType::Properties::np; + // need explicit low dim unless this is the last position + if (static_cast(np) - 1 == part && where == InKet) return false; + // corner case: 1-particle operator + if (np == 1) return true; + return true; +} + +template +std::shared_ptr +DerivGaussV2:: + adapt_dims_(const std::shared_ptr& dims) const { + bool high_rank = expl_high_dim(); + bool low_rank = expl_low_dim(); + + std::shared_ptr high_dim, low_dim; + if (high_rank) { + high_dim = + std::shared_ptr(new RTimeEntity("highdim")); + } else { + high_dim = dims->high(); + } + if (low_rank) { + low_dim = + std::shared_ptr(new RTimeEntity("lowdim")); + } else { + low_dim = dims->low(); + } + + std::shared_ptr localdims( + new ImplicitDimensions(high_dim, low_dim, dims->vecdim())); + return localdims; +} + +}; // namespace libint2 + +#endif diff --git a/src/bin/libint/master_rrs_list.h b/src/bin/libint/master_rrs_list.h index d55cfa301..5517d7c5c 100644 --- 
a/src/bin/libint/master_rrs_list.h +++ b/src/bin/libint/master_rrs_list.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -268,6 +269,51 @@ typedef CR_DerivGauss Deriv_d_11_TwoPRep_11_int; +// DerivGaussV2 for TwoPRep (shell sets) +typedef DerivGaussV2 + DerivV2_a_11_TwoPRep_11_sh; +typedef DerivGaussV2 + DerivV2_b_11_TwoPRep_11_sh; +typedef DerivGaussV2 + DerivV2_c_11_TwoPRep_11_sh; +typedef DerivGaussV2 + DerivV2_d_11_TwoPRep_11_sh; +// DerivGaussV2 for TwoPRep (individual integrals) +typedef DerivGaussV2 + DerivV2_a_11_TwoPRep_11_int; +typedef DerivGaussV2 + DerivV2_b_11_TwoPRep_11_int; +typedef DerivGaussV2 + DerivV2_c_11_TwoPRep_11_int; +typedef DerivGaussV2 + DerivV2_d_11_TwoPRep_11_int; + +// DerivGaussV2 for DummySymmIntegral (used by register_with_rrstack) +typedef DerivGaussV2 + DerivV2_a_11_Dummy_11_sh; +typedef DerivGaussV2 + DerivV2_b_11_Dummy_11_sh; +typedef DerivGaussV2 + DerivV2_c_11_Dummy_11_sh; +typedef DerivGaussV2 + DerivV2_d_11_Dummy_11_sh; +typedef DerivGaussV2 + DerivV2_a_11_Dummy_11_int; +typedef DerivGaussV2 + DerivV2_b_11_Dummy_11_int; +typedef DerivGaussV2 + DerivV2_c_11_Dummy_11_int; +typedef DerivGaussV2 + DerivV2_d_11_Dummy_11_int; + typedef CR_11_Coulombσpσp_11 CR_11_Coulombσpσp_11_sh; typedef CR_11_Coulombσpσp_11 CR_11_Coulombσpσp_11_int; diff --git a/src/bin/libint/strategy.cc b/src/bin/libint/strategy.cc index 58fb8d2bd..4e4804da7 100644 --- a/src/bin/libint/strategy.cc +++ b/src/bin/libint/strategy.cc @@ -70,47 +70,51 @@ struct MasterStrategy; #if LIBINT_SHELLQUARTET_STRATEGY == LIBINT_SHELLQUARTET_STRATEGY_A0C0 template <> struct MasterStrategy { - typedef boost::mpl::list + VRR_a_11_TwoPRep_11_sh, VRR_c_11_TwoPRep_11_sh> value; }; template <> struct MasterStrategy { - typedef boost::mpl::list + VRR_a_11_TwoPRep_11_int, VRR_c_11_TwoPRep_11_int> value; }; #else // 0B0D strategy template <> struct MasterStrategy { - typedef boost::mpl::list + VRR_b_11_TwoPRep_11_sh, VRR_d_11_TwoPRep_11_sh> 
value; }; template <> struct MasterStrategy { - typedef boost::mpl::list + VRR_b_11_TwoPRep_11_int, VRR_d_11_TwoPRep_11_int> value; }; #endif @@ -214,21 +218,31 @@ struct MasterStrategy { #if LIBINT_SHELLQUARTET_STRATEGY == LIBINT_SHELLQUARTET_STRATEGY_A0C0 template <> struct MasterStrategy { - typedef boost::mpl::list value; + typedef boost::mpl::list + value; }; template <> struct MasterStrategy { - typedef boost::mpl::list + typedef boost::mpl::list value; }; #else // 0B0D strategy template <> struct MasterStrategy { - typedef boost::mpl::list value; + typedef boost::mpl::list + value; }; template <> struct MasterStrategy { - typedef boost::mpl::list + typedef boost::mpl::list value; }; #endif From 9b50b0b46010916acb168d8f6bf77b1d3b7fa070 Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Fri, 10 Apr 2026 14:02:01 -0400 Subject: [PATCH 16/22] Fix DerivGaussV2 target indexing for size-1 integral sets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit handle_trivial_nodes() used default_dims() (hardcoded "1") before adapt_dims_() provided correct runtime dims ("lowdim"/"highdim"). Pass localdims through optimize_rr_out → handle_trivial_nodes. 
--- src/bin/libint/dg.cc | 10 +++++----- src/bin/libint/dg.h | 9 +++++++-- src/bin/libint/rr.cc | 6 ++++-- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/bin/libint/dg.cc b/src/bin/libint/dg.cc index eded6a54f..9e03277ae 100644 --- a/src/bin/libint/dg.cc +++ b/src/bin/libint/dg.cc @@ -499,10 +499,11 @@ void DirectedGraph::apply_to(const std::shared_ptr& vertex, // Optimize out simple recurrence relations void DirectedGraph::optimize_rr_out( - const std::shared_ptr& context) { + const std::shared_ptr& context, + const std::shared_ptr& dims) { replace_rr_with_expr(); remove_trivial_arithmetics(); - handle_trivial_nodes(context); + handle_trivial_nodes(context, dims); remove_disconnected_vertices(); find_subtrees(); } @@ -797,7 +798,8 @@ inline std::string to_vector_symbol(const std::shared_ptr& v) { // refer to another node so that no code is generated for it. // void DirectedGraph::handle_trivial_nodes( - const std::shared_ptr& context) { + const std::shared_ptr& context, + const std::shared_ptr& dims) { typedef vertices::iterator iter; for (iter v = stack_.begin(); v != stack_.end(); ++v) { const ver_ptr& vptr = vertex_ptr(*v); @@ -821,8 +823,6 @@ void DirectedGraph::handle_trivial_nodes( // if (child->symbol_set() == false) { const std::string stack_name("stack"); - const std::shared_ptr& dims = - ImplicitDimensions::default_dims(); std::string low_rank = dims->low_label(); std::string veclen = dims->vecdim_label(); diff --git a/src/bin/libint/dg.h b/src/bin/libint/dg.h index 0f85abc8f..19e96074c 100644 --- a/src/bin/libint/dg.h +++ b/src/bin/libint/dg.h @@ -22,6 +22,7 @@ #define _libint2_src_bin_libint_dg_h_ #include +#include #include #include #include @@ -253,7 +254,9 @@ class DirectedGraph : public std::enable_shared_from_this { optimized away. optimize_rr_out() will replace all simple recurrence relations with code representing them. 
*/ - void optimize_rr_out(const std::shared_ptr& context); + void optimize_rr_out(const std::shared_ptr& context, + const std::shared_ptr& dims = + ImplicitDimensions::default_dims()); /** after all apply's have been called, traverse() construct a heuristic order of traversal for the graph. @@ -438,7 +441,9 @@ class DirectedGraph : public std::enable_shared_from_this { to their equivalents (such as (ss|ss) shell quartet can only be connected to (ss|ss) integral) */ - void handle_trivial_nodes(const std::shared_ptr& context); + void handle_trivial_nodes(const std::shared_ptr& context, + const std::shared_ptr& dims = + ImplicitDimensions::default_dims()); /// This functions removes vertices not connected to other vertices void remove_disconnected_vertices(); /** Finds (binary) subtrees. The subtrees correspond to a single-line code (no diff --git a/src/bin/libint/rr.cc b/src/bin/libint/rr.cc index 32bbb9df9..a653bf4b2 100644 --- a/src/bin/libint/rr.cc +++ b/src/bin/libint/rr.cc @@ -127,8 +127,11 @@ void RecurrenceRelation::generate_code( // Assign symbols for the target and source integral sets std::shared_ptr symbols(new CodeSymbols); assign_symbols_(symbols); + // Compute local dimensions before optimize_rr_out so that + // handle_trivial_nodes uses the correct dims (e.g., "lowdim" instead of "1") + std::shared_ptr localdims = adapt_dims_(dims); // Traverse the graph - dg->optimize_rr_out(context); + dg->optimize_rr_out(context, localdims); dg->traverse(); #if PRINT_DAG_GRAPHVIZ { @@ -138,7 +141,6 @@ void RecurrenceRelation::generate_code( #endif // Generate code std::shared_ptr memman(new WorstFitMemoryManager()); - std::shared_ptr localdims = adapt_dims_(dims); dg->generate_code(context, memman, localdims, symbols, funcname, decl, def); // extract all external symbols -- these will be members of the evaluator From 1db978df470b632c98039c19484d2b77249ac696 Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Fri, 10 Apr 2026 14:49:46 -0400 Subject: [PATCH 17/22] 
=?UTF-8?q?RKB=20CR=20code=20sharing:=20deduplicate?= =?UTF-8?q?=20generated=20Coulomb=CF=83p=CF=83p/=CF=83p=CF=83pCoulomb?= =?UTF-8?q?=CF=83p=CF=83p=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add code-sharing overrides (generate_label, spfunction_call, adapt_dims_, generate_code) to CR_11_Coulombσpσp_11 and CR_11_σpσpCoulombσpσp_11. Shell quartets with the same quaternion component share a single function parameterized by highdim. Hand-emits element-wise loops to avoid TwoPRep particle-swap child deduplication issues in DAG-based codegen. --- .../comp_11_Coulomb\317\203p\317\203p_11.h" | 103 ++++++++++++++- ...3p\317\203pCoulomb\317\203p\317\203p_11.h" | 120 +++++++++++++++++- 2 files changed, 215 insertions(+), 8 deletions(-) diff --git "a/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" "b/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" index 315135994..db28cb7ea 100644 --- "a/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" +++ "b/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" @@ -21,8 +21,11 @@ #ifndef LIBINT_COMP_11_COULOMBΣPΣP_11_H #define LIBINT_COMP_11_COULOMBΣPΣP_11_H +#include +#include #include #include +#include #include namespace libint2 { @@ -60,16 +63,108 @@ class CR_11_Coulombσpσp_11 using ParentType::RecurrenceRelation::expr_; using ParentType::RecurrenceRelation::nflops_; - /// Constructor is private, used by ParentType::Instance that maintains + /// Constructor is private, used by Instance that maintains /// registry of these objects - CR_11_Coulombσpσp_11(const std::shared_ptr &, unsigned int = 0); + CR_11_Coulombσpσp_11(const std::shared_ptr&, unsigned int = 0); static std::string descr() { return "CR"; } + + // --- Code sharing overrides --- + // All shell combos with the same quaternion component share one function. 
+ + std::string generate_label() const override { + return "CR_Coulombopop_" + + std::to_string(target_->oper()->descr().quaternion_index()); + } + + std::string spfunction_call( + const std::shared_ptr& context, + const std::shared_ptr& dims) const override { + std::ostringstream os; + os << context->label_to_function_name(this->label()) << "(inteval, " + << context->value_to_pointer(this->rr_target()->symbol()); + const unsigned int nc = this->num_children(); + for (unsigned int c = 0; c < nc; c++) { + os << ", " << context->value_to_pointer(this->rr_child(c)->symbol()); + } + // total_dim = product of all shell dims (all 4 shells are spectators) + unsigned int total_dim = 1; + for (unsigned int p = 0; p < 2; p++) { + SubIterator* si = target_->bra().member_subiter(p, 0); + total_dim *= si->num_iter(); + delete si; + si = target_->ket().member_subiter(p, 0); + total_dim *= si->num_iter(); + delete si; + } + os << "," << total_dim; + LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); + taskmgr.current().params()->max_hrr_hsrank(total_dim); + os << ")" << context->end_of_stat() << std::endl; + return os.str(); + } + + std::shared_ptr adapt_dims_( + const std::shared_ptr& dims) const override { + auto high_dim = std::make_shared>("highdim"); + return std::make_shared(high_dim, dims->low(), + dims->vecdim()); + } + + /// Hand-emit a simple element-wise loop function. 
+ /// comp 0: target = src0 + src1 + src2 (dot product) + /// comp 1-3: target = src0 - src1 (cross product components) + void generate_code(const std::shared_ptr& context, + const std::shared_ptr& dims, + const std::string& funcname, std::ostream& decl, + std::ostream& def) override { + // declare_function lives in dg.cc + extern std::string declare_function( + const std::shared_ptr& context, + const std::shared_ptr& dims, + const std::shared_ptr& args, const std::string& tlabel, + const std::string& function_descr, std::ostream& decl); + + std::shared_ptr localdims = adapt_dims_(dims); + // inline assign_symbols_: set symbol names on target/children and + // populate CodeSymbols + std::shared_ptr symbols(new CodeSymbols); + this->rr_target()->set_symbol("target"); + symbols->append_symbol("target"); + for (unsigned int c = 0; c < this->num_children(); c++) { + std::string symb = "src" + std::to_string(c); + this->rr_child(c)->set_symbol(symb); + symbols->append_symbol(symb); + } + LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); + const std::string tlabel = taskmgr.current().label(); + const std::string func_decl = + declare_function(context, localdims, symbols, tlabel, funcname, decl); + def << context->std_header(); + def << "#include <" << context->label_to_name(funcname) << ".h>\n\n"; + def << context->code_prefix(); + def << func_decl << context->open_block() << std::endl; + def << context->std_function_header(); + const unsigned int nc = this->num_children(); + def << "#ifdef __INTEL_COMPILER\n#pragma ivdep\n#endif\n"; + def << "for(int hsi = 0; hsi 1) ? 
nc - 1 : 0; + def << "/** Number of flops = " << nflops << " */\n"; + def << context->close_block() << std::endl; + def << context->code_postfix(); + } }; template CR_11_Coulombσpσp_11::CR_11_Coulombσpσp_11( - const std::shared_ptr &Tint, unsigned int) + const std::shared_ptr& Tint, unsigned int) : ParentType(Tint, 0) { assert(Tint->num_func_bra(/* particle */ 0) == 1); assert(Tint->num_func_bra(/* particle */ 1) == 1); @@ -81,7 +176,7 @@ CR_11_Coulombσpσp_11::CR_11_Coulombσpσp_11( F c(Tint->bra(1, 0)); F d(Tint->ket(1, 0)); - const auto &oper = Tint->oper(); + const auto& oper = Tint->oper(); if (a.contracted() || b.contracted() || c.contracted() || d.contracted()) return; diff --git "a/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" "b/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" index 606418d0b..64d1fdee6 100644 --- "a/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" +++ "b/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" @@ -21,8 +21,11 @@ #ifndef LIBINT_COMP_11_ΣPΣPCOULOMBΣPΣP_11_H #define LIBINT_COMP_11_ΣPΣPCOULOMBΣPΣP_11_H +#include +#include #include #include +#include #include namespace libint2 { @@ -88,17 +91,126 @@ class CR_11_σpσpCoulombσpσp_11 using ParentType::RecurrenceRelation::expr_; using ParentType::RecurrenceRelation::nflops_; - /// Constructor is private, used by ParentType::Instance that maintains + /// Constructor is private, used by Instance that maintains /// registry of these objects - CR_11_σpσpCoulombσpσp_11(const std::shared_ptr &, + CR_11_σpσpCoulombσpσp_11(const std::shared_ptr&, unsigned int = 0); static std::string descr() { return "CR"; } + + // --- Code sharing overrides --- + // All shell combos with the same quaternion component share one function. 
+ + std::string generate_label() const override { + return "CR_opopCoulombopop_" + + std::to_string(target_->oper()->descr().quaternion_index()); + } + + std::string spfunction_call( + const std::shared_ptr& context, + const std::shared_ptr& dims) const override { + std::ostringstream os; + os << context->label_to_function_name(this->label()) << "(inteval, " + << context->value_to_pointer(this->rr_target()->symbol()); + const unsigned int nc = this->num_children(); + for (unsigned int c = 0; c < nc; c++) { + os << ", " << context->value_to_pointer(this->rr_child(c)->symbol()); + } + // total_dim = product of all shell dims (all 4 shells are spectators) + unsigned int total_dim = 1; + for (unsigned int p = 0; p < 2; p++) { + SubIterator* si = target_->bra().member_subiter(p, 0); + total_dim *= si->num_iter(); + delete si; + si = target_->ket().member_subiter(p, 0); + total_dim *= si->num_iter(); + delete si; + } + os << "," << total_dim; + LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); + taskmgr.current().params()->max_hrr_hsrank(total_dim); + os << ")" << context->end_of_stat() << std::endl; + return os.str(); + } + + std::shared_ptr adapt_dims_( + const std::shared_ptr& dims) const override { + auto high_dim = std::make_shared>("highdim"); + return std::make_shared(high_dim, dims->low(), + dims->vecdim()); + } + + /// Hand-emit a simple element-wise loop function. + /// Cannot use S-shell dummy because TwoPRep particle-swap canonicalization + /// deduplicates children (e.g., (S_x S_x|S_y S_y) = (S_y S_y|S_x S_x)), + /// giving fewer children than the real instance. 
+ void generate_code(const std::shared_ptr& context, + const std::shared_ptr& dims, + const std::string& funcname, std::ostream& decl, + std::ostream& def) override { + // declare_function lives in dg.cc + extern std::string declare_function( + const std::shared_ptr& context, + const std::shared_ptr& dims, + const std::shared_ptr& args, const std::string& tlabel, + const std::string& function_descr, std::ostream& decl); + + std::shared_ptr localdims = adapt_dims_(dims); + // inline assign_symbols_: set symbol names on target/children and + // populate CodeSymbols + std::shared_ptr symbols(new CodeSymbols); + this->rr_target()->set_symbol("target"); + symbols->append_symbol("target"); + for (unsigned int c = 0; c < this->num_children(); c++) { + std::string symb = "src" + std::to_string(c); + this->rr_child(c)->set_symbol(symb); + symbols->append_symbol(symb); + } + LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); + const std::string tlabel = taskmgr.current().label(); + const std::string func_decl = + declare_function(context, localdims, symbols, tlabel, funcname, decl); + def << context->std_header(); + def << "#include <" << context->label_to_name(funcname) << ".h>\n\n"; + def << context->code_prefix(); + def << func_decl << context->open_block() << std::endl; + def << context->std_function_header(); + // Sign patterns for each component, indexed by child order in constructor. + // comp 0 (SS): 9 children, all +1 + // comp 1-3 (SX,SY,SZ): 6 children, alternating +1,-1 + // comp 4,8,12 (XS,YS,ZS): 6 children, alternating +1,-1 + // comp 5-7,9-11,13-15 (XX..ZZ): 4 children, pattern -1,+1,+1,-1 + const unsigned int nc = this->num_children(); + def << "#ifdef __INTEL_COMPILER\n#pragma ivdep\n#endif\n"; + def << "for(int hsi = 0; hsi 1) ? 
nc - 1 : 0; + def << "/** Number of flops = " << nflops << " */\n"; + def << context->close_block() << std::endl; + def << context->code_postfix(); + } }; template CR_11_σpσpCoulombσpσp_11::CR_11_σpσpCoulombσpσp_11( - const std::shared_ptr &Tint, unsigned int) + const std::shared_ptr& Tint, unsigned int) : ParentType(Tint, 0) { assert(Tint->num_func_bra(/* particle */ 0) == 1); assert(Tint->num_func_bra(/* particle */ 1) == 1); @@ -110,7 +222,7 @@ CR_11_σpσpCoulombσpσp_11::CR_11_σpσpCoulombσpσp_11( F c(Tint->bra(1, 0)); F d(Tint->ket(1, 0)); - const auto &oper = Tint->oper(); + const auto& oper = Tint->oper(); if (a.contracted() || b.contracted() || c.contracted() || d.contracted()) return; From 5104e816d9ac650f83f5031dba9ac3a41bf63323 Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Fri, 17 Apr 2026 16:20:02 -0400 Subject: [PATCH 18/22] Add op_coulomb_op: 9-component Gaunt LS bilinear integral MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (μ ∂_a ν | 1/r12 | κ ∂_b λ) for a, b ∈ {x,y,z}. Needed for Gaunt LS Fock where coulomb_opop's 4 folded outputs are rank-deficient. Only 2-fold bra↔ket symmetry (p1_p2_swappable, no within-side swap); uses dedicated predicate la+lb <= lc+ld. Also: LIBINT2_SIMPLE_CORE_EVAL_CASE macro to compactify Coulomb-family dispatch in engine.impl.h. 
--- export/tests/unit/test-2body.cc | 103 +++++++++++ include/libint2/engine.h | 16 ++ include/libint2/engine.impl.h | 119 +++++++------ src/bin/libint/build_libint.cc | 51 ++++-- src/bin/libint/comp_11_opCoulombop_11.h | 216 ++++++++++++++++++++++++ src/bin/libint/master_ints_list.h | 10 +- src/bin/libint/master_rrs_list.h | 4 + src/bin/libint/oper.h | 49 ++++++ src/bin/libint/strategy.cc | 8 + 9 files changed, 516 insertions(+), 60 deletions(-) create mode 100644 src/bin/libint/comp_11_opCoulombop_11.h diff --git a/export/tests/unit/test-2body.cc b/export/tests/unit/test-2body.cc index 708190b90..cee9b0800 100644 --- a/export/tests/unit/test-2body.cc +++ b/export/tests/unit/test-2body.cc @@ -644,6 +644,109 @@ TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { } } } + + SECTION("op_coulomb_op") { + Engine engine_opCop; + try { + engine_opCop = Engine(Operator::op_coulomb_op, max_nprim, max_l, 0); + } catch (Engine::lmax_exceeded &) { + return; + } + + const auto nshell = obs.size(); + for (int s0 = 0; s0 != nshell; ++s0) { + for (int s1 = 0; s1 != nshell; ++s1) { + for (int s2 = 0; s2 != nshell; ++s2) { + for (int s3 = 0; s3 != nshell; ++s3) { + const auto &results = + engine_opCop.compute(obs[s0], obs[s1], obs[s2], obs[s3]); + assert(results.size() == 9); + + LIBINT2_REF_REALTYPE Aref[3], Bref[3], Cref[3], Dref[3]; + for (int i = 0; i < 3; ++i) Aref[i] = obs[s0].O[i]; + for (int i = 0; i < 3; ++i) Bref[i] = obs[s1].O[i]; + for (int i = 0; i < 3; ++i) Cref[i] = obs[s2].O[i]; + for (int i = 0; i < 3; ++i) Dref[i] = obs[s3].O[i]; + + int ijkl = 0; + + int l0, m0, n0; + FOR_CART(l0, m0, n0, obs[s0].contr[0].l) + int l1, m1, n1; + FOR_CART(l1, m1, n1, obs[s1].contr[0].l) + int l2, m2, n2; + FOR_CART(l2, m2, n2, obs[s2].contr[0].l) + int l3, m3, n3; + FOR_CART(l3, m3, n3, obs[s3].contr[0].l) + + std::array ref_op_coulomb_op{}; + ref_op_coulomb_op.fill(0.0); + + for (uint p0 = 0; p0 < obs[s0].nprim(); p0++) { + for (uint p1 = 0; p1 < obs[s1].nprim(); p1++) { + for 
(uint p2 = 0; p2 < obs[s2].nprim(); p2++) { + for (uint p3 = 0; p3 < obs[s3].nprim(); p3++) { + const LIBINT2_REF_REALTYPE alpha0 = obs[s0].alpha[p0]; + const LIBINT2_REF_REALTYPE alpha1 = obs[s1].alpha[p1]; + const LIBINT2_REF_REALTYPE alpha2 = obs[s2].alpha[p2]; + const LIBINT2_REF_REALTYPE alpha3 = obs[s3].alpha[p3]; + const LIBINT2_REF_REALTYPE c0 = obs[s0].contr[0].coeff[p0]; + const LIBINT2_REF_REALTYPE c1 = obs[s1].contr[0].coeff[p1]; + const LIBINT2_REF_REALTYPE c2 = obs[s2].contr[0].coeff[p2]; + const LIBINT2_REF_REALTYPE c3 = obs[s3].contr[0].coeff[p3]; + const LIBINT2_REF_REALTYPE c0123 = c0 * c1 * c2 * c3; + + // Deriv on ν (center B, index 1) and on λ (center D, idx + // 3). + auto didx_bd = [](int a, int b) -> der_idx { + der_idx r = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + r[3 + a] = 1; + r[9 + b] = 1; + return r; + }; + auto D = [&](int a, int b) { + auto di = didx_bd(a, b); + return eri(di.data(), l0, m0, n0, alpha0, Aref, l1, m1, + n1, alpha1, Bref, l2, m2, n2, alpha2, Cref, l3, + m3, n3, alpha3, Dref, 0); + }; + for (int a = 0; a < 3; ++a) + for (int b = 0; b < 3; ++b) + ref_op_coulomb_op[3 * a + b] += c0123 * D(a, b); + } + } + } + } + + const double ABSOLUTE_DEVIATION_THRESHOLD = 5.0E-14; + const double RELATIVE_DEVIATION_THRESHOLD = 1.0E-9; + for (auto comp = 0; comp < 9; ++comp) { + auto abs_err = abs(ref_op_coulomb_op[comp] - results[comp][ijkl]); + auto rel_abs_err = abs(abs_err / ref_op_coulomb_op[comp]); + bool not_ok = rel_abs_err > RELATIVE_DEVIATION_THRESHOLD && + abs_err > ABSOLUTE_DEVIATION_THRESHOLD; + if (not_ok) { + std::cout << "(l0 l1| l2 l3) = (" << s0 << " " << s1 << " | " + << s2 << " " << s3 << ") Elem " << ijkl + << " comp= " << comp + << " : ref = " << ref_op_coulomb_op[comp] + << " libint = " << results[comp][ijkl] + << " relabs_error = " << rel_abs_err + << " abs_error = " << abs_err << std::endl; + } + REQUIRE(!not_ok); + } + + ++ijkl; + END_FOR_CART + END_FOR_CART + END_FOR_CART + END_FOR_CART + } + } + } + } + } } 
TEST_CASE("Erfx_Coulomb integrals", "[engine][2-body]") { diff --git a/include/libint2/engine.h b/include/libint2/engine.h index fae7d1785..f99c6a898 100644 --- a/include/libint2/engine.h +++ b/include/libint2/engine.h @@ -160,6 +160,13 @@ enum class Operator { /// where b1 & b2 are centers of bra1 and bra2 and k1 & k2 are centers of /// ket1 and ket2, respectively opop_coulomb_opop, + /// (2-body) \f$ (σ.p_{b2}) r_{12}^{-1} (σ.p_{k2}) \f$ where b2 is the center + /// of bra2 and k2 is the center of ket2; Gaunt LS "bilinear" integral. + /// Produces 9 components (outer product of two Cartesian directions), + /// indexed as `3*a + b` with `a` = bra-side direction, `b` = ket-side + /// direction, and `a,b ∈ {x=0, y=1, z=2}`. Unlike coulomb_opop, the 9 + /// components are NOT contracted via σ·σ — all are kept independent. + op_coulomb_op, /// contracted Gaussian geminal cgtg, /// contracted Gaussian geminal times Coulomb @@ -369,6 +376,15 @@ struct operator_traits static constexpr auto nopers = 16; static constexpr auto intrinsic_deriv_order = 4; }; +template <> +struct operator_traits + : public operator_traits { + /// 9 components: Cartesian dyadic of the two (σ·p) directions. + /// index = 3 * a + b, with a = bra-side direction, b = ket-side direction, + /// a,b ∈ {x=0, y=1, z=2}. + static constexpr auto nopers = 9; + static constexpr auto intrinsic_deriv_order = 2; +}; namespace detail { template diff --git a/include/libint2/engine.impl.h b/include/libint2/engine.impl.h index ff143d254..27187cbcc 100644 --- a/include/libint2/engine.impl.h +++ b/include/libint2/engine.impl.h @@ -70,32 +70,33 @@ typename std::remove_all_extents::type* to_ptr1(T (&a)[N]) { /// These MUST appear in the same order as in Operator. 
/// You must also update BOOST_PP_NBODY_OPERATOR_LAST_ONEBODY_INDEX when you add /// one-body ints -#define BOOST_PP_NBODY_OPERATOR_LIST \ - (overlap, /* overlap */ \ - (kinetic, /* kinetic */ \ - (elecpot, /* nuclear */ \ - (elecpot, /* erf_nuclear */ \ - (elecpot, /* erfc_nuclear */ \ - (elecpot, /* erfx_nuclear */ \ - (1emultipole, /* emultipole1 */ \ - (2emultipole, /* emultipole2 */ \ - (3emultipole, /* emultipole3 */ \ - (sphemultipole, /* sphemultipole */ \ - (opVop, /* opVop */ \ - (eri, /* delta */ \ - (eri, /* coulomb */ \ - (coulomb_opop, /* coulomb_opop */ \ - (opop_coulomb_opop, /* coulomb_opop */ \ - (eri, /* cgtg */ \ - (eri, /* cgtg_x_coulomb */ \ - (eri, /* delcgtg2 */ \ - (eri, /* r12 */ \ - (eri, /* erf_coulomb */ \ - (eri, /* erfc_coulomb */ \ - (eri, /* erfx_coulomb */ \ - (eri, /* stg */ \ - (eri, /* yukawa */ \ - BOOST_PP_NIL)))))))))))))))))))))))) +#define BOOST_PP_NBODY_OPERATOR_LIST \ + (overlap, /* overlap */ \ + (kinetic, /* kinetic */ \ + (elecpot, /* nuclear */ \ + (elecpot, /* erf_nuclear */ \ + (elecpot, /* erfc_nuclear */ \ + (elecpot, /* erfx_nuclear */ \ + (1emultipole, /* emultipole1 */ \ + (2emultipole, /* emultipole2 */ \ + (3emultipole, /* emultipole3 */ \ + (sphemultipole, /* sphemultipole */ \ + (opVop, /* opVop */ \ + (eri, /* delta */ \ + (eri, /* coulomb */ \ + (coulomb_opop, /* coulomb_opop */ \ + (opop_coulomb_opop, /* opop_coulomb_opop */ \ + (op_coulomb_op, /* op_coulomb_op */ \ + (eri, /* cgtg */ \ + (eri, /* cgtg_x_coulomb */ \ + (eri, /* delcgtg2 */ \ + (eri, /* r12 */ \ + (eri, /* erf_coulomb */ \ + (eri, /* erfc_coulomb */ \ + (eri, /* erfx_coulomb */ \ + (eri, /* stg */ \ + (eri, /* yukawa */ \ + BOOST_PP_NIL))))))))))))))))))))))))) #define BOOST_PP_NBODY_OPERATOR_INDEX_TUPLE \ BOOST_PP_MAKE_TUPLE(BOOST_PP_LIST_SIZE(BOOST_PP_NBODY_OPERATOR_LIST)) @@ -1243,6 +1244,18 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( const bool swap_p1p2 = (tket1.contr[0].l < tket2.contr[0].l); swap_tbra 
= swap_tket = swap_p1p2; } + } else if (oper_ == Operator::op_coulomb_op) { + // opCoulombop: only bra↔ket (particle 1↔2) swap is a symmetry (with + // (a,b)↔(b,a) component remap). Within-side swap is NOT a symmetry + // because σ·p attaches to one specific function per side; moving it to + // the other function changes the integral in a way IBP cannot recover + // across electrons. Canonical form: la+lb <= lc+ld only. + const auto bra_total = tbra1.contr[0].l + tbra2.contr[0].l; + const auto ket_total = tket1.contr[0].l + tket2.contr[0].l; + swap_braket = ((braket_ == BraKet::xx_xx) && (bra_total > ket_total)) || + braket_ == BraKet::xx_xs; + swap_tbra = false; + swap_tket = false; } else { swap_braket = ((braket_ == BraKet::xx_xx) && (tbra1.contr[0].l + tbra2.contr[0].l > @@ -1276,6 +1289,15 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( const bool swap_p1p2 = (tket1.contr[0].l > tket2.contr[0].l); swap_tbra = swap_tket = swap_p1p2; } + } else if (oper_ == Operator::op_coulomb_op) { + // opCoulombop: only bra↔ket swap is a symmetry (with (a,b)↔(b,a) remap). + // ORCA canonical form: la+lb >= lc+ld only. + const auto bra_total = tbra1.contr[0].l + tbra2.contr[0].l; + const auto ket_total = tket1.contr[0].l + tket2.contr[0].l; + swap_braket = ((braket_ == BraKet::xx_xx) && (bra_total < ket_total)) || + braket_ == BraKet::xx_xs; + swap_tbra = false; + swap_tket = false; } else { swap_tbra = (tbra1.contr[0].l > tbra2.contr[0].l); swap_tket = (tket1.contr[0].l > tket2.contr[0].l); @@ -1482,28 +1504,21 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( const auto mmax = l + deriv_order_ + intrinsic_deriv_order(); if (!skip_core_ints) { +// Simple core-eval dispatch: just `core_eval_ptr->eval(gm_ptr, T, mmax)`. +// Applies to Coulomb-family operators whose core integral is the bare Fm. 
+#define LIBINT2_SIMPLE_CORE_EVAL_CASE(OP) \ + case Operator::OP: { \ + const auto& core_eval_ptr = \ + any_cast&>( \ + core_eval_pack_) \ + .first(); \ + core_eval_ptr->eval(gm_ptr, T, mmax); \ + } break switch (oper_) { - case Operator::coulomb: { - const auto& core_eval_ptr = - any_cast&>(core_eval_pack_) - .first(); - core_eval_ptr->eval(gm_ptr, T, mmax); - } break; - case Operator::coulomb_opop: { - const auto& core_eval_ptr = - any_cast&>(core_eval_pack_) - .first(); - core_eval_ptr->eval(gm_ptr, T, mmax); - } break; - case Operator::opop_coulomb_opop: { - const auto& core_eval_ptr = - any_cast&>(core_eval_pack_) - .first(); - core_eval_ptr->eval(gm_ptr, T, mmax); - } break; + LIBINT2_SIMPLE_CORE_EVAL_CASE(coulomb); + LIBINT2_SIMPLE_CORE_EVAL_CASE(coulomb_opop); + LIBINT2_SIMPLE_CORE_EVAL_CASE(opop_coulomb_opop); + LIBINT2_SIMPLE_CORE_EVAL_CASE(op_coulomb_op); case Operator::cgtg_x_coulomb: { const auto& core_eval_ptr = any_cast temp; + for (auto s = 0; s != ntargets; ++s) temp[s] = targets_[s]; + for (auto s = 0; s != ntargets; ++s) + targets_[3 * (s % 3) + (s / 3)] = temp[s]; + } } // if need_scratch => needed to transpose and/or tform else { // did not use scratch? 
may still need to update targets_ if (set_targets_) { diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index 4e9b10152..fbc8ac07b 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -609,8 +609,10 @@ void try_main(int argc, char* argv[]) { #endif #ifdef LIBINT_INCLUDE_RKB_ERI -#define BOOST_PP_RKB_ERI_TASK_TUPLE (coulomb_opop, opop_coulomb_opop) -#define BOOST_PP_RKB_ERI_TASK_OPER_TUPLE (CoulombσpσpOper, σpσpCoulombσpσpOper) +#define BOOST_PP_RKB_ERI_TASK_TUPLE \ + (coulomb_opop, opop_coulomb_opop, op_coulomb_op) +#define BOOST_PP_RKB_ERI_TASK_OPER_TUPLE \ + (CoulombσpσpOper, σpσpCoulombσpσpOper, opCoulombopOper) #define BOOST_PP_RKB_ERI_TASK_LIST \ BOOST_PP_TUPLE_TO_LIST(BOOST_PP_RKB_ERI_TASK_TUPLE) #define BOOST_PP_RKB_ERI_TASK_OPER_LIST \ @@ -1148,7 +1150,14 @@ static void build_TwoPRep_2b_2k( std::shared_ptr context(new CppCodeContext(cparams)); std::shared_ptr memman(new WorstFitMemoryManager()); - bool p1_p2_swappable = !std::is_same::value; + // opCoulombop has only a 2-fold bra↔ket-swap symmetry (with (a,b)↔(b,a) + // component remap). Within-side particle swap is NOT a symmetry because σ·p + // attaches to one specific function per side (ν on bra, λ on ket); swapping + // moves the operator to a different physical center that IBP cannot recover + // when centers differ. Emit code for every (la,lb,lc,ld) combination to + // avoid triggering any within-side swap at runtime. 
+ bool p1_p2_swappable = !std::is_same::value && + !std::is_same::value; bool bra_ket_coswappable = std::is_same::value; // Note: la, lb, lc, ld generate code for chemist notation (ab|O|cd), where O @@ -1157,10 +1166,23 @@ static void build_TwoPRep_2b_2k( for (unsigned int lb = 0; lb <= lmax; lb++) { for (unsigned int lc = 0; lc <= lmax; lc++) { for (unsigned int ld = 0; ld <= lmax; ld++) { - if (!ShellQuartetSetPredicate( - LIBINT_SHELL_SET)>::value(la, lb, lc, ld, p1_p2_swappable, - bra_ket_coswappable)) - continue; + // opCoulombop has only a bra↔ket (particle 1↔2) swap symmetry; + // within-side swap is NOT a symmetry (σ·p would move to the wrong + // physical center). Canonical form: la+lb <= lc+ld only + // (ORCA: la+lb >= lc+ld). Use a dedicated predicate so within-side + // orderings are not reduced away. + if constexpr (std::is_same::value) { +#if LIBINT_SHELL_SET == LIBINT_SHELL_SET_STANDARD + if (!(la + lb <= lc + ld)) continue; +#else + if (!(la + lb >= lc + ld)) continue; +#endif + } else { + if (!ShellQuartetSetPredicate( + LIBINT_SHELL_SET)>::value(la, lb, lc, ld, p1_p2_swappable, + bra_ket_coswappable)) + continue; + } // std::shared_ptr tactic(new ParticleDirectionTactic(la+lb > // lc+ld ? 
false : true)); @@ -1194,6 +1216,15 @@ static void build_TwoPRep_2b_2k( descrs.emplace_back(OperDescrType(p)); } } + if constexpr (std::is_same::value) { + // reset descriptors array + descrs.resize(0); + // iterate over 9 components (3x3 Cartesian dyadic: bra-dir × + // ket-dir) + for (int p = 0; p != 9; ++p) { + descrs.emplace_back(OperDescrType(p)); + } + } // unroll only if max_am <= cparams->max_am_opt(task) using std::max; @@ -2376,9 +2407,9 @@ void config_to_api(const std::shared_ptr& cparams, { // 2-body ints -#define BOOST_PP_TWOBODY_TASKOPER_TUPLE \ - ("eri", "coulomb_opop", "opop_coulomb_opop", "r12kg12", "r12_0_g12", \ - "r12_2_g12", "g12_T1_g12", "g12dkh") +#define BOOST_PP_TWOBODY_TASKOPER_TUPLE \ + ("eri", "coulomb_opop", "opop_coulomb_opop", "op_coulomb_op", "r12kg12", \ + "r12_0_g12", "r12_2_g12", "g12_T1_g12", "g12dkh") #define BOOST_PP_TWOBODY_TASKOPER_LIST \ BOOST_PP_TUPLE_TO_LIST(BOOST_PP_TWOBODY_TASKOPER_TUPLE) diff --git a/src/bin/libint/comp_11_opCoulombop_11.h b/src/bin/libint/comp_11_opCoulombop_11.h new file mode 100644 index 000000000..a6cbb7744 --- /dev/null +++ b/src/bin/libint/comp_11_opCoulombop_11.h @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2004-2026 Edward F. Valeev + * + * This file is part of Libint compiler. + * + * Libint compiler is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Libint compiler is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Libint compiler. If not, see . 
+ * + */ + +#ifndef LIBINT_COMP_11_OPCOULOMBOP_11_H +#define LIBINT_COMP_11_OPCOULOMBOP_11_H + +#include +#include +#include +#include +#include +#include + +namespace libint2 { + +/** + * Computes the "Gaunt LS bilinear" integral + * \f$ (\mu\, \sigma\cdot\hat{p}\,\nu | 1/r_{12} | \kappa\, + * \sigma\cdot\hat{p}\,\lambda ) \f$ by rewriting each of the 9 (a,b) components + * as a single derivative Coulomb integral \f$ ( \mu \cdot \partial_a \nu | + * 1/r_{12} | \kappa \cdot \partial_b \lambda ) \f$ with a ∈ {x,y,z} on + * bra-function-1 (ν) and b ∈ {x,y,z} on ket-function-1 (λ). + * + * Unlike Coulombσpσp (which folds 9 → 4 via σ·σ on the ket pair), all 9 + * components are exposed independently, since the two σ's here act on different + * particles and their contraction cannot be absorbed locally. + * + * @tparam F basis function type. valid choices are CGShell or CGF + */ +template +class CR_11_opCoulombop_11 + : public GenericRecurrenceRelation< + CR_11_opCoulombop_11, F, + GenIntegralSet_11_11> { + public: + typedef CR_11_opCoulombop_11 ThisType; + typedef F BasisFunctionType; + typedef opCoulombopOper OperType; + typedef GenIntegralSet_11_11 TargetType; + typedef GenericRecurrenceRelation + ParentType; + friend class GenericRecurrenceRelation; + static const unsigned int max_nchildren = 1; + + using ParentType::Instance; + + static bool directional() { return false; } + + private: + using ParentType::is_simple; + using ParentType::target_; + using ParentType::RecurrenceRelation::expr_; + using ParentType::RecurrenceRelation::nflops_; + + /// Constructor is private, used by Instance that maintains + /// registry of these objects + CR_11_opCoulombop_11(const std::shared_ptr&, unsigned int = 0); + + static std::string descr() { return "CR"; } + + // --- Code sharing overrides (mirror Coulombσpσp pattern) --- + // All shell quartets with the same quaternion component share one function. 
+ + std::string generate_label() const override { + return "CR_opCoulombop_" + + std::to_string(target_->oper()->descr().cartesian_index()); + } + + std::string spfunction_call( + const std::shared_ptr& context, + const std::shared_ptr& dims) const override { + std::ostringstream os; + os << context->label_to_function_name(this->label()) << "(inteval, " + << context->value_to_pointer(this->rr_target()->symbol()); + const unsigned int nc = this->num_children(); + for (unsigned int c = 0; c < nc; c++) { + os << ", " << context->value_to_pointer(this->rr_child(c)->symbol()); + } + // total_dim = product of all shell dims (all 4 shells are spectators) + unsigned int total_dim = 1; + for (unsigned int p = 0; p < 2; p++) { + SubIterator* si = target_->bra().member_subiter(p, 0); + total_dim *= si->num_iter(); + delete si; + si = target_->ket().member_subiter(p, 0); + total_dim *= si->num_iter(); + delete si; + } + os << "," << total_dim; + LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); + taskmgr.current().params()->max_hrr_hsrank(total_dim); + os << ")" << context->end_of_stat() << std::endl; + return os.str(); + } + + std::shared_ptr adapt_dims_( + const std::shared_ptr& dims) const override { + auto high_dim = std::make_shared>("highdim"); + return std::make_shared(high_dim, dims->low(), + dims->vecdim()); + } + + /// Each of the 9 components is a single deriv-ERI child ⇒ trivial passthrough + /// loop. 
+ void generate_code(const std::shared_ptr& context, + const std::shared_ptr& dims, + const std::string& funcname, std::ostream& decl, + std::ostream& def) override { + extern std::string declare_function( + const std::shared_ptr& context, + const std::shared_ptr& dims, + const std::shared_ptr& args, const std::string& tlabel, + const std::string& function_descr, std::ostream& decl); + + std::shared_ptr localdims = adapt_dims_(dims); + std::shared_ptr symbols(new CodeSymbols); + this->rr_target()->set_symbol("target"); + symbols->append_symbol("target"); + for (unsigned int c = 0; c < this->num_children(); c++) { + std::string symb = "src" + std::to_string(c); + this->rr_child(c)->set_symbol(symb); + symbols->append_symbol(symb); + } + LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); + const std::string tlabel = taskmgr.current().label(); + const std::string func_decl = + declare_function(context, localdims, symbols, tlabel, funcname, decl); + def << context->std_header(); + def << "#include <" << context->label_to_name(funcname) << ".h>\n\n"; + def << context->code_prefix(); + def << func_decl << context->open_block() << std::endl; + def << context->std_function_header(); + def << "#ifdef __INTEL_COMPILER\n#pragma ivdep\n#endif\n"; + def << "for(int hsi = 0; hsiclose_block() << std::endl; + def << context->code_postfix(); + } +}; + +template +CR_11_opCoulombop_11::CR_11_opCoulombop_11( + const std::shared_ptr& Tint, unsigned int) + : ParentType(Tint, 0) { + assert(Tint->num_func_bra(/* particle */ 0) == 1); + assert(Tint->num_func_bra(/* particle */ 1) == 1); + assert(Tint->num_func_ket(/* particle */ 0) == 1); + assert(Tint->num_func_ket(/* particle */ 1) == 1); + + F a(Tint->bra(0, 0)); + F b(Tint->ket(0, 0)); + F c(Tint->bra(1, 0)); + F d(Tint->ket(1, 0)); + + const auto& oper = Tint->oper(); + + if (a.contracted() || b.contracted() || c.contracted() || d.contracted()) + return; + + using namespace libint2::algebra; + using namespace 
libint2::prefactor; + using libint2::algebra::operator*; + + const mType zero_m(0u); + + ChildFactory> + factory(this); + + // Chemist notation: (a b | op c op d) — σ·p acts on one function per + // electron. Target component is indexed (a_dir, b_dir) where + // a_dir = direction of σ·p on electron 1 (applied to ket(0,0) = b) + // b_dir = direction of σ·p on electron 2 (applied to ket(1,0) = d) + // Mirrors Coulombσpσp which places BOTH derivatives on electron 2 (c and d); + // here we place ONE derivative on each electron (b on el-1, d on el-2). + const int a_dir = oper->descr().cart_a(); + const int b_dir = oper->descr().cart_b(); + + F b_deriv{b}; + b_deriv.deriv().inc(a_dir); + F d_deriv{d}; + d_deriv.deriv().inc(b_dir); + + auto child = factory.make_child(a, b_deriv, c, d_deriv, zero_m); + if (is_simple()) { + // Wrap single child in a trivial sum to satisfy expr_'s AlgebraicOperator + // type (same pattern as vrr_1_onep_1.h:261). + expr_ = Scalar(0u) + child; + nflops_ += 0; + } + +} // CR_11_opCoulombop_11::CR_11_opCoulombop_11 + +} // namespace libint2 + +#endif // LIBINT_COMP_11_OPCOULOMBOP_11_H diff --git a/src/bin/libint/master_ints_list.h b/src/bin/libint/master_ints_list.h index 37bfa29a7..ee9ca39d4 100644 --- a/src/bin/libint/master_ints_list.h +++ b/src/bin/libint/master_ints_list.h @@ -113,6 +113,9 @@ typedef GenIntegralSet_11_11 σpσpCoulombσpσp_11_11_sq; typedef GenIntegralSet_11_11 σpσpCoulombσpσp_11_11_int; +typedef GenIntegralSet_11_11 + opCoulombop_11_11_sq; +typedef GenIntegralSet_11_11 opCoulombop_11_11_int; typedef GenIntegralSet_11_11 R12kG12_11_11_sq; typedef GenIntegralSet_11_11 R12kG12_11_11_int; typedef GenIntegralSet_11_11 @@ -153,9 +156,10 @@ typedef boost::mpl::list< #endif TwoPRep_11_11_sq, TwoPRep_11_11_int, Coulombσpσp_11_11_sq, Coulombσpσp_11_11_int, σpσpCoulombσpσp_11_11_sq, σpσpCoulombσpσp_11_11_int, - R12kG12_11_11_sq, R12kG12_11_11_int, R12kR12lG12_11_11_sq, - R12kR12lG12_11_11_int, TiG12_11_11_sq, TiG12_11_11_int, 
G12TiG12_11_11_sq, - G12TiG12_11_11_int, DivG12prime_xTx_11_11_sq, DivG12prime_xTx_11_11_int, + opCoulombop_11_11_sq, opCoulombop_11_11_int, R12kG12_11_11_sq, + R12kG12_11_11_int, R12kR12lG12_11_11_sq, R12kR12lG12_11_11_int, + TiG12_11_11_sq, TiG12_11_11_int, G12TiG12_11_11_sq, G12TiG12_11_11_int, + DivG12prime_xTx_11_11_sq, DivG12prime_xTx_11_11_int, DummySymmIntegral_11_11_sq, DummySymmIntegral_11_11_int> MasterIntegralTypeList; diff --git a/src/bin/libint/master_rrs_list.h b/src/bin/libint/master_rrs_list.h index 5517d7c5c..62a7bfb08 100644 --- a/src/bin/libint/master_rrs_list.h +++ b/src/bin/libint/master_rrs_list.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -319,6 +320,9 @@ typedef CR_11_Coulombσpσp_11 CR_11_Coulombσpσp_11_int; typedef CR_11_σpσpCoulombσpσp_11 CR_11_σpσpCoulombσpσp_11_sh; typedef CR_11_σpσpCoulombσpσp_11 CR_11_σpσpCoulombσpσp_11_int; + +typedef CR_11_opCoulombop_11 CR_11_opCoulombop_11_sh; +typedef CR_11_opCoulombop_11 CR_11_opCoulombop_11_int; }; // namespace libint2 #endif // header guard diff --git a/src/bin/libint/oper.h b/src/bin/libint/oper.h index 678180c28..c6d307dd1 100644 --- a/src/bin/libint/oper.h +++ b/src/bin/libint/oper.h @@ -476,6 +476,55 @@ struct σpσpCoulombσpσp_Descr : public Contractable<σpσpCoulombσpσp_Descr }; typedef GenOper<σpσpCoulombσpσp_Descr> σpσpCoulombσpσpOper; +/** opCoulombop: (μ σ·p ν | 1/r_{12} | κ σ·p λ). + * Gaunt LS "bilinear" operator with one σ·p on each side. + * Exposes the full 3×3 gradient-gradient tensor as 9 independent components + * (indexed `3*a + b`, with a,b ∈ {x=0,y=1,z=2}), unlike Coulombσpσp which + * collapses 9 → 4 via σ·σ identity on one side only. 
+ */ +struct opCoulombop_Descr : public Contractable { + typedef MultiplicativeSymm2Body_Props Properties; + + opCoulombop_Descr() : cartesian_index_(0) {} + opCoulombop_Descr(int cartesian_index) : cartesian_index_(cartesian_index) { + assert(cartesian_index >= 0 && cartesian_index <= 8); + } + + /// 9 components = σ_a(1) ⊗ σ_b(2) bilinear, indexed as 3*a + b, + /// where a = bra-side derivative direction, b = ket-side derivative + /// direction, and a, b ∈ {x=0, y=1, z=2}. Component layout is the outer + /// product of two Cartesian unit vectors — a dyadic — analogous in spirit to + /// libint's CartesianMultipole index, but over two independent direction + /// indices. + static const unsigned int max_key = 9; + unsigned int key() const { return cartesian_index(); } + std::string description() const { + // clang-format off + static const char* labels[] = { + "XX", "XY", "XZ", + "YX", "YY", "YZ", + "ZX", "ZY", "ZZ" + }; + // clang-format on + const auto ci = cartesian_index(); + if (ci > 8) abort(); + return std::string("op_coulomb_op[") + labels[ci] + "]"; + } + std::string label() const { return description(); } + int psymm(int i, int j) const { abort(); } + int hermitian(int i) const { return +1; } + + int cartesian_index() const { return cartesian_index_; } + /// bra-side (first σ) derivative direction ∈ {0=x, 1=y, 2=z} + int cart_a() const { return cartesian_index_ / 3; } + /// ket-side (second σ) derivative direction ∈ {0=x, 1=y, 2=z} + int cart_b() const { return cartesian_index_ % 3; } + + private: + const int cartesian_index_ = -1; +}; +typedef GenOper opCoulombopOper; + /** GTG_1d is the two-body 1-dimensional Gaussian geminal */ struct GTG_1d_Descr : public Contractable { diff --git a/src/bin/libint/strategy.cc b/src/bin/libint/strategy.cc index 4e4804da7..e59f6e92f 100644 --- a/src/bin/libint/strategy.cc +++ b/src/bin/libint/strategy.cc @@ -135,6 +135,14 @@ template <> struct MasterStrategy<σpσpCoulombσpσp_11_11_int> { typedef boost::mpl::list value; 
}; +template <> +struct MasterStrategy { + typedef boost::mpl::list value; +}; +template <> +struct MasterStrategy { + typedef boost::mpl::list value; +}; #if LIBINT_SHELLQUARTET_STRATEGY == LIBINT_SHELLQUARTET_STRATEGY_A0C0 template <> From 590ce42f1711386318b8f1cd56adc94c5296849d Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Sun, 26 Apr 2026 14:41:43 -0400 Subject: [PATCH 19/22] op_coulomb_op: redesign 9 components as SO(3) irreps Scalar trace + 3 antisym + 5 sym-TL replaces raw 3*a+b dyadic. Bra<->ket swap: per-component sign flip on antisym (was: index transpose). p1_p2_swappable=true. Tests pass: 2,112,533 assertions. --- export/tests/unit/test-2body.cc | 33 ++++- include/libint2/engine.impl.h | 18 +-- src/bin/libint/build_libint.cc | 30 ++--- src/bin/libint/comp_11_opCoulombop_11.h | 171 +++++++++++++++++++----- src/bin/libint/oper.h | 64 +++++---- 5 files changed, 229 insertions(+), 87 deletions(-) diff --git a/export/tests/unit/test-2body.cc b/export/tests/unit/test-2body.cc index cee9b0800..60b6cf812 100644 --- a/export/tests/unit/test-2body.cc +++ b/export/tests/unit/test-2body.cc @@ -679,8 +679,12 @@ TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { int l3, m3, n3; FOR_CART(l3, m3, n3, obs[s3].contr[0].l) - std::array ref_op_coulomb_op{}; - ref_op_coulomb_op.fill(0.0); + // Raw 3x3 dyadic T_{ab} = (a b∂_a | c d∂_b), accumulated in + // chemist-notation index 3*a+b. After the primitive loop we + // project into the 9 SO(3) irreducible components that the engine + // returns: Scalar, AntisymX/Y/Z, SymTLDiagA/B, SymTLOffXY/XZ/YZ. 
+ std::array ref_raw{}; + ref_raw.fill(0.0); for (uint p0 = 0; p0 < obs[s0].nprim(); p0++) { for (uint p1 = 0; p1 < obs[s1].nprim(); p1++) { @@ -712,12 +716,35 @@ TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { }; for (int a = 0; a < 3; ++a) for (int b = 0; b < 3; ++b) - ref_op_coulomb_op[3 * a + b] += c0123 * D(a, b); + ref_raw[3 * a + b] += c0123 * D(a, b); } } } } + // Project raw dyadic into the 9 SO(3) irrep components used by + // op_coulomb_op (must match opCoulombop_Descr::Component order). + const auto Txx = ref_raw[0]; + const auto Txy = ref_raw[1]; + const auto Txz = ref_raw[2]; + const auto Tyx = ref_raw[3]; + const auto Tyy = ref_raw[4]; + const auto Tyz = ref_raw[5]; + const auto Tzx = ref_raw[6]; + const auto Tzy = ref_raw[7]; + const auto Tzz = ref_raw[8]; + std::array ref_op_coulomb_op{ + Txx + Tyy + Tzz, // Scalar + Tyz - Tzy, // AntisymX + Tzx - Txz, // AntisymY + Txy - Tyx, // AntisymZ + Txx - Tyy, // SymTLDiagA + 2.0 * Tzz - Txx - Tyy, // SymTLDiagB + Txy + Tyx, // SymTLOffXY + Txz + Tzx, // SymTLOffXZ + Tyz + Tzy, // SymTLOffYZ + }; + const double ABSOLUTE_DEVIATION_THRESHOLD = 5.0E-14; const double RELATIVE_DEVIATION_THRESHOLD = 1.0E-9; for (auto comp = 0; comp < 9; ++comp) { diff --git a/include/libint2/engine.impl.h b/include/libint2/engine.impl.h index 27187cbcc..12cf89303 100644 --- a/include/libint2/engine.impl.h +++ b/include/libint2/engine.impl.h @@ -2192,6 +2192,12 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( } if (swap_tket && oper_ == Operator::coulomb_opop && s > 0) oper_cart_component_phase = -1.0; + // op_coulomb_op irrep layout under bra↔ket swap: antisym + // components (s ∈ {1,2,3}) flip sign; scalar (0) and sym-TL + // (4..8) are invariant. swap_tket is always false for this + // operator; the sign correction applies on swap_braket alone. 
+ if (oper_ == Operator::op_coulomb_op && s >= 1 && s <= 3) + oper_cart_component_phase = -1.0; if (swap_tbra) tgt_blk_mat = oper_cart_component_phase * src_blk_mat.transpose(); @@ -2257,15 +2263,9 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( for (auto s = 0; s != ntargets; ++s) targets_[4 * (s % 4) + (s / 4)] = temp[s]; } - // For op_coulomb_op with swap_braket: (a,b) → (b,a) because bra↔ket swap - // exchanges which side each σ·p direction came from. Layout is - // index = 3*a + b, so the remap is s_new = 3*(s%3) + (s/3). - if (permute && oper_ == Operator::op_coulomb_op && swap_braket) { - std::array temp; - for (auto s = 0; s != ntargets; ++s) temp[s] = targets_[s]; - for (auto s = 0; s != ntargets; ++s) - targets_[3 * (s % 3) + (s / 3)] = temp[s]; - } + // op_coulomb_op irrep layout: bra↔ket swap is handled in-place by + // oper_cart_component_phase above (sign flip on antisym components, + // identity on scalar / sym-TL); no pointer remap is needed. } // if need_scratch => needed to transpose and/or tform else { // did not use scratch? may still need to update targets_ if (set_targets_) { diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index fbc8ac07b..5f799ea1c 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -1150,14 +1150,14 @@ static void build_TwoPRep_2b_2k( std::shared_ptr context(new CppCodeContext(cparams)); std::shared_ptr memman(new WorstFitMemoryManager()); - // opCoulombop has only a 2-fold bra↔ket-swap symmetry (with (a,b)↔(b,a) - // component remap). Within-side particle swap is NOT a symmetry because σ·p - // attaches to one specific function per side (ν on bra, λ on ket); swapping - // moves the operator to a different physical center that IBP cannot recover - // when centers differ. Emit code for every (la,lb,lc,ld) combination to - // avoid triggering any within-side swap at runtime. 
- bool p1_p2_swappable = !std::is_same::value && - !std::is_same::value; + // opCoulombop has a 2-fold bra↔ket-swap symmetry, with per-component sign + // flips under the swap (Antisym* flip sign; Scalar/SymTL* invariant) — this + // is captured by p1_p2_swappable=true plus a dedicated predicate that + // canonicalizes only la+lb<=lc+ld and emits code for *all* within-side + // orderings (within-side swap is NOT a symmetry — σ·p attaches to one + // specific function per side, and IBP cannot recover the sign across the + // 1/r12 coupling). + bool p1_p2_swappable = !std::is_same::value; bool bra_ket_coswappable = std::is_same::value; // Note: la, lb, lc, ld generate code for chemist notation (ab|O|cd), where O @@ -1166,11 +1166,11 @@ static void build_TwoPRep_2b_2k( for (unsigned int lb = 0; lb <= lmax; lb++) { for (unsigned int lc = 0; lc <= lmax; lc++) { for (unsigned int ld = 0; ld <= lmax; ld++) { - // opCoulombop has only a bra↔ket (particle 1↔2) swap symmetry; - // within-side swap is NOT a symmetry (σ·p would move to the wrong - // physical center). Canonical form: la+lb <= lc+ld only - // (ORCA: la+lb >= lc+ld). Use a dedicated predicate so within-side - // orderings are not reduced away. + // opCoulombop: only bra↔ket (particle 1↔2) swap is a symmetry. + // Within-side swap is NOT (σ·p would move to a different physical + // center; IBP cannot repair the sign across 1/r12). Dedicated + // predicate canonicalizes la+lb<=lc+ld only (ORCA: >=) and accepts + // all within-side orderings. 
if constexpr (std::is_same::value) { #if LIBINT_SHELL_SET == LIBINT_SHELL_SET_STANDARD if (!(la + lb <= lc + ld)) continue; @@ -1219,8 +1219,8 @@ static void build_TwoPRep_2b_2k( if constexpr (std::is_same::value) { // reset descriptors array descrs.resize(0); - // iterate over 9 components (3x3 Cartesian dyadic: bra-dir × - // ket-dir) + // iterate over 9 SO(3) irrep components: 1 scalar trace + 3 + // antisym (curl-curl) + 5 sym-traceless for (int p = 0; p != 9; ++p) { descrs.emplace_back(OperDescrType(p)); } diff --git a/src/bin/libint/comp_11_opCoulombop_11.h b/src/bin/libint/comp_11_opCoulombop_11.h index a6cbb7744..38ae4763e 100644 --- a/src/bin/libint/comp_11_opCoulombop_11.h +++ b/src/bin/libint/comp_11_opCoulombop_11.h @@ -33,14 +33,12 @@ namespace libint2 { /** * Computes the "Gaunt LS bilinear" integral * \f$ (\mu\, \sigma\cdot\hat{p}\,\nu | 1/r_{12} | \kappa\, - * \sigma\cdot\hat{p}\,\lambda ) \f$ by rewriting each of the 9 (a,b) components - * as a single derivative Coulomb integral \f$ ( \mu \cdot \partial_a \nu | - * 1/r_{12} | \kappa \cdot \partial_b \lambda ) \f$ with a ∈ {x,y,z} on - * bra-function-1 (ν) and b ∈ {x,y,z} on ket-function-1 (λ). - * - * Unlike Coulombσpσp (which folds 9 → 4 via σ·σ on the ket pair), all 9 - * components are exposed independently, since the two σ's here act on different - * particles and their contraction cannot be absorbed locally. + * \sigma\cdot\hat{p}\,\lambda ) \f$ in the SO(3) irreducible decomposition of + * the rank-2 tensor \f$ T_{ab} = ( \mu \cdot \partial_a \nu | 1/r_{12} | \kappa + * \cdot \partial_b \lambda ) \f$: 1 scalar trace + 3 antisymmetric (curl-curl) + * + 5 symmetric-traceless = 9 components total. Each output is a small linear + * combination of raw deriv-TwoPRep children, mirroring + * comp_11_Coulombσpσp_11.h's pattern of trace/antisym emission. * * @tparam F basis function type. 
valid choices are CGShell or CGF */ @@ -58,7 +56,7 @@ class CR_11_opCoulombop_11 ParentType; friend class GenericRecurrenceRelation; - static const unsigned int max_nchildren = 1; + static const unsigned int max_nchildren = 3; using ParentType::Instance; @@ -81,7 +79,7 @@ class CR_11_opCoulombop_11 std::string generate_label() const override { return "CR_opCoulombop_" + - std::to_string(target_->oper()->descr().cartesian_index()); + std::to_string(target_->oper()->descr().component_index()); } std::string spfunction_call( @@ -118,8 +116,8 @@ class CR_11_opCoulombop_11 dims->vecdim()); } - /// Each of the 9 components is a single deriv-ERI child ⇒ trivial passthrough - /// loop. + /// Hand-emit the per-component irrep linear combination over deriv-ERI + /// children. The combination depends on the target's component index. void generate_code(const std::shared_ptr& context, const std::shared_ptr& dims, const std::string& funcname, std::ostream& decl, @@ -150,9 +148,38 @@ class CR_11_opCoulombop_11 def << context->std_function_header(); def << "#ifdef __INTEL_COMPILER\n#pragma ivdep\n#endif\n"; def << "for(int hsi = 0; hsitarget_->oper()->descr().component_index(); + std::string rhs; + unsigned int nflops = 0; + switch (comp) { + case opCoulombop_Descr::Scalar: + rhs = "src0[hsi] + src1[hsi] + src2[hsi]"; + nflops = 2; + break; + case opCoulombop_Descr::AntisymX: + case opCoulombop_Descr::AntisymY: + case opCoulombop_Descr::AntisymZ: + case opCoulombop_Descr::SymTLDiagA: + rhs = "src0[hsi] - src1[hsi]"; + nflops = 1; + break; + case opCoulombop_Descr::SymTLDiagB: + rhs = "2.0*src0[hsi] - src1[hsi] - src2[hsi]"; + nflops = 3; + break; + case opCoulombop_Descr::SymTLOffXY: + case opCoulombop_Descr::SymTLOffXZ: + case opCoulombop_Descr::SymTLOffYZ: + rhs = "src0[hsi] + src1[hsi]"; + nflops = 1; + break; + default: + throw std::runtime_error( + "CR_11_opCoulombop_11::generate_code: invalid component index"); + } + def << "target[hsi] = " << rhs << ";\n}\n"; + def << "/** 
Number of flops = " << nflops << " */\n"; def << context->close_block() << std::endl; def << context->code_postfix(); } @@ -187,26 +214,100 @@ CR_11_opCoulombop_11::CR_11_opCoulombop_11( GenIntegralSet_11_11> factory(this); - // Chemist notation: (a b | op c op d) — σ·p acts on one function per - // electron. Target component is indexed (a_dir, b_dir) where - // a_dir = direction of σ·p on electron 1 (applied to ket(0,0) = b) - // b_dir = direction of σ·p on electron 2 (applied to ket(1,0) = d) - // Mirrors Coulombσpσp which places BOTH derivatives on electron 2 (c and d); - // here we place ONE derivative on each electron (b on el-1, d on el-2). - const int a_dir = oper->descr().cart_a(); - const int b_dir = oper->descr().cart_b(); - - F b_deriv{b}; - b_deriv.deriv().inc(a_dir); - F d_deriv{d}; - d_deriv.deriv().inc(b_dir); - - auto child = factory.make_child(a, b_deriv, c, d_deriv, zero_m); - if (is_simple()) { - // Wrap single child in a trivial sum to satisfy expr_'s AlgebraicOperator - // type (same pattern as vrr_1_onep_1.h:261). - expr_ = Scalar(0u) + child; - nflops_ += 0; + // Chemist notation: (a b | op c op d). σ·p acts on one function per electron + // — direction `i` on ket(0,0) = b (electron 1), direction `j` on ket(1,0) = d + // (electron 2). Each output is an SO(3) irrep combination of the raw 3×3 + // T_{ij} dyadic; case bodies build only the children each combination needs. 
+ constexpr auto x = 0; + constexpr auto y = 1; + constexpr auto z = 2; + + auto T = [&](int i, int j) { + F b_d{b}; + b_d.deriv().inc(i); + F d_d{d}; + d_d.deriv().inc(j); + return factory.make_child(a, b_d, c, d_d, zero_m); + }; + + switch (oper->descr().component_index()) { + case opCoulombop_Descr::Scalar: { + auto Txx = T(x, x); + auto Tyy = T(y, y); + auto Tzz = T(z, z); + if (is_simple()) { + expr_ = Txx + Tyy + Tzz; + nflops_ += 2; + } + } break; + case opCoulombop_Descr::AntisymX: { + auto Tyz = T(y, z); + auto Tzy = T(z, y); + if (is_simple()) { + expr_ = Tyz - Tzy; + nflops_ += 1; + } + } break; + case opCoulombop_Descr::AntisymY: { + auto Tzx = T(z, x); + auto Txz = T(x, z); + if (is_simple()) { + expr_ = Tzx - Txz; + nflops_ += 1; + } + } break; + case opCoulombop_Descr::AntisymZ: { + auto Txy = T(x, y); + auto Tyx = T(y, x); + if (is_simple()) { + expr_ = Txy - Tyx; + nflops_ += 1; + } + } break; + case opCoulombop_Descr::SymTLDiagA: { + auto Txx = T(x, x); + auto Tyy = T(y, y); + if (is_simple()) { + expr_ = Txx - Tyy; + nflops_ += 1; + } + } break; + case opCoulombop_Descr::SymTLDiagB: { + // 2·T_zz − T_xx − T_yy: child order (Tzz, Txx, Tyy) matches generate_code + auto Tzz = T(z, z); + auto Txx = T(x, x); + auto Tyy = T(y, y); + if (is_simple()) { + expr_ = Scalar(2) * Tzz - Txx - Tyy; + nflops_ += 3; + } + } break; + case opCoulombop_Descr::SymTLOffXY: { + auto Txy = T(x, y); + auto Tyx = T(y, x); + if (is_simple()) { + expr_ = Txy + Tyx; + nflops_ += 1; + } + } break; + case opCoulombop_Descr::SymTLOffXZ: { + auto Txz = T(x, z); + auto Tzx = T(z, x); + if (is_simple()) { + expr_ = Txz + Tzx; + nflops_ += 1; + } + } break; + case opCoulombop_Descr::SymTLOffYZ: { + auto Tyz = T(y, z); + auto Tzy = T(z, y); + if (is_simple()) { + expr_ = Tyz + Tzy; + nflops_ += 1; + } + } break; + default: + throw std::runtime_error("CR_11_opCoulombop_11: invalid component index"); } } // CR_11_opCoulombop_11::CR_11_opCoulombop_11 diff --git a/src/bin/libint/oper.h 
b/src/bin/libint/oper.h index c6d307dd1..8e8a4f567 100644 --- a/src/bin/libint/oper.h +++ b/src/bin/libint/oper.h @@ -478,35 +478,43 @@ typedef GenOper<σpσpCoulombσpσp_Descr> σpσpCoulombσpσpOper; /** opCoulombop: (μ σ·p ν | 1/r_{12} | κ σ·p λ). * Gaunt LS "bilinear" operator with one σ·p on each side. - * Exposes the full 3×3 gradient-gradient tensor as 9 independent components - * (indexed `3*a + b`, with a,b ∈ {x=0,y=1,z=2}), unlike Coulombσpσp which - * collapses 9 → 4 via σ·σ identity on one side only. + * Outputs the SO(3) irreducible decomposition of the 3×3 gradient-gradient + * tensor T_{ab} = ∂_a ∂_b (μν|κλ): 1 scalar trace + 3 antisymmetric + * (curl-curl) + 5 symmetric-traceless = 9 components. Same storage as the + * raw dyadic, but indexed by physics-meaningful irreps so consumers do not + * need to hand-build trace/antisym/sym-TL combinations at every contraction + * site. */ struct opCoulombop_Descr : public Contractable { typedef MultiplicativeSymm2Body_Props Properties; - opCoulombop_Descr() : cartesian_index_(0) {} - opCoulombop_Descr(int cartesian_index) : cartesian_index_(cartesian_index) { - assert(cartesian_index >= 0 && cartesian_index <= 8); + /// SO(3) irreducible components of the rank-2 Cartesian tensor T_{ab}. 
+ enum Component : int { + Scalar = 0, ///< T_xx + T_yy + T_zz (trace, ∇·∇) + AntisymX = 1, ///< T_yz − T_zy = (∇×∇)_x + AntisymY = 2, ///< T_zx − T_xz = (∇×∇)_y + AntisymZ = 3, ///< T_xy − T_yx = (∇×∇)_z + SymTLDiagA = 4, ///< T_xx − T_yy + SymTLDiagB = 5, ///< 2·T_zz − T_xx − T_yy + SymTLOffXY = 6, ///< T_xy + T_yx + SymTLOffXZ = 7, ///< T_xz + T_zx + SymTLOffYZ = 8, ///< T_yz + T_zy + }; + + opCoulombop_Descr() : component_index_(0) {} + opCoulombop_Descr(int component_index) : component_index_(component_index) { + assert(component_index >= 0 && component_index <= 8); } - /// 9 components = σ_a(1) ⊗ σ_b(2) bilinear, indexed as 3*a + b, - /// where a = bra-side derivative direction, b = ket-side derivative - /// direction, and a, b ∈ {x=0, y=1, z=2}. Component layout is the outer - /// product of two Cartesian unit vectors — a dyadic — analogous in spirit to - /// libint's CartesianMultipole index, but over two independent direction - /// indices. static const unsigned int max_key = 9; - unsigned int key() const { return cartesian_index(); } + unsigned int key() const { return component_index(); } std::string description() const { - // clang-format off static const char* labels[] = { - "XX", "XY", "XZ", - "YX", "YY", "YZ", - "ZX", "ZY", "ZZ" + "scalar", "antisym_x", "antisym_y", + "antisym_z", "symtl_diag_a", "symtl_diag_b", + "symtl_off_xy", "symtl_off_xz", "symtl_off_yz", }; - // clang-format on - const auto ci = cartesian_index(); + const auto ci = component_index(); if (ci > 8) abort(); return std::string("op_coulomb_op[") + labels[ci] + "]"; } @@ -514,14 +522,20 @@ struct opCoulombop_Descr : public Contractable { int psymm(int i, int j) const { abort(); } int hermitian(int i) const { return +1; } - int cartesian_index() const { return cartesian_index_; } - /// bra-side (first σ) derivative direction ∈ {0=x, 1=y, 2=z} - int cart_a() const { return cartesian_index_ / 3; } - /// ket-side (second σ) derivative direction ∈ {0=x, 1=y, 2=z} - int cart_b() const { 
return cartesian_index_ % 3; } + int component_index() const { return component_index_; } + + bool is_scalar() const { return component_index_ == Scalar; } + bool is_antisym() const { + return component_index_ >= AntisymX && component_index_ <= AntisymZ; + } + bool is_sym_tl() const { + return component_index_ >= SymTLDiagA && component_index_ <= SymTLOffYZ; + } + /// for antisym components: 0=x, 1=y, 2=z (only valid if is_antisym()) + int antisym_cart() const { return component_index_ - AntisymX; } private: - const int cartesian_index_ = -1; + const int component_index_ = -1; }; typedef GenOper opCoulombopOper; From b463ca8d43632444e950971e454623345a32cd8e Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Sun, 26 Apr 2026 14:43:40 -0400 Subject: [PATCH 20/22] =?UTF-8?q?Add=20=CF=83pR=CF=83p=20(oprop):=201-body?= =?UTF-8?q?=20=CF=83=C2=B7p=20=C2=B7=20r=20=C2=B7=20=CF=83=C2=B7p=20integr?= =?UTF-8?q?al?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 12 components = 3 dipole directions × 4 Pauli quaternion (trace + 3 antisym), mirroring σpVσp's fold. Engine origin via set_params. Master integral list split via mpl::joint_view (boost list50 limit). 
--- export/tests/unit/test-1body.cc | 66 +++++++ include/libint2/engine.h | 15 +- include/libint2/engine.impl.h | 61 +++---- src/bin/libint/build_libint.cc | 17 +- .../libint/comp_1_\317\203pR\317\203p_1.h" | 161 ++++++++++++++++++ src/bin/libint/master_ints_list.h | 18 +- src/bin/libint/master_rrs_list.h | 4 + src/bin/libint/oper.h | 39 +++++ src/bin/libint/strategy.cc | 8 + 9 files changed, 352 insertions(+), 37 deletions(-) create mode 100644 "src/bin/libint/comp_1_\317\203pR\317\203p_1.h" diff --git a/export/tests/unit/test-1body.cc b/export/tests/unit/test-1body.cc index 57c65f428..54c0cc14d 100644 --- a/export/tests/unit/test-1body.cc +++ b/export/tests/unit/test-1body.cc @@ -244,6 +244,72 @@ TEST_CASE_METHOD(libint2::unit::DefaultFixture, "W correctness", #endif // LIBINT2_SUPPORT_ONEBODY } +TEST_CASE_METHOD(libint2::unit::DefaultFixture, "σpRσp correctness", + "[engine][1-body]") { +#if defined(LIBINT2_SUPPORT_ONEBODY) + if (LIBINT_SHGSHELL_ORDERING != LIBINT_SHGSHELL_ORDERING_STANDARD) return; + + // Two contracted Gaussian shells at distinct centers. We exercise + // `(σ·p) r (σ·p)` over the (s|d) and (d|s) shell pairs and validate + // Hermiticity: trace components (q=0) are symmetric under bra↔ket swap, + // antisym components (q=1,2,3) are antisymmetric. 
+ std::vector obs{ + Shell{{1.0, 3.0}, {{0, true, {1.0, 0.3}}}, {{0.0, 0.0, 0.0}}}, + Shell{{2.0, 5.0}, {{2, true, {1.0, 0.2}}}, {{1.0, 1.0, 1.0}}}}; + + const auto lmax = std::min(2, LIBINT2_MAX_AM_oprop); + if (lmax < 2) return; + + auto engine = Engine(Operator::oprop, 2, lmax); + engine.set_params(std::array{{0.0, 0.0, 0.0}}); + + // (s|σpRσp|d) and (d|σpRσp|s) + engine.compute(obs[0], obs[1]); + std::array, 12> ab; + for (int c = 0; c < 12; ++c) { + const auto* buf = engine.results()[c]; + REQUIRE(buf != nullptr); + ab[c].assign(buf, buf + (1 * 5)); // n_s × n_d_pure = 1 × 5 + } + + engine.compute(obs[1], obs[0]); + std::array, 12> ba; + for (int c = 0; c < 12; ++c) { + const auto* buf = engine.results()[c]; + REQUIRE(buf != nullptr); + ba[c].assign(buf, buf + (5 * 1)); // n_d_pure × n_s + } + + // Hermiticity check: σpRσp is Hermitian, but the Pauli identity routes the + // imaginary i factor on the antisym pieces into a real-stored sign flip. + // q=0 trace: matrix is symmetric ⇒ ab[4*k+0][i,j] == ba[4*k+0][j,i] + // q=1..3 : matrix is antisym ⇒ ab[4*k+q][i,j] == -ba[4*k+q][j,i] + // (Indices: ab is laid out (i=0..n_s-1, j=0..n_d-1); ba is (j, i).) + const double tol = 1.0e-12; + for (int k = 0; k < 3; ++k) { + for (int q = 0; q < 4; ++q) { + const int comp = 4 * k + q; + const double expected_sign = (q == 0) ? +1.0 : -1.0; + for (int i = 0; i < 1; ++i) { + for (int j = 0; j < 5; ++j) { + const double v_ab = ab[comp][i * 5 + j]; + const double v_ba = ba[comp][j * 1 + i]; + REQUIRE(std::isfinite(v_ab)); + REQUIRE(std::abs(v_ab - expected_sign * v_ba) < tol); + } + } + } + } + + // Sanity: not every component is identically zero (would mask codegen bugs). 
+ bool any_nonzero = false; + for (int c = 0; c < 12; ++c) + for (double v : ab[c]) + if (std::abs(v) > 1.0e-10) any_nonzero = true; + REQUIRE(any_nonzero); +#endif // LIBINT2_SUPPORT_ONEBODY +} + // verify that python/tests/test_libint2.py:test_integrals is correct TEST_CASE_METHOD(libint2::unit::DefaultFixture, "python correctness", "[engine][1-body]") { diff --git a/include/libint2/engine.h b/include/libint2/engine.h index f99c6a898..d90c6709c 100644 --- a/include/libint2/engine.h +++ b/include/libint2/engine.h @@ -147,6 +147,12 @@ enum class Operator { sphemultipole, /// The four components of σp . V . σp, where V is the nuclear potential. opVop, + /// (1-body) σp . r . σp, the σ·p-on-both-sides analog of the dipole moment. + /// Produces 12 components = 3 dipole directions × 4 Pauli quaternion + /// components (trace + 3 antisym), indexed as `4*k + q` with `k ∈ {x,y,z}` + /// the dipole direction and `q ∈ {0=trace, 1=σ_x, 2=σ_y, 3=σ_z}` the Pauli + /// piece. Origin set via `engine.set_params(std::array)`. + oprop, /// \f$ \delta(\vec{r}_1 - \vec{r}_2) \f$ delta, /// (2-body) Coulomb operator = \f$ r_{12}^{-1} \f$ @@ -199,7 +205,7 @@ enum class Operator { // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!keep this // updated!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
first_1body_oper = overlap, - last_1body_oper = opVop, + last_1body_oper = oprop, first_2body_oper = delta, last_2body_oper = stg_x_coulomb, first_oper = first_1body_oper, @@ -352,6 +358,13 @@ struct operator_traits (LIBINT_MULTIPOLE_MAX_ORDER + 1) * (LIBINT_MULTIPOLE_MAX_ORDER + 1); }; +template <> +struct operator_traits + : public operator_traits { + static constexpr auto nopers = 12; + static constexpr auto intrinsic_deriv_order = 2; +}; + template <> struct operator_traits : public detail::default_operator_traits { diff --git a/include/libint2/engine.impl.h b/include/libint2/engine.impl.h index 12cf89303..a431f8ad1 100644 --- a/include/libint2/engine.impl.h +++ b/include/libint2/engine.impl.h @@ -70,40 +70,41 @@ typename std::remove_all_extents::type* to_ptr1(T (&a)[N]) { /// These MUST appear in the same order as in Operator. /// You must also update BOOST_PP_NBODY_OPERATOR_LAST_ONEBODY_INDEX when you add /// one-body ints -#define BOOST_PP_NBODY_OPERATOR_LIST \ - (overlap, /* overlap */ \ - (kinetic, /* kinetic */ \ - (elecpot, /* nuclear */ \ - (elecpot, /* erf_nuclear */ \ - (elecpot, /* erfc_nuclear */ \ - (elecpot, /* erfx_nuclear */ \ - (1emultipole, /* emultipole1 */ \ - (2emultipole, /* emultipole2 */ \ - (3emultipole, /* emultipole3 */ \ - (sphemultipole, /* sphemultipole */ \ - (opVop, /* opVop */ \ - (eri, /* delta */ \ - (eri, /* coulomb */ \ - (coulomb_opop, /* coulomb_opop */ \ - (opop_coulomb_opop, /* opop_coulomb_opop */ \ - (op_coulomb_op, /* op_coulomb_op */ \ - (eri, /* cgtg */ \ - (eri, /* cgtg_x_coulomb */ \ - (eri, /* delcgtg2 */ \ - (eri, /* r12 */ \ - (eri, /* erf_coulomb */ \ - (eri, /* erfc_coulomb */ \ - (eri, /* erfx_coulomb */ \ - (eri, /* stg */ \ - (eri, /* yukawa */ \ - BOOST_PP_NIL))))))))))))))))))))))))) +#define BOOST_PP_NBODY_OPERATOR_LIST \ + (overlap, /* overlap */ \ + (kinetic, /* kinetic */ \ + (elecpot, /* nuclear */ \ + (elecpot, /* erf_nuclear */ \ + (elecpot, /* erfc_nuclear */ \ + (elecpot, /* erfx_nuclear */ \ + 
(1emultipole, /* emultipole1 */ \ + (2emultipole, /* emultipole2 */ \ + (3emultipole, /* emultipole3 */ \ + (sphemultipole, /* sphemultipole */ \ + (opVop, /* opVop */ \ + (oprop, /* oprop */ \ + (eri, /* delta */ \ + (eri, /* coulomb */ \ + (coulomb_opop, /* coulomb_opop */ \ + (opop_coulomb_opop, /* opop_coulomb_opop */ \ + (op_coulomb_op, /* op_coulomb_op */ \ + (eri, /* cgtg */ \ + (eri, /* cgtg_x_coulomb */ \ + (eri, /* delcgtg2 */ \ + (eri, /* r12 */ \ + (eri, /* erf_coulomb */ \ + (eri, /* erfc_coulomb */ \ + (eri, /* erfx_coulomb */ \ + (eri, /* stg */ \ + (eri, /* yukawa */ \ + BOOST_PP_NIL)))))))))))))))))))))))))) #define BOOST_PP_NBODY_OPERATOR_INDEX_TUPLE \ BOOST_PP_MAKE_TUPLE(BOOST_PP_LIST_SIZE(BOOST_PP_NBODY_OPERATOR_LIST)) #define BOOST_PP_NBODY_OPERATOR_INDEX_LIST \ BOOST_PP_TUPLE_TO_LIST(BOOST_PP_NBODY_OPERATOR_INDEX_TUPLE) #define BOOST_PP_NBODY_OPERATOR_LAST_ONEBODY_INDEX \ - 10 // opVop, the 11th member of BOOST_PP_NBODY_OPERATOR_LIST, is the last + 11 // oprop, the 12th member of BOOST_PP_NBODY_OPERATOR_LIST, is the last // 1-body operator // make list of braket indices for n-body ints @@ -1028,7 +1029,7 @@ __libint2_engine_inline void Engine::compute_primdata(Libint_t& primdata, // } if (oper_ == Operator::emultipole1 || oper_ == Operator::emultipole2 || - oper_ == Operator::emultipole3) { + oper_ == Operator::emultipole3 || oper_ == Operator::oprop) { const auto& O = any_cast< const operator_traits::oper_params_type&>( params_); // same as emultipoleX @@ -1076,7 +1077,7 @@ __libint2_engine_inline void Engine::compute_primdata(Libint_t& primdata, primdata._0_Overlap_0_z[0] = ovlp_ss_z; if (oper_ == Operator::kinetic || (deriv_order_ > 0) || - oper_ == Operator::opVop) { + oper_ == Operator::opVop || oper_ == Operator::oprop) { #if LIBINT2_DEFINED(eri, two_alpha0_bra) primdata.two_alpha0_bra[0] = 2.0 * alpha1; #endif diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index 5f799ea1c..206cd43c4 100644 --- 
a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -308,6 +308,11 @@ template <> return σpVσp_Descr(p); } +template <> +σpRσp_Descr make_descr<σpRσp_Descr>(int p, int, int) { + return σpRσp_Descr(p); +} + template <> Coulombσpσp_Descr make_descr(int p, int, int) { return Coulombσpσp_Descr(p); @@ -445,6 +450,14 @@ void build_onebody_1b_1k(std::ostream& os, std::string label, descrs.emplace_back(make_descr(p)); } } + if (std::is_same<_OperType, σpRσpOper>::value) { + // reset descriptors array + descrs.resize(0); + // iterate over 12 = 3 dipole directions × 4 Pauli components + for (int p = 0; p != 12; ++p) { + descrs.emplace_back(make_descr(p)); + } + } // derivative index is the outermost (slowest running) // operator component is second slowest @@ -584,11 +597,11 @@ void try_main(int argc, char* argv[]) { // overlap, kinetic, elecpot cannot be omitted #define BOOST_PP_ONEBODY_TASK_TUPLE \ (overlap, kinetic, elecpot, 1emultipole, 2emultipole, 3emultipole, \ - sphemultipole, opVop) + sphemultipole, opVop, oprop) #define BOOST_PP_ONEBODY_TASK_OPER_TUPLE \ (OverlapOper, KineticOper, ElecPotOper, CartesianMultipoleOper<3u>, \ CartesianMultipoleOper<3u>, CartesianMultipoleOper<3u>, \ - SphericalMultipoleOper, σpVσpOper) + SphericalMultipoleOper, σpVσpOper, σpRσpOper) #define BOOST_PP_ONEBODY_TASK_LIST \ BOOST_PP_TUPLE_TO_LIST(BOOST_PP_ONEBODY_TASK_TUPLE) #define BOOST_PP_ONEBODY_TASK_OPER_LIST \ diff --git "a/src/bin/libint/comp_1_\317\203pR\317\203p_1.h" "b/src/bin/libint/comp_1_\317\203pR\317\203p_1.h" new file mode 100644 index 000000000..e698c9047 --- /dev/null +++ "b/src/bin/libint/comp_1_\317\203pR\317\203p_1.h" @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2004-2026 Edward F. Valeev + * + * This file is part of Libint compiler. 
+ * + * Libint compiler is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Libint compiler is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Libint compiler. If not, see . + * + */ + +#ifndef LIBINT_COMP_1_ΣPRΣP_1_H +#define LIBINT_COMP_1_ΣPRΣP_1_H + +#include + +namespace libint2 { + +/** + * Computes the integral of \f$ \sigma \cdot \hat{p}\, r_k\, \sigma \cdot + * \hat{p} \f$ over CGShell/CGF by folding the 9 raw \f$ \sigma_a \partial_a r_k + * \sigma_b \partial_b \f$ dyadics per dipole direction \f$ k \f$ to 4 + * Pauli-quaternion components via \f$ \sigma_a \sigma_b = \delta_{ab} + + * i\epsilon_{abc}\sigma_c \f$. 12 outputs total = 3 dipole directions × 4 + * Pauli components, mirroring the σpVσp fold but with the central operator + * being a Cartesian dipole instead of the electrostatic potential V. + * + * @tparam F basis function type. 
valid choices are CGShell or CGF + */ +template +class CR_1_σpRσp_1 + : public GenericRecurrenceRelation< + CR_1_σpRσp_1, F, GenIntegralSet_1_1> { + public: + typedef CR_1_σpRσp_1 ThisType; + typedef F BasisFunctionType; + typedef σpRσpOper OperType; + typedef GenIntegralSet_1_1 TargetType; + typedef GenericRecurrenceRelation + ParentType; + friend class GenericRecurrenceRelation; + static const unsigned int max_nchildren = 100; + + using ParentType::Instance; + + static bool directional() { return false; } + + private: + using ParentType::is_simple; + using ParentType::target_; + using ParentType::RecurrenceRelation::expr_; + using ParentType::RecurrenceRelation::nflops_; + + CR_1_σpRσp_1(const std::shared_ptr &, unsigned int = 0); + + static std::string descr() { return "CR"; } +}; + +template +CR_1_σpRσp_1::CR_1_σpRσp_1(const std::shared_ptr &Tint, + unsigned int) + : ParentType(Tint, 0) { + assert(Tint->num_func_bra(/* particle */ 0) == 1); + assert(Tint->num_func_ket(/* particle */ 0) == 1); + const auto &a = Tint->bra(0, 0); + const auto &b = Tint->ket(0, 0); + const auto &oper = Tint->oper(); + + // express σ·p r_k σ·p in terms of derivative integrals of the dipole + // operator r_k for primitive Gaussians only + if (a.contracted() || b.contracted()) return; + + using namespace libint2::algebra; + using namespace libint2::prefactor; + using libint2::algebra::operator*; + + ChildFactory, + EmptySet>> + factory(this); + + constexpr auto x = 0; + constexpr auto y = 1; + constexpr auto z = 2; + + F Dx_a{a}; + Dx_a.deriv().inc(x); + F Dx_b{b}; + Dx_b.deriv().inc(x); + F Dy_a{a}; + Dy_a.deriv().inc(y); + F Dy_b{b}; + Dy_b.deriv().inc(y); + F Dz_a{a}; + Dz_a.deriv().inc(z); + F Dz_b{b}; + Dz_b.deriv().inc(z); + + // Build the dipole multipole descriptor for direction k. 
+ const auto k = oper->descr().dipole_dir(); + CartesianMultipole_Descr<3u> mu_k; + mu_k.inc(k, 1); // r_k = (kx,ky,kz) with k_k = 1, others 0 + + // Pauli quaternion fold per (k, q): + // q=0: trace δ_ab → Σ_a (∂_a μ | r_k | ∂_a ν) + // q=1: σ_x antisym → (∂_y μ | r_k | ∂_z ν) − (∂_z μ | r_k | ∂_y ν) + // q=2: σ_y antisym → (∂_z μ | r_k | ∂_x ν) − (∂_x μ | r_k | ∂_z ν) + // q=3: σ_z antisym → (∂_x μ | r_k | ∂_y ν) − (∂_y μ | r_k | ∂_x ν) + switch (oper->descr().quaternion_index()) { + case 0: { + auto Dx_a_R_Dx_b = factory.make_child(Dx_a, Dx_b, EmptySet(), mu_k); + auto Dy_a_R_Dy_b = factory.make_child(Dy_a, Dy_b, EmptySet(), mu_k); + auto Dz_a_R_Dz_b = factory.make_child(Dz_a, Dz_b, EmptySet(), mu_k); + if (is_simple()) { + expr_ = Dx_a_R_Dx_b + Dy_a_R_Dy_b + Dz_a_R_Dz_b; + nflops_ += 2; + } + } break; + case 1: { + auto Dy_a_R_Dz_b = factory.make_child(Dy_a, Dz_b, EmptySet(), mu_k); + auto Dz_a_R_Dy_b = factory.make_child(Dz_a, Dy_b, EmptySet(), mu_k); + if (is_simple()) { + expr_ = Dy_a_R_Dz_b - Dz_a_R_Dy_b; + nflops_ += 1; + } + } break; + case 2: { + auto Dz_a_R_Dx_b = factory.make_child(Dz_a, Dx_b, EmptySet(), mu_k); + auto Dx_a_R_Dz_b = factory.make_child(Dx_a, Dz_b, EmptySet(), mu_k); + if (is_simple()) { + expr_ = Dz_a_R_Dx_b - Dx_a_R_Dz_b; + nflops_ += 1; + } + } break; + case 3: { + auto Dx_a_R_Dy_b = factory.make_child(Dx_a, Dy_b, EmptySet(), mu_k); + auto Dy_a_R_Dx_b = factory.make_child(Dy_a, Dx_b, EmptySet(), mu_k); + if (is_simple()) { + expr_ = Dx_a_R_Dy_b - Dy_a_R_Dx_b; + nflops_ += 1; + } + } break; + default: + throw std::runtime_error("CR_1_σpRσp_1: invalid quaternionic index"); + } + +} // CR_1_σpRσp_1::CR_1_σpRσp_1 + +}; // namespace libint2 + +#endif // LIBINT_COMP_1_ΣPRΣP_1_H diff --git a/src/bin/libint/master_ints_list.h b/src/bin/libint/master_ints_list.h index ee9ca39d4..2a6a7cd0f 100644 --- a/src/bin/libint/master_ints_list.h +++ b/src/bin/libint/master_ints_list.h @@ -21,12 +21,15 @@ #ifndef _libint2_src_bin_libint_masterintslist_h_ 
#define _libint2_src_bin_libint_masterintslist_h_ -// need extra-long mpl list +// need extra-long mpl list — split across two sub-lists and joined via +// boost::mpl::joint_view, since boost only ships pre-generated list headers up +// to size 50. #define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS #define BOOST_MPL_LIMIT_LIST_SIZE 50 #include #include +#include #include #if LIBINT_SUPPORT_ONEBODYINTS #include @@ -47,6 +50,8 @@ typedef GenIntegralSet_1_1 ElecPot_1_1_sh; typedef GenIntegralSet_1_1 ElecPot_1_1_int; typedef GenIntegralSet_1_1 σpVσp_1_1_sh; typedef GenIntegralSet_1_1 σpVσp_1_1_int; +typedef GenIntegralSet_1_1 σpRσp_1_1_sh; +typedef GenIntegralSet_1_1 σpRσp_1_1_int; typedef GenIntegralSet_1_1, EmptySet> CMultipole_1_1_sh; typedef GenIntegralSet_1_1, EmptySet> @@ -148,12 +153,14 @@ typedef boost::mpl::list< Overlap_1_1_sh_y, Overlap_1_1_int_y, Overlap_1_1_sh_z, Overlap_1_1_int_z, Kinetic_1_1_sh, Kinetic_1_1_int, Kinetic_1_1_sh_x, Kinetic_1_1_int_x, Kinetic_1_1_sh_y, Kinetic_1_1_int_y, Kinetic_1_1_sh_z, Kinetic_1_1_int_z, - ElecPot_1_1_sh, ElecPot_1_1_int, σpVσp_1_1_sh, σpVσp_1_1_int, - CMultipole_1_1_sh, CMultipole_1_1_int, CMultipole_1_1_sh_x, + ElecPot_1_1_sh, ElecPot_1_1_int, σpVσp_1_1_sh, σpVσp_1_1_int, σpRσp_1_1_sh, + σpRσp_1_1_int, CMultipole_1_1_sh, CMultipole_1_1_int, CMultipole_1_1_sh_x, CMultipole_1_1_sh_y, CMultipole_1_1_sh_z, CMultipole_1_1_int_x, CMultipole_1_1_int_y, CMultipole_1_1_int_z, SMultipole_1_1_sh, - SMultipole_1_1_int, #endif + SMultipole_1_1_int> + MasterIntegralTypeList_1body_part; +typedef boost::mpl::list< TwoPRep_11_11_sq, TwoPRep_11_11_int, Coulombσpσp_11_11_sq, Coulombσpσp_11_11_int, σpσpCoulombσpσp_11_11_sq, σpσpCoulombσpσp_11_11_int, opCoulombop_11_11_sq, opCoulombop_11_11_int, R12kG12_11_11_sq, @@ -161,6 +168,9 @@ typedef boost::mpl::list< TiG12_11_11_sq, TiG12_11_11_int, G12TiG12_11_11_sq, G12TiG12_11_11_int, DivG12prime_xTx_11_11_sq, DivG12prime_xTx_11_11_int, DummySymmIntegral_11_11_sq, DummySymmIntegral_11_11_int> + 
MasterIntegralTypeList_2body_part; +typedef boost::mpl::joint_view MasterIntegralTypeList; }; // namespace libint2 diff --git a/src/bin/libint/master_rrs_list.h b/src/bin/libint/master_rrs_list.h index 62a7bfb08..067ccddf3 100644 --- a/src/bin/libint/master_rrs_list.h +++ b/src/bin/libint/master_rrs_list.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -182,6 +183,9 @@ typedef VRR_1_ElecPot_1 VRR_b_1_ElecPot_1_int; typedef CR_1_σpVσp_1 CR_1_σpVσp_1_sh; typedef CR_1_σpVσp_1 CR_1_σpVσp_1_int; +typedef CR_1_σpRσp_1 CR_1_σpRσp_1_sh; +typedef CR_1_σpRσp_1 CR_1_σpRσp_1_int; + // TODO investigate whether need to stay away from HRR for now to be sure that // multipoles are computed as precisely as possible typedef HRR diff --git a/src/bin/libint/oper.h b/src/bin/libint/oper.h index 8e8a4f567..b231d80cc 100644 --- a/src/bin/libint/oper.h +++ b/src/bin/libint/oper.h @@ -321,6 +321,45 @@ struct σpVσp_Descr : public Contractable<σpVσp_Descr> { }; typedef GenOper<σpVσp_Descr> σpVσpOper; +/** opRop: (μ σ·p | r | σ·p ν), one-body σ·p-on-both-sides analog of dipole. + * σ_a σ_b = δ_ab + iε_abc σ_c folds the 9 raw σ_a∂_a r_k σ_b∂_b dyadics per + * dipole direction k down to 4 Pauli-quaternion components (trace + 3 + * antisym), mirroring σpVσp's fold of σ·p V σ·p. 12 outputs total = 3 dipole + * directions × 4 Pauli components, indexed composite_index = 4·k + q. 
+ */ +struct σpRσp_Descr : public Contractable<σpRσp_Descr> { + typedef MultiplicativeODep1Body_Props Properties; + + σpRσp_Descr() : composite_index_(0) {} + σpRσp_Descr(int composite_index) : composite_index_(composite_index) { + assert(composite_index >= 0 && composite_index < 12); + } + + static const unsigned int max_key = 12; + unsigned int key() const { return composite_index(); } + std::string description() const { + static const char* dipole_lbl[] = {"x", "y", "z"}; + static const char* pauli_lbl[] = {"0", "X", "Y", "Z"}; + const auto ci = composite_index(); + if (ci < 0 || ci >= 12) abort(); + return std::string("opRop[") + dipole_lbl[ci / 4] + "," + + pauli_lbl[ci % 4] + "]"; + } + std::string label() const { return description(); } + int psymm(int i, int j) const { abort(); } + int hermitian(int i) const { return +1; } + + int composite_index() const { return composite_index_; } + /// dipole direction ∈ {0=x, 1=y, 2=z} + int dipole_dir() const { return composite_index_ / 4; } + /// Pauli quaternion ∈ {0=trace, 1=σ_x, 2=σ_y, 3=σ_z} + int quaternion_index() const { return composite_index_ % 4; } + + private: + const int composite_index_ = -1; +}; +typedef GenOper<σpRσp_Descr> σpRσpOper; + /// cartesian multipole operator in \c NDIM dimensions /// \f$ \hat{O}(\vec{k}) \equiv \vec{r}^{\cdot \vec{k}} = r_1^{k_1} r_2^{k_2} /// \cdots \f$ \internal OriginDerivative is used to store cartesian diff --git a/src/bin/libint/strategy.cc b/src/bin/libint/strategy.cc index e59f6e92f..3f0c36b0c 100644 --- a/src/bin/libint/strategy.cc +++ b/src/bin/libint/strategy.cc @@ -315,6 +315,14 @@ struct MasterStrategy<σpVσp_1_1_int> { typedef boost::mpl::list value; }; template <> +struct MasterStrategy<σpRσp_1_1_sh> { + typedef boost::mpl::list value; +}; +template <> +struct MasterStrategy<σpRσp_1_1_int> { + typedef boost::mpl::list value; +}; +template <> struct MasterStrategy { typedef boost::mpl::list> value; }; From 86486371071fcbc41c36b92665c13a56ebabe1ed Mon Sep 17 
00:00:00 2001 From: Kshitij Surjuse Date: Thu, 30 Apr 2026 14:53:03 -0400 Subject: [PATCH 21/22] =?UTF-8?q?Rename=20opCoulombop=20to=20=CF=83pCoulom?= =?UTF-8?q?b=CF=83p=20for=20naming=20consistency=20with=20sibling=20RKB=20?= =?UTF-8?q?integrals?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- export/tests/unit/test-2body.cc | 2 +- include/libint2/engine.impl.h | 4 +- src/bin/libint/build_libint.cc | 10 +-- .../comp_11_\317\203pCoulomb\317\203p_11.h" | 66 +++++++++---------- src/bin/libint/master_ints_list.h | 8 +-- src/bin/libint/master_rrs_list.h | 6 +- src/bin/libint/oper.h | 10 +-- src/bin/libint/strategy.cc | 8 +-- 8 files changed, 57 insertions(+), 57 deletions(-) rename src/bin/libint/comp_11_opCoulombop_11.h => "src/bin/libint/comp_11_\317\203pCoulomb\317\203p_11.h" (85%) diff --git a/export/tests/unit/test-2body.cc b/export/tests/unit/test-2body.cc index 60b6cf812..dc0c01b73 100644 --- a/export/tests/unit/test-2body.cc +++ b/export/tests/unit/test-2body.cc @@ -723,7 +723,7 @@ TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { } // Project raw dyadic into the 9 SO(3) irrep components used by - // op_coulomb_op (must match opCoulombop_Descr::Component order). + // op_coulomb_op (must match σpCoulombσp_Descr::Component order). const auto Txx = ref_raw[0]; const auto Txy = ref_raw[1]; const auto Txz = ref_raw[2]; diff --git a/include/libint2/engine.impl.h b/include/libint2/engine.impl.h index abf0db4e7..01269538d 100644 --- a/include/libint2/engine.impl.h +++ b/include/libint2/engine.impl.h @@ -1296,7 +1296,7 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( swap_tbra = swap_tket = swap_p1p2; } } else if (oper_ == Operator::op_coulomb_op) { - // opCoulombop: only bra↔ket (particle 1↔2) swap is a symmetry (with + // σpCoulombσp: only bra↔ket (particle 1↔2) swap is a symmetry (with // (a,b)↔(b,a) component remap). 
Within-side swap is NOT a symmetry // because σ·p attaches to one specific function per side; moving it to // the other function changes the integral in a way IBP cannot recover @@ -1341,7 +1341,7 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( swap_tbra = swap_tket = swap_p1p2; } } else if (oper_ == Operator::op_coulomb_op) { - // opCoulombop: only bra↔ket swap is a symmetry (with (a,b)↔(b,a) remap). + // σpCoulombσp: only bra↔ket swap is a symmetry (with (a,b)↔(b,a) remap). // ORCA canonical form: la+lb >= lc+ld only. const auto bra_total = tbra1.contr[0].l + tbra2.contr[0].l; const auto ket_total = tket1.contr[0].l + tket2.contr[0].l; diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index 206cd43c4..67700a630 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -625,7 +625,7 @@ void try_main(int argc, char* argv[]) { #define BOOST_PP_RKB_ERI_TASK_TUPLE \ (coulomb_opop, opop_coulomb_opop, op_coulomb_op) #define BOOST_PP_RKB_ERI_TASK_OPER_TUPLE \ - (CoulombσpσpOper, σpσpCoulombσpσpOper, opCoulombopOper) + (CoulombσpσpOper, σpσpCoulombσpσpOper, σpCoulombσpOper) #define BOOST_PP_RKB_ERI_TASK_LIST \ BOOST_PP_TUPLE_TO_LIST(BOOST_PP_RKB_ERI_TASK_TUPLE) #define BOOST_PP_RKB_ERI_TASK_OPER_LIST \ @@ -1163,7 +1163,7 @@ static void build_TwoPRep_2b_2k( std::shared_ptr context(new CppCodeContext(cparams)); std::shared_ptr memman(new WorstFitMemoryManager()); - // opCoulombop has a 2-fold bra↔ket-swap symmetry, with per-component sign + // σpCoulombσp has a 2-fold bra↔ket-swap symmetry, with per-component sign // flips under the swap (Antisym* flip sign; Scalar/SymTL* invariant) — this // is captured by p1_p2_swappable=true plus a dedicated predicate that // canonicalizes only la+lb<=lc+ld and emits code for *all* within-side @@ -1179,12 +1179,12 @@ static void build_TwoPRep_2b_2k( for (unsigned int lb = 0; lb <= lmax; lb++) { for (unsigned int lc = 0; lc <= lmax; lc++) { for (unsigned int 
ld = 0; ld <= lmax; ld++) { - // opCoulombop: only bra↔ket (particle 1↔2) swap is a symmetry. + // σpCoulombσp: only bra↔ket (particle 1↔2) swap is a symmetry. // Within-side swap is NOT (σ·p would move to a different physical // center; IBP cannot repair the sign across 1/r12). Dedicated // predicate canonicalizes la+lb<=lc+ld only (ORCA: >=) and accepts // all within-side orderings. - if constexpr (std::is_same::value) { + if constexpr (std::is_same::value) { #if LIBINT_SHELL_SET == LIBINT_SHELL_SET_STANDARD if (!(la + lb <= lc + ld)) continue; #else @@ -1229,7 +1229,7 @@ static void build_TwoPRep_2b_2k( descrs.emplace_back(OperDescrType(p)); } } - if constexpr (std::is_same::value) { + if constexpr (std::is_same::value) { // reset descriptors array descrs.resize(0); // iterate over 9 SO(3) irrep components: 1 scalar trace + 3 diff --git a/src/bin/libint/comp_11_opCoulombop_11.h "b/src/bin/libint/comp_11_\317\203pCoulomb\317\203p_11.h" similarity index 85% rename from src/bin/libint/comp_11_opCoulombop_11.h rename to "src/bin/libint/comp_11_\317\203pCoulomb\317\203p_11.h" index 38ae4763e..7391d3d31 100644 --- a/src/bin/libint/comp_11_opCoulombop_11.h +++ "b/src/bin/libint/comp_11_\317\203pCoulomb\317\203p_11.h" @@ -18,8 +18,8 @@ * */ -#ifndef LIBINT_COMP_11_OPCOULOMBOP_11_H -#define LIBINT_COMP_11_OPCOULOMBOP_11_H +#ifndef LIBINT_COMP_11_ΣPCOULOMBΣP_11_H +#define LIBINT_COMP_11_ΣPCOULOMBΣP_11_H #include #include @@ -43,15 +43,15 @@ namespace libint2 { * @tparam F basis function type. 
valid choices are CGShell or CGF */ template -class CR_11_opCoulombop_11 +class CR_11_σpCoulombσp_11 : public GenericRecurrenceRelation< - CR_11_opCoulombop_11, F, - GenIntegralSet_11_11> { + CR_11_σpCoulombσp_11, F, + GenIntegralSet_11_11> { public: - typedef CR_11_opCoulombop_11 ThisType; + typedef CR_11_σpCoulombσp_11 ThisType; typedef F BasisFunctionType; - typedef opCoulombopOper OperType; - typedef GenIntegralSet_11_11 TargetType; + typedef σpCoulombσpOper OperType; + typedef GenIntegralSet_11_11 TargetType; typedef GenericRecurrenceRelation ParentType; friend class GenericRecurrenceRelation&, unsigned int = 0); + CR_11_σpCoulombσp_11(const std::shared_ptr&, unsigned int = 0); static std::string descr() { return "CR"; } @@ -78,7 +78,7 @@ class CR_11_opCoulombop_11 // All shell quartets with the same quaternion component share one function. std::string generate_label() const override { - return "CR_opCoulombop_" + + return "CR_σpCoulombσp_" + std::to_string(target_->oper()->descr().component_index()); } @@ -153,30 +153,30 @@ class CR_11_opCoulombop_11 std::string rhs; unsigned int nflops = 0; switch (comp) { - case opCoulombop_Descr::Scalar: + case σpCoulombσp_Descr::Scalar: rhs = "src0[hsi] + src1[hsi] + src2[hsi]"; nflops = 2; break; - case opCoulombop_Descr::AntisymX: - case opCoulombop_Descr::AntisymY: - case opCoulombop_Descr::AntisymZ: - case opCoulombop_Descr::SymTLDiagA: + case σpCoulombσp_Descr::AntisymX: + case σpCoulombσp_Descr::AntisymY: + case σpCoulombσp_Descr::AntisymZ: + case σpCoulombσp_Descr::SymTLDiagA: rhs = "src0[hsi] - src1[hsi]"; nflops = 1; break; - case opCoulombop_Descr::SymTLDiagB: + case σpCoulombσp_Descr::SymTLDiagB: rhs = "2.0*src0[hsi] - src1[hsi] - src2[hsi]"; nflops = 3; break; - case opCoulombop_Descr::SymTLOffXY: - case opCoulombop_Descr::SymTLOffXZ: - case opCoulombop_Descr::SymTLOffYZ: + case σpCoulombσp_Descr::SymTLOffXY: + case σpCoulombσp_Descr::SymTLOffXZ: + case σpCoulombσp_Descr::SymTLOffYZ: rhs = "src0[hsi] + 
src1[hsi]"; nflops = 1; break; default: throw std::runtime_error( - "CR_11_opCoulombop_11::generate_code: invalid component index"); + "CR_11_σpCoulombσp_11::generate_code: invalid component index"); } def << "target[hsi] = " << rhs << ";\n}\n"; def << "/** Number of flops = " << nflops << " */\n"; @@ -186,7 +186,7 @@ class CR_11_opCoulombop_11 }; template -CR_11_opCoulombop_11::CR_11_opCoulombop_11( +CR_11_σpCoulombσp_11::CR_11_σpCoulombσp_11( const std::shared_ptr& Tint, unsigned int) : ParentType(Tint, 0) { assert(Tint->num_func_bra(/* particle */ 0) == 1); @@ -231,7 +231,7 @@ CR_11_opCoulombop_11::CR_11_opCoulombop_11( }; switch (oper->descr().component_index()) { - case opCoulombop_Descr::Scalar: { + case σpCoulombσp_Descr::Scalar: { auto Txx = T(x, x); auto Tyy = T(y, y); auto Tzz = T(z, z); @@ -240,7 +240,7 @@ CR_11_opCoulombop_11::CR_11_opCoulombop_11( nflops_ += 2; } } break; - case opCoulombop_Descr::AntisymX: { + case σpCoulombσp_Descr::AntisymX: { auto Tyz = T(y, z); auto Tzy = T(z, y); if (is_simple()) { @@ -248,7 +248,7 @@ CR_11_opCoulombop_11::CR_11_opCoulombop_11( nflops_ += 1; } } break; - case opCoulombop_Descr::AntisymY: { + case σpCoulombσp_Descr::AntisymY: { auto Tzx = T(z, x); auto Txz = T(x, z); if (is_simple()) { @@ -256,7 +256,7 @@ CR_11_opCoulombop_11::CR_11_opCoulombop_11( nflops_ += 1; } } break; - case opCoulombop_Descr::AntisymZ: { + case σpCoulombσp_Descr::AntisymZ: { auto Txy = T(x, y); auto Tyx = T(y, x); if (is_simple()) { @@ -264,7 +264,7 @@ CR_11_opCoulombop_11::CR_11_opCoulombop_11( nflops_ += 1; } } break; - case opCoulombop_Descr::SymTLDiagA: { + case σpCoulombσp_Descr::SymTLDiagA: { auto Txx = T(x, x); auto Tyy = T(y, y); if (is_simple()) { @@ -272,7 +272,7 @@ CR_11_opCoulombop_11::CR_11_opCoulombop_11( nflops_ += 1; } } break; - case opCoulombop_Descr::SymTLDiagB: { + case σpCoulombσp_Descr::SymTLDiagB: { // 2·T_zz − T_xx − T_yy: child order (Tzz, Txx, Tyy) matches generate_code auto Tzz = T(z, z); auto Txx = T(x, x); @@ 
-282,7 +282,7 @@ CR_11_opCoulombop_11::CR_11_opCoulombop_11( nflops_ += 3; } } break; - case opCoulombop_Descr::SymTLOffXY: { + case σpCoulombσp_Descr::SymTLOffXY: { auto Txy = T(x, y); auto Tyx = T(y, x); if (is_simple()) { @@ -290,7 +290,7 @@ CR_11_opCoulombop_11::CR_11_opCoulombop_11( nflops_ += 1; } } break; - case opCoulombop_Descr::SymTLOffXZ: { + case σpCoulombσp_Descr::SymTLOffXZ: { auto Txz = T(x, z); auto Tzx = T(z, x); if (is_simple()) { @@ -298,7 +298,7 @@ CR_11_opCoulombop_11::CR_11_opCoulombop_11( nflops_ += 1; } } break; - case opCoulombop_Descr::SymTLOffYZ: { + case σpCoulombσp_Descr::SymTLOffYZ: { auto Tyz = T(y, z); auto Tzy = T(z, y); if (is_simple()) { @@ -307,11 +307,11 @@ CR_11_opCoulombop_11::CR_11_opCoulombop_11( } } break; default: - throw std::runtime_error("CR_11_opCoulombop_11: invalid component index"); + throw std::runtime_error("CR_11_σpCoulombσp_11: invalid component index"); } -} // CR_11_opCoulombop_11::CR_11_opCoulombop_11 +} // CR_11_σpCoulombσp_11::CR_11_σpCoulombσp_11 } // namespace libint2 -#endif // LIBINT_COMP_11_OPCOULOMBOP_11_H +#endif // LIBINT_COMP_11_ΣPCOULOMBΣP_11_H diff --git a/src/bin/libint/master_ints_list.h b/src/bin/libint/master_ints_list.h index 2a6a7cd0f..158052fba 100644 --- a/src/bin/libint/master_ints_list.h +++ b/src/bin/libint/master_ints_list.h @@ -118,9 +118,9 @@ typedef GenIntegralSet_11_11 σpσpCoulombσpσp_11_11_sq; typedef GenIntegralSet_11_11 σpσpCoulombσpσp_11_11_int; -typedef GenIntegralSet_11_11 - opCoulombop_11_11_sq; -typedef GenIntegralSet_11_11 opCoulombop_11_11_int; +typedef GenIntegralSet_11_11 + σpCoulombσp_11_11_sq; +typedef GenIntegralSet_11_11 σpCoulombσp_11_11_int; typedef GenIntegralSet_11_11 R12kG12_11_11_sq; typedef GenIntegralSet_11_11 R12kG12_11_11_int; typedef GenIntegralSet_11_11 @@ -163,7 +163,7 @@ typedef boost::mpl::list< typedef boost::mpl::list< TwoPRep_11_11_sq, TwoPRep_11_11_int, Coulombσpσp_11_11_sq, Coulombσpσp_11_11_int, σpσpCoulombσpσp_11_11_sq, 
σpσpCoulombσpσp_11_11_int, - opCoulombop_11_11_sq, opCoulombop_11_11_int, R12kG12_11_11_sq, + σpCoulombσp_11_11_sq, σpCoulombσp_11_11_int, R12kG12_11_11_sq, R12kG12_11_11_int, R12kR12lG12_11_11_sq, R12kR12lG12_11_11_int, TiG12_11_11_sq, TiG12_11_11_int, G12TiG12_11_11_sq, G12TiG12_11_11_int, DivG12prime_xTx_11_11_sq, DivG12prime_xTx_11_11_int, diff --git a/src/bin/libint/master_rrs_list.h b/src/bin/libint/master_rrs_list.h index 067ccddf3..ae4e32311 100644 --- a/src/bin/libint/master_rrs_list.h +++ b/src/bin/libint/master_rrs_list.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -325,8 +325,8 @@ typedef CR_11_Coulombσpσp_11 CR_11_Coulombσpσp_11_int; typedef CR_11_σpσpCoulombσpσp_11 CR_11_σpσpCoulombσpσp_11_sh; typedef CR_11_σpσpCoulombσpσp_11 CR_11_σpσpCoulombσpσp_11_int; -typedef CR_11_opCoulombop_11 CR_11_opCoulombop_11_sh; -typedef CR_11_opCoulombop_11 CR_11_opCoulombop_11_int; +typedef CR_11_σpCoulombσp_11 CR_11_σpCoulombσp_11_sh; +typedef CR_11_σpCoulombσp_11 CR_11_σpCoulombσp_11_int; }; // namespace libint2 #endif // header guard diff --git a/src/bin/libint/oper.h b/src/bin/libint/oper.h index b231d80cc..29b367378 100644 --- a/src/bin/libint/oper.h +++ b/src/bin/libint/oper.h @@ -515,7 +515,7 @@ struct σpσpCoulombσpσp_Descr : public Contractable<σpσpCoulombσpσp_Descr }; typedef GenOper<σpσpCoulombσpσp_Descr> σpσpCoulombσpσpOper; -/** opCoulombop: (μ σ·p ν | 1/r_{12} | κ σ·p λ). +/** σpCoulombσp: (μ σ·p ν | 1/r_{12} | κ σ·p λ). * Gaunt LS "bilinear" operator with one σ·p on each side. * Outputs the SO(3) irreducible decomposition of the 3×3 gradient-gradient * tensor T_{ab} = ∂_a ∂_b (μν|κλ): 1 scalar trace + 3 antisymmetric @@ -524,7 +524,7 @@ typedef GenOper<σpσpCoulombσpσp_Descr> σpσpCoulombσpσpOper; * need to hand-build trace/antisym/sym-TL combinations at every contraction * site. 
*/ -struct opCoulombop_Descr : public Contractable { +struct σpCoulombσp_Descr : public Contractable<σpCoulombσp_Descr> { typedef MultiplicativeSymm2Body_Props Properties; /// SO(3) irreducible components of the rank-2 Cartesian tensor T_{ab}. @@ -540,8 +540,8 @@ struct opCoulombop_Descr : public Contractable { SymTLOffYZ = 8, ///< T_yz + T_zy }; - opCoulombop_Descr() : component_index_(0) {} - opCoulombop_Descr(int component_index) : component_index_(component_index) { + σpCoulombσp_Descr() : component_index_(0) {} + σpCoulombσp_Descr(int component_index) : component_index_(component_index) { assert(component_index >= 0 && component_index <= 8); } @@ -576,7 +576,7 @@ struct opCoulombop_Descr : public Contractable { private: const int component_index_ = -1; }; -typedef GenOper opCoulombopOper; +typedef GenOper<σpCoulombσp_Descr> σpCoulombσpOper; /** GTG_1d is the two-body 1-dimensional Gaussian geminal */ diff --git a/src/bin/libint/strategy.cc b/src/bin/libint/strategy.cc index 3f0c36b0c..e01f76a7d 100644 --- a/src/bin/libint/strategy.cc +++ b/src/bin/libint/strategy.cc @@ -136,12 +136,12 @@ struct MasterStrategy<σpσpCoulombσpσp_11_11_int> { typedef boost::mpl::list value; }; template <> -struct MasterStrategy { - typedef boost::mpl::list value; +struct MasterStrategy<σpCoulombσp_11_11_sq> { + typedef boost::mpl::list value; }; template <> -struct MasterStrategy { - typedef boost::mpl::list value; +struct MasterStrategy<σpCoulombσp_11_11_int> { + typedef boost::mpl::list value; }; #if LIBINT_SHELLQUARTET_STRATEGY == LIBINT_SHELLQUARTET_STRATEGY_A0C0 From e5464698cf1a3f1a61f9c27d7fda87af6ed52827 Mon Sep 17 00:00:00 2001 From: Kshitij Surjuse Date: Tue, 12 May 2026 13:18:18 -0400 Subject: [PATCH 22/22] 3-center integrals in RKB basis --- CMakeLists.txt | 12 ++ cmake/modules/int_am.cmake | 15 +- export/tests/unit/test-2body.cc | 141 ++++++++++++++++++ include/libint2/config.h.cmake.in | 19 +++ include/libint2/engine.impl.h | 31 +++- src/bin/libint/build_libint.cc | 
235 +++++++++++++++++++++++++----- 6 files changed, 408 insertions(+), 45 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 90b641733..cd2d49359 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -142,6 +142,10 @@ option_with_default(LIBINT2_ENABLE_ERI2 option_with_default(LIBINT2_ENABLE_RKB_ERI "Compile with support for up to N-th derivatives of relativistic restricted kinetic balance (RKB) 4-center electron repulsion integrals (-1 for OFF)" 0) +option_with_default(LIBINT2_ENABLE_RKB_ERI3 + "Compile with support for up to N-th derivatives of relativistic restricted kinetic + balance (RKB) 3-center electron repulsion integrals (-1 for OFF). + σ·p acts on the two paired (AO) centers; the unpaired/fitting center is a spectator." -1) option_with_default(LIBINT2_ENABLE_G12 "Compile with support for N-th derivatives of MP2-F12 energies with Gaussian factors (-1 for OFF)" -1) option_with_default(LIBINT2_ENABLE_G12DKH @@ -228,6 +232,14 @@ option_with_default(LIBINT2_RKB_ERI_OPT_AM "Optimize relativistic restricted kinetic balance (RKB) 4-center ERIs maximally for up to angular momentum N (N <= max-am). Can specify values for each derivative level as a semicolon-separated string (default: (max_am/2)+1)" -1) +option_with_default(LIBINT2_RKB_ERI3_MAX_AM + "Support relativistic restricted kinetic balance (RKB) 3-center ERIs for Gaussians of angular momentum up to N. + Can specify values for each derivative level as a semicolon-separated string. (default: max_am) + This option controls only the single fitting center. The paired centers (on which σ·p acts) use LIBINT2_MAX_AM." -1) +option_with_default(LIBINT2_RKB_ERI3_OPT_AM + "Optimize relativistic restricted kinetic balance (RKB) 3-center ERIs maximally for up to angular momentum N (N <= max-am). + Can specify values for each derivative level as a semicolon-separated string (default: (max_am/2)+1)" -1) + option_with_default(LIBINT2_ERI3_MAX_AM "Support 3-center ERIs for Gaussians of angular momentum up to N. 
diff --git a/cmake/modules/int_am.cmake b/cmake/modules/int_am.cmake index 350924f49..2dbd1904a 100644 --- a/cmake/modules/int_am.cmake +++ b/cmake/modules/int_am.cmake @@ -358,6 +358,7 @@ endmacro() process_integrals_class(ONEBODY) process_integrals_class(ERI) process_integrals_class(RKB_ERI) +process_integrals_class(RKB_ERI3) process_integrals_class(ERI3) process_integrals_class(ERI2) # unlike above, these classes (1) don't do AM_LIST and (2) require value in config.h if enabled @@ -397,7 +398,7 @@ list(REVERSE _amlist) list(APPEND Libint2_ERI_COMPONENTS "${_amlist}") message(VERBOSE "setting components ${_amlist}") -foreach(_cls ONEBODY;ERI;RKB_ERI;ERI3;ERI2;G12;G12DKH) +foreach(_cls ONEBODY;ERI;RKB_ERI;RKB_ERI3;ERI3;ERI2;G12;G12DKH) if((_cls STREQUAL G12) OR (_cls STREQUAL G12DKH)) add_feature_info( "integral class ${_cls}" @@ -448,6 +449,18 @@ foreach(_cls ONEBODY;ERI;RKB_ERI;ERI3;ERI2;G12;G12DKH) endforeach() endforeach() endif() + if (_cls STREQUAL "RKB_ERI3") + # Mirror ERI3 component naming: fitting (single) center on bra, + # paired (AO) centers on ket where σ·p acts. Paired-center AM + # tracks LIBINT_MAX_AM via _eri3_candidate0_d${_d}. 
+ foreach(_lfit RANGE ${LIBINT_HARD_MIN_AM} ${_candidate_${_cls}_d${_d}}) # LIBINT_RKB_ERI3_MAX_AM[_LIST], fitting + foreach(_lpr RANGE ${LIBINT_HARD_MIN_AM} ${_eri3_candidate0_d${_d}}) # LIBINT_MAX_AM[_LIST], paired + if (_lfit GREATER_EQUAL _lpr) + list(APPEND _amlist "rkb_eri_${_am${_lpr}}${_am${_lpr}}${_AM${_lfit}}_d${_d}") + endif() + endforeach() + endforeach() + endif() list(REVERSE _amlist) list(APPEND Libint2_ERI_COMPONENTS "${_amlist}") message(VERBOSE "setting components ${_amlist}") diff --git a/export/tests/unit/test-2body.cc b/export/tests/unit/test-2body.cc index dc0c01b73..e37a2bd99 100644 --- a/export/tests/unit/test-2body.cc +++ b/export/tests/unit/test-2body.cc @@ -774,6 +774,147 @@ TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { } } } + + SECTION("Coulombσpσp 3-center xs_xx") { + // 3-center RKB Coulombσpσp integral, (P | σ·p_μ σ·p_ν / r12) with P on + // the bra (fitting/DF) and σ·p acting on the AO pair (μ,ν) on the ket. + // + // Reference: the same operator and BraKet computed via the 4-center + // engine with Shell::unit() at the dummy bra position. The 4-center + // engine path is already validated by the "Coulombσpσp and + // σpσpCoulombσpσp" SECTION above against a primitive-eri reference, so + // by transitivity the 4-center engine is a trustworthy reference for the + // 3-center engine. Both code paths share the dummy-shell trick + // internally, but they are independently generated (different task + // labels, different dispatch tables), so this catches dispatch-table / + // codegen wiring mistakes in the new 3-center path. 
+ + Shell dfsh{{1.5}, {{0, false, {1.0}}}, {{-1.0, 0.5, 0.0}}}; + std::vector dfs{dfsh}; + const auto &unitshell = libint2::Shell::unit(); + + Engine engine_3c, engine_4c_ref; + try { + engine_3c = Engine(Operator::coulomb_opop, + std::max(max_nprim, libint2::max_nprim(dfs)), + std::max(max_l, libint2::max_l(dfs)), 0); + engine_3c.set(BraKet::xs_xx); + // 4-center reference — same operator, default BraKet::xx_xx + engine_4c_ref = Engine(Operator::coulomb_opop, + std::max(max_nprim, libint2::max_nprim(dfs)), + std::max(max_l, libint2::max_l(dfs)), 0); + } catch (Engine::lmax_exceeded &) { + // skip if libint not configured with -DLIBINT2_ENABLE_RKB_ERI3 >= 0 + // (or the 4-center RKB ERI was disabled) + return; + } + + const auto nshell = obs.size(); + for (int sa = 0; sa != nshell; ++sa) { + for (int sb = 0; sb != nshell; ++sb) { + const auto &results_3c = + engine_3c.compute(dfsh, obs[sa], obs[sb]); + // 4-center reference: (dfsh, unit | sh_a, sh_b). σ·p still acts on + // (C, D) = (sh_a, sh_b), matching the 3-center xs_xx mapping. + const auto &results_4c_ref = + engine_4c_ref.compute(dfsh, unitshell, obs[sa], obs[sb]); + assert(results_3c.size() == 4); + assert(results_4c_ref.size() == 4); + + const auto n_df = dfsh.size(); + const auto n_a = obs[sa].size(); + const auto n_b = obs[sb].size(); + const auto n_total = n_df * n_a * n_b; + // 4-center buffer is (df, unit, sa, sb); n_unit = 1; so the linear + // layout coincides with the 3-center (df, sa, sb) layout. + + const double ABS_TOL = 5.0E-14; + const double REL_TOL = 1.0E-9; + for (auto comp = 0; comp < 4; ++comp) { + for (size_t k = 0; k < n_total; ++k) { + const auto v_3c = results_3c[comp][k]; + const auto v_ref = results_4c_ref[comp][k]; + const auto abs_err = std::abs(v_3c - v_ref); + const auto rel_err = + std::abs(v_ref) > 1e-30 ? 
std::abs(abs_err / v_ref) : 0.0; + bool not_ok = rel_err > REL_TOL && abs_err > ABS_TOL; + if (not_ok) { + std::cout << "(df | sa=" << sa << " sb=" << sb + << ") comp=" << comp << " elem=" << k + << ": 3c=" << v_3c << " 4c_ref=" << v_ref + << " abs_err=" << abs_err << " rel_err=" << rel_err + << std::endl; + } + REQUIRE(!not_ok); + } + } + } + } + } + + SECTION("Coulombσpσp 3-center xx_xs alias") { + // The xx_xs braket re-routes through the same xs_xx kernel via the + // Engine::compute2 bra↔ket swap. Verify the alias yields the same + // integral values, with the output buffer in the user-requested + // (sh_a, sh_b | dfsh) index layout. + + Shell dfsh{{1.5}, {{0, false, {1.0}}}, {{-1.0, 0.5, 0.0}}}; + std::vector dfs{dfsh}; + + Engine eng_xs, eng_xxs; + try { + eng_xs = Engine(Operator::coulomb_opop, + std::max(max_nprim, libint2::max_nprim(dfs)), + std::max(max_l, libint2::max_l(dfs)), 0); + eng_xs.set(BraKet::xs_xx); + eng_xxs = Engine(Operator::coulomb_opop, + std::max(max_nprim, libint2::max_nprim(dfs)), + std::max(max_l, libint2::max_l(dfs)), 0); + eng_xxs.set(BraKet::xx_xs); + } catch (Engine::lmax_exceeded &) { + return; + } + + for (size_t sa = 0; sa != obs.size(); ++sa) { + for (size_t sb = 0; sb != obs.size(); ++sb) { + const auto n_df = dfsh.size(); + const auto n_a = obs[sa].size(); + const auto n_b = obs[sb].size(); + + const auto &res_xs = eng_xs.compute(dfsh, obs[sa], obs[sb]); + // re-set xs braket each loop in case Catch2 SECTION re-entry resets + eng_xxs.set(BraKet::xx_xs); + const auto &res_xxs = eng_xxs.compute(obs[sa], obs[sb], dfsh); + + for (auto c = 0; c < 4; ++c) { + for (size_t i_df = 0; i_df < n_df; ++i_df) { + for (size_t i_a = 0; i_a < n_a; ++i_a) { + for (size_t i_b = 0; i_b < n_b; ++i_b) { + const auto v_xs = + res_xs[c][i_df * n_a * n_b + i_a * n_b + i_b]; + const auto v_xxs = + res_xxs[c][i_a * n_b * n_df + i_b * n_df + i_df]; + const auto abs_err = abs(v_xs - v_xxs); + const auto rel_err = std::abs(v_xs) > 1e-30 + ? 
double(abs(abs_err / v_xs)) + : 0.0; + bool not_ok = rel_err > 1.0E-9 && abs_err > 5.0E-14; + if (not_ok) { + std::cout << "xx_xs alias mismatch: (sa=" << sa + << " sb=" << sb << ") comp=" << c + << " idx(df=" << i_df << ",a=" << i_a + << ",b=" << i_b << "): xs=" << v_xs + << " xxs=" << v_xxs << " abs_err=" << abs_err + << std::endl; + } + REQUIRE(!not_ok); + } + } + } + } + } + } + } } TEST_CASE("Erfx_Coulomb integrals", "[engine][2-body]") { diff --git a/include/libint2/config.h.cmake.in b/include/libint2/config.h.cmake.in index 3eb32e2f0..e07f3cbc8 100644 --- a/include/libint2/config.h.cmake.in +++ b/include/libint2/config.h.cmake.in @@ -77,6 +77,12 @@ #undef LIBINT_INCLUDE_RKB_ERI #endif +/* Support 3-center RKB ERI derivatives up to this order */ +#define LIBINT_INCLUDE_RKB_ERI3 @LIBINT_INCLUDE_RKB_ERI3@ +#if @LIBINT_INCLUDE_RKB_ERI3@ == -1 +#undef LIBINT_INCLUDE_RKB_ERI3 +#endif + /* Support 3-center ERI derivatives up to this order */ #define LIBINT_INCLUDE_ERI3 @LIBINT_INCLUDE_ERI3@ @@ -141,6 +147,19 @@ /* Max optimized AM for ERI and its derivatives */ #cmakedefine LIBINT_RKB_ERI_OPT_AM_LIST "@LIBINT_RKB_ERI_OPT_AM_LIST@" +/* Max AM for 3-center RKB ERI fitting center (same for all derivatives; if not defined see LIBINT_RKB_ERI3_MAX_AM_LIST). + The paired AO centers (on which σ·p acts) use LIBINT_MAX_AM. 
*/ +#cmakedefine LIBINT_RKB_ERI3_MAX_AM @LIBINT_RKB_ERI3_MAX_AM@ + +/* Max AM for 3-center RKB ERI and its derivatives */ +#cmakedefine LIBINT_RKB_ERI3_MAX_AM_LIST "@LIBINT_RKB_ERI3_MAX_AM_LIST@" + +/* Max optimized AM for 3-center RKB ERI (same for all derivatives; if not defined see LIBINT_RKB_ERI3_OPT_AM_LIST) */ +#cmakedefine LIBINT_RKB_ERI3_OPT_AM @LIBINT_RKB_ERI3_OPT_AM@ + +/* Max optimized AM for 3-center RKB ERI and its derivatives */ +#cmakedefine LIBINT_RKB_ERI3_OPT_AM_LIST "@LIBINT_RKB_ERI3_OPT_AM_LIST@" + /* Max AM for 3-center ERI (same for all derivatives; if not defined see LIBINT_ERI3_MAX_AM_LIST) */ #cmakedefine LIBINT_ERI3_MAX_AM @LIBINT_ERI3_MAX_AM@ diff --git a/include/libint2/engine.impl.h b/include/libint2/engine.impl.h index 01269538d..85a744768 100644 --- a/include/libint2/engine.impl.h +++ b/include/libint2/engine.impl.h @@ -174,9 +174,22 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute( if (nargs == 2) return (this->*compute_ptr)(shells[0], Shell::unit(), shells[1], Shell::unit(), nullptr, nullptr); - if (nargs == 3) + if (nargs == 3) { + // The 3-arg user form depends on which 3-center BraKet was set: + // xs_xx — bra = (fitting, unit), ket = (AO, AO). + // User passes (fitting, AO, AO), kernel sees + // (fitting, unit, AO, AO). + // xx_xs — bra = (AO, AO), ket = (fitting, unit). + // User passes (AO, AO, fitting), kernel sees + // (AO, AO, fitting, unit). + // The non-xx_xs branch is the default (covers xs_xx and any future + // 3-center variants that follow the xs_xx convention). 
+ if (braket_ == BraKet::xx_xs) + return (this->*compute_ptr)(shells[0], shells[1], shells[2], + Shell::unit(), nullptr, nullptr); return (this->*compute_ptr)(shells[0], Shell::unit(), shells[1], shells[2], nullptr, nullptr); + } if (nargs == 4) return (this->*compute_ptr)(shells[0], shells[1], shells[2], shells[3], nullptr, nullptr); @@ -2015,9 +2028,11 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( break; case BraKet::xx_xs: - assert(false && "this braket is not supported"); - abort(); - break; + // xx_xs is always canonicalized to xs_xx by swap_braket=true in the + // shell-aliasing logic above; the swapped shells (bra1, ket1, ket2) + // are already in xs_xx layout by this point. Fall through to the + // xs_xx case to compute buildfnidx from those swapped shells. + [[fallthrough]]; case BraKet::xs_xx: { /// lmax might be center dependent int ket_lmax = hard_lmax_; @@ -2243,6 +2258,14 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( } if (swap_tket && oper_ == Operator::coulomb_opop && s > 0) oper_cart_component_phase = -1.0; + // For coulomb_opop in BraKet::xx_xs (the 3-center alias of + // xs_xx): after swap_braket the user's bra (AO pair) becomes + // the kernel's ket where σ·p attaches. If swap_tbra is also + // applied to canonicalize that AO pair, σ·p_a σ·p_b ↔ + // σ·p_b σ·p_a — antisymmetric, so x,y,z components (s>0) + // pick up a -1 sign. (The scalar component s=0 is invariant.) + if (swap_tbra && oper_ == Operator::coulomb_opop && s > 0) + oper_cart_component_phase = -1.0; // op_coulomb_op irrep layout under bra↔ket swap: antisym // components (s ∈ {1,2,3}) flip sign; scalar (0) and sym-TL // (4..8) are invariant. 
swap_tket is always false for this diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index 67700a630..5737e167a 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -234,9 +234,11 @@ static void build_TwoPRep_2b_2k( #endif #endif -#ifdef LIBINT_INCLUDE_ERI3 +#if defined(LIBINT_INCLUDE_ERI3) || defined(LIBINT_INCLUDE_RKB_ERI3) +template static void build_TwoPRep_1b_2k( - std::ostream& os, const std::shared_ptr& cparams, + std::ostream& os, std::string label, + const std::shared_ptr& cparams, std::shared_ptr& iface, unsigned int deriv_level); #endif @@ -640,6 +642,32 @@ void try_main(int argc, char* argv[]) { } #endif +#ifdef LIBINT_INCLUDE_RKB_ERI3 + // 3-center RKB ERIs. σ·p acts on the AO pair (ket); the fitting center is a + // spectator. Task labels are namespaced with the "3" ncenter prefix so they + // match the BOOST_PP_NBODYENGINE_MCR3_task expansion in engine.impl.h: + // ncenter("3") + oper("coulomb_opop") + deriv("") = "3coulomb_opop". + // The macro tuples mirror BOOST_PP_RKB_ERI_TASK_TUPLE but only the operators + // that make sense in xs_xx geometry are listed (opop_coulomb_opop requires + // σ·p on both bra and ket, which is incompatible with one side being a + // dummy-s fitting shell; it is intentionally omitted). 
+#define BOOST_PP_RKB_ERI3_TASK_TUPLE (3coulomb_opop) +#define BOOST_PP_RKB_ERI3_TASK_OPER_TUPLE (CoulombσpσpOper) +#define BOOST_PP_RKB_ERI3_TASK_LIST \ + BOOST_PP_TUPLE_TO_LIST(BOOST_PP_RKB_ERI3_TASK_TUPLE) +#define BOOST_PP_RKB_ERI3_TASK_OPER_LIST \ + BOOST_PP_TUPLE_TO_LIST(BOOST_PP_RKB_ERI3_TASK_OPER_TUPLE) + + for (unsigned int d = 0; d <= LIBINT_INCLUDE_RKB_ERI3; ++d) { +#define BOOST_PP_RKB_ERI3_MCR1(r, data, elem) \ + taskmgr.add(task_label(BOOST_PP_STRINGIZE(elem), d)); + + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI3_MCR1, _, + BOOST_PP_RKB_ERI3_TASK_LIST) +#undef BOOST_PP_RKB_ERI3_MCR1 + } +#endif + #ifdef LIBINT_INCLUDE_ERI3 for (unsigned int d = 0; d <= LIBINT_INCLUDE_ERI3; ++d) { taskmgr.add(task_label("3eri", d)); @@ -781,6 +809,75 @@ void try_main(int argc, char* argv[]) { } #endif // LIBINT_INCLUDE_RKB_ERI +#ifdef LIBINT_INCLUDE_RKB_ERI3 + // Per-task CompilationParameters for 3-center RKB ERIs. Parallel structure + // to the LIBINT_INCLUDE_ERI3 block below: + // - center 0 (fitting): LIBINT_RKB_ERI3_MAX_AM[_LIST] + // - centers 1, 2 (AO pair): LIBINT_MAX_AM[_LIST] (paired-center default) + // - num_bf = 3 (3-center dispatch table is 3-dimensional) + for (unsigned int d = 0; d <= LIBINT_INCLUDE_RKB_ERI3; ++d) { +#if defined(LIBINT_RKB_ERI3_MAX_AM_LIST) +#define BOOST_PP_RKB_ERI3_MCR_MAXAM(r, data, elem) \ + cparams->max_am(task_label(BOOST_PP_STRINGIZE(elem), d), \ + token(LIBINT_RKB_ERI3_MAX_AM_LIST, \ + ',', d)); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI3_MCR_MAXAM, _, + BOOST_PP_RKB_ERI3_TASK_LIST) +#undef BOOST_PP_RKB_ERI3_MCR_MAXAM +#elif defined(LIBINT_RKB_ERI3_MAX_AM) +#define BOOST_PP_RKB_ERI3_MCR_MAXAM(r, data, elem) \ + cparams->max_am(task_label(BOOST_PP_STRINGIZE(elem), d), \ + LIBINT_RKB_ERI3_MAX_AM); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI3_MCR_MAXAM, _, + BOOST_PP_RKB_ERI3_TASK_LIST) +#undef BOOST_PP_RKB_ERI3_MCR_MAXAM +#endif +#if defined(LIBINT_RKB_ERI3_OPT_AM_LIST) +#define BOOST_PP_RKB_ERI3_MCR_OPTAM(r, data, elem) \ + 
cparams->max_am_opt(task_label(BOOST_PP_STRINGIZE(elem), d), \ + token(LIBINT_RKB_ERI3_OPT_AM_LIST, \ + ',', d)); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI3_MCR_OPTAM, _, + BOOST_PP_RKB_ERI3_TASK_LIST) +#undef BOOST_PP_RKB_ERI3_MCR_OPTAM +#elif defined(LIBINT_RKB_ERI3_OPT_AM) +#define BOOST_PP_RKB_ERI3_MCR_OPTAM(r, data, elem) \ + cparams->max_am_opt(task_label(BOOST_PP_STRINGIZE(elem), d), \ + LIBINT_RKB_ERI3_OPT_AM); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI3_MCR_OPTAM, _, + BOOST_PP_RKB_ERI3_TASK_LIST) +#undef BOOST_PP_RKB_ERI3_MCR_OPTAM +#endif + // Paired centers (1, 2) follow the default basis AM. +#if defined(LIBINT_MAX_AM_LIST) +#define BOOST_PP_RKB_ERI3_MCR_PAIRED(r, data, elem) \ + cparams->max_am(task_label(BOOST_PP_STRINGIZE(elem), d), \ + cparams->max_am(task_label("default", d)), 1); \ + cparams->max_am(task_label(BOOST_PP_STRINGIZE(elem), d), \ + cparams->max_am(task_label("default", d)), 2); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI3_MCR_PAIRED, _, + BOOST_PP_RKB_ERI3_TASK_LIST) +#undef BOOST_PP_RKB_ERI3_MCR_PAIRED +#else +#define BOOST_PP_RKB_ERI3_MCR_PAIRED(r, data, elem) \ + cparams->max_am(task_label(BOOST_PP_STRINGIZE(elem), d), \ + cparams->max_am("default"), 1); \ + cparams->max_am(task_label(BOOST_PP_STRINGIZE(elem), d), \ + cparams->max_am("default"), 2); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI3_MCR_PAIRED, _, + BOOST_PP_RKB_ERI3_TASK_LIST) +#undef BOOST_PP_RKB_ERI3_MCR_PAIRED +#endif + } + for (unsigned int d = 0; d <= LIBINT_INCLUDE_RKB_ERI3; ++d) { +#define BOOST_PP_RKB_ERI3_MCR_NBF(r, data, elem) \ + cparams->num_bf(task_label(BOOST_PP_STRINGIZE(elem), d), 3); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI3_MCR_NBF, _, + BOOST_PP_RKB_ERI3_TASK_LIST) +#undef BOOST_PP_RKB_ERI3_MCR_NBF + } +#endif // LIBINT_INCLUDE_RKB_ERI3 + #ifdef LIBINT_INCLUDE_ERI3 for (unsigned int d = 0; d <= LIBINT_INCLUDE_ERI3; ++d) { #if defined(LIBINT_ERI3_MAX_AM_LIST) @@ -1016,9 +1113,20 @@ void try_main(int argc, char* argv[]) { } #endif +#ifdef 
LIBINT_INCLUDE_RKB_ERI3 + for (unsigned int d = 0; d <= LIBINT_INCLUDE_RKB_ERI3; ++d) { +#define BOOST_PP_RKB_ERI3_MCR7(r, data, i, elem) \ + build_TwoPRep_1b_2k( \ + os, BOOST_PP_STRINGIZE(elem), cparams, iface, d); + BOOST_PP_LIST_FOR_EACH_I(BOOST_PP_RKB_ERI3_MCR7, _, + BOOST_PP_RKB_ERI3_TASK_LIST) +#undef BOOST_PP_RKB_ERI3_MCR7 + } +#endif + #ifdef LIBINT_INCLUDE_ERI3 for (unsigned int d = 0; d <= LIBINT_INCLUDE_ERI3; ++d) { - build_TwoPRep_1b_2k(os, cparams, iface, d); + build_TwoPRep_1b_2k(os, "3eri", cparams, iface, d); } #if LIBINT_ERI3_PURE_SH iface->to_params(iface->macro_define("ERI3_PURE_SH", 1)); @@ -1398,14 +1506,22 @@ static void build_TwoPRep_2b_2k( #endif // LIBINT_INCLUDE_ERI || LIBINT_INCLUDE_RKB_ERI -#ifdef LIBINT_INCLUDE_ERI3 +#if defined(LIBINT_INCLUDE_ERI3) || defined(LIBINT_INCLUDE_RKB_ERI3) -void build_TwoPRep_1b_2k(std::ostream& os, - const std::shared_ptr& cparams, - std::shared_ptr& iface, - unsigned int deriv_level) { - const std::string task = task_label("3eri", deriv_level); - typedef TwoPRep_11_11_sq TwoPRep_sh_11_11; +// Templated on OperType so the same dummy-center "3-center via 4-center" +// machinery serves plain TwoPRep (ERI3) and RKB CoulombσpσpOper (RKB_ERI3). +// The composite recurrence (CR_11_Coulombσpσp_11) decomposes σ·p_c σ·p_d into +// derivatives of plain TwoPRep_11_11 children; those children naturally route +// through DerivGaussV2 via the strategy registered for TwoPRep_11_11 — so no +// special wiring is needed here for RKB. 
+template +static void build_TwoPRep_1b_2k( + std::ostream& os, std::string label, + const std::shared_ptr& cparams, + std::shared_ptr& iface, unsigned int deriv_level) { + const std::string task = task_label(label, deriv_level); + typedef GenIntegralSet_11_11 TwoBody_sh_11_11; + typedef typename OperType::Descriptor OperDescrType; vector shells; const unsigned int lmax = cparams->max_am(task); const unsigned int lmax_default = @@ -1437,6 +1553,8 @@ void build_TwoPRep_1b_2k(std::ostream& os, std::shared_ptr context(new CppCodeContext(cparams)); std::shared_ptr memman(new WorstFitMemoryManager()); + const auto nullaux = typename TwoBody_sh_11_11::AuxIndexType(0u); + for (unsigned int lbra = 0; lbra <= lmax; lbra++) { for (unsigned int lc = 0; lc <= lmax_default; lc++) { for (unsigned int ld = 0; ld <= lmax_default; ld++) { @@ -1462,11 +1580,19 @@ void build_TwoPRep_1b_2k(std::ostream& os, if (!(lbra == lim && lc == lim && ld == lim)) continue; #endif + // operator components: 1 for plain TwoPRep, 4 for RKB CoulombσpσpOper + std::vector descrs(1); + if constexpr (std::is_same::value) { + descrs.resize(0); + for (int p = 0; p != 4; ++p) descrs.emplace_back(OperDescrType(p)); + } + const auto nopers = descrs.size(); + // unroll only if max_am <= cparams->max_am_opt(task) using std::max; const unsigned int max_am = max(max(lc, ld), lbra); const bool need_to_optimize = (max_am <= cparams->max_am_opt(task)); - const bool need_to_unroll = l_to_cgshellsize(lbra) * + const bool need_to_unroll = nopers * l_to_cgshellsize(lbra) * l_to_cgshellsize(lc) * l_to_cgshellsize(ld) <= cparams->unroll_threshold(); @@ -1475,10 +1601,15 @@ void build_TwoPRep_1b_2k(std::ostream& os, ? 
std::numeric_limits::max() : 0; dg_xxx->registry()->unroll_threshold(unroll_threshold); - dg_xxx->registry()->do_cse(need_to_optimize); - dg_xxx->registry()->condense_expr(condense_expr( - cparams->unroll_threshold(), cparams->max_vector_length() > 1)); - // dg_xxx->registry()->condense_expr(true); + // For multi-component operators (RKB), components share no + // intermediates, so CSE/condense_expr is pure overhead — disable + // (mirrors build_TwoPRep_2b_2k). + const bool do_optimize = (nopers > 1) ? false : need_to_optimize; + dg_xxx->registry()->do_cse(do_optimize); + dg_xxx->registry()->condense_expr( + do_optimize ? condense_expr(cparams->unroll_threshold(), + cparams->max_vector_length() > 1) + : false); // Need to accumulate integrals? dg_xxx->registry()->accumulate_targets(cparams->accumulate_targets()); @@ -1487,7 +1618,7 @@ void build_TwoPRep_1b_2k(std::ostream& os, //////////// // NB translational invariance is now handled by CR_DerivGauss CartesianDerivIterator<3> diter(deriv_level); - std::vector> targets; + std::vector> targets; bool last_deriv = false; do { CGShell a = (dummy_center == 0) ? CGShell::unit() : CGShell(lbra); @@ -1495,8 +1626,12 @@ void build_TwoPRep_1b_2k(std::ostream& os, CGShell c(lc); CGShell d(ld); #if LIBINT_ERI3_PURE_SH - if (dummy_center == 1 && deriv_level == 0) a.pure_sh(true); - if (dummy_center == 0 && deriv_level == 0) b.pure_sh(true); + // pure-SH on the fitting center is meaningful only for plain ERI3; + // RKB CoulombσpσpOper does not assume the fitting center is pure SH. 
+ if constexpr (std::is_same::value) { + if (dummy_center == 1 && deriv_level == 0) a.pure_sh(true); + if (dummy_center == 0 && deriv_level == 0) b.pure_sh(true); + } #endif unsigned int center = 0; @@ -1511,19 +1646,21 @@ void build_TwoPRep_1b_2k(std::ostream& os, ++center; } - // use 4-center integrals - std::shared_ptr abcd = - TwoPRep_sh_11_11::Instance(a, b, c, d, mType(0u)); - targets.push_back(abcd); + // emit one target per operator component (1 for plain ERI3, 4 for + // RKB CoulombσpσpOper) + for (unsigned int op = 0; op != descrs.size(); ++op) { + OperType oper(descrs[op]); + std::shared_ptr abcd = + TwoBody_sh_11_11::Instance(a, b, c, d, nullaux, oper); + targets.push_back(abcd); + } last_deriv = diter.last(); if (!last_deriv) diter.next(); } while (!last_deriv); // append all derivatives as targets to the graph - for (std::vector>::const_iterator t = - targets.begin(); - t != targets.end(); ++t) { + for (auto t = targets.begin(); t != targets.end(); ++t) { std::shared_ptr t_ptr = - std::dynamic_pointer_cast(*t); + std::dynamic_pointer_cast(*t); dg_xxx->append_target(t_ptr); } @@ -1536,27 +1673,44 @@ void build_TwoPRep_1b_2k(std::ostream& os, CGShell c(lc); CGShell d(ld); #if LIBINT_ERI3_PURE_SH - if (dummy_center == 1 && deriv_level == 0) a.pure_sh(true); - if (dummy_center == 0 && deriv_level == 0) b.pure_sh(true); + if constexpr (std::is_same::value) { + if (dummy_center == 1 && deriv_level == 0) a.pure_sh(true); + if (dummy_center == 0 && deriv_level == 0) b.pure_sh(true); + } #endif - std::shared_ptr abcd = - TwoPRep_sh_11_11::Instance(a, b, c, d, mType(0u)); - abcd_label = abcd->label(); + if constexpr (std::is_same::value) { + OperType oper(descrs[0]); + std::shared_ptr abcd = + TwoBody_sh_11_11::Instance(a, b, c, d, nullaux, oper); + abcd_label = abcd->label(); + } else { + // For RKB operators, build the label by hand (matches the + // build_TwoPRep_2b_2k convention) so that the operator name is + // embedded in the source-file name without 
depending on the + // TwoPRep label format. + std::ostringstream oss; + oss << "_" << a.label() << "_" << b.label(); + oss << "_" << label; + oss << "_" << c.label() << "_" << d.label(); + abcd_label = oss.str(); + } } // + derivative level (if deriv_level > 0) - std::string label; + std::string eval_label; { - label = ""; + eval_label = ""; if (deriv_level != 0) { std::ostringstream oss; oss << "deriv" << deriv_level; - label += oss.str(); + eval_label += oss.str(); + } + if constexpr (std::is_same::value) { + eval_label += "eri3"; } - label += "eri3"; - label += abcd_label; + eval_label += abcd_label; } - g_progress.current_task = label; + g_progress.current_task = eval_label; g_progress.print(); std::string prefix(cparams->source_directory()); @@ -1566,7 +1720,7 @@ void build_TwoPRep_1b_2k(std::ostream& os, // this will generate code for this targets, and potentially generate // code for its prerequisites GenerateCode(dg_xxx, context, cparams, strat, tactic, memman, - decl_filenames, def_filenames, prefix, label, false); + decl_filenames, def_filenames, prefix, eval_label, false); // update max stack size and # of targets const std::shared_ptr& tparams = @@ -1580,7 +1734,7 @@ void build_TwoPRep_1b_2k(std::ostream& os, ostringstream oss; oss << context->label_to_function_name("libint2_build_" + task) << "[" << lbra << "][" << lc << "][" << ld - << "] = " << context->label_to_function_name(label) + << "] = " << context->label_to_function_name(eval_label) << context->end_of_stat() << endl; iface->to_static_init(oss.str()); @@ -1602,7 +1756,8 @@ void build_TwoPRep_1b_2k(std::ostream& os, } // end of c loop } // end of bra loop } -#endif // LIBINT_INCLUDE_ERI3 + +#endif // LIBINT_INCLUDE_ERI3 || LIBINT_INCLUDE_RKB_ERI3 #ifdef LIBINT_INCLUDE_ERI2