From 9867fdad8a3c2794571055f80fc513f141489e6c Mon Sep 17 00:00:00 2001 From: joshkamm Date: Wed, 25 Feb 2026 16:24:59 -0500 Subject: [PATCH] Paul's Feb 2026 updates: expanded becke/gauss integrals, hessian, and cintwrapper Major additions to becke.cpp and gauss.cpp with new integral routines, expanded integrals.cpp functionality, hessian updates, and libcint wrapper enhancements. Header files updated accordingly. Co-Authored-By: Claude Opus 4.6 --- env.set.local0 | 10 +- include/becke.h | 19 +- include/cintprep.h | 10 +- include/cintwrapper.h | 17 +- include/elements.h | 2 +- include/gauss.h | 1 + include/integrals.h | 2 +- src/integrals/becke.cpp | 1503 +++++++++++++++++++++++++++++-- src/integrals/gauss.cpp | 543 ++++++++++- src/integrals/hess.cpp | 26 +- src/integrals/integrals.cpp | 304 +++++-- src/integrals/integrals_aux.cpp | 4 +- src/integrals/sphericald.cpp | 2 +- src/integrals/symm.cpp | 2 +- src/libcintw/cintprep.cpp | 8 +- src/libcintw/cintwrapper.cpp | 137 ++- 16 files changed, 2348 insertions(+), 242 deletions(-) diff --git a/env.set.local0 b/env.set.local0 index 29651d9..24ab402 100644 --- a/env.set.local0 +++ b/env.set.local0 @@ -1,6 +1,6 @@ #!/bin/bash -#! Note: Hardcoded paths are for use on the Zimmerman lab cluster, Athena, which runs Rocky Linux 8. +#! Note: Hardcoded paths are for use on the Zimmerman lab cluster, Athena, which runs Rocky Linux 8. #! Other systems may require different paths. # NVHPC 20.7-21.9, and SDK 24.9-24.11 (and respective openmpi versions) have been found to successfully compile. @@ -9,12 +9,14 @@ module load nvidia-sdk/25.5 # Cmake 3.15 and above is required. module load cmake -# Add libcint to PATH for GTO evaluation. +# Add libcint to PATH for GTO evaluation. #! Note: Libcint version 5.3.0 is required for SlaterGPU. #module load libcint -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/ndmeier/libraries/libcint/lib64 -export LIBCINT_PATH=/home/ndmeier/libraries/libcint +#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/ndmeier/libraries/libcint/lib64 +#export LIBCINT_PATH=/home/ndmeier/libraries/libcint +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/paulzim/libcint-master/ +export LIBCINT_PATH=/home/paulzim/libcint-master/ #OpenBLAS is also required for libcint/libcintw operations. Some systems automatically source this, but you may need to explicitly point to it. diff --git a/include/becke.h b/include/becke.h index 5c72108..54d349a 100644 --- a/include/becke.h +++ b/include/becke.h @@ -38,10 +38,21 @@ void compute_rhod(int natoms, int* atno, double* coords, vector > void compute_rhod(int natoms, int* atno, double* coords, vector > &basis, double* Pao, int nrad, int gsa, float* grid, double* rho, double* drho, int prl); void compute_rho(bool gbasis, int natoms, int* atno, double* coords, vector > &basis, double* Pao, int nrad, int gsa, float* grid, double* rho, double* drho, int prl); void compute_rhodg(bool gbasis, int natoms, int* atno, double* coords, vector > &basis, double* Pao, int nrad, int gsa, double* grid, double* rho, double* drho, int prl); +void compute_lap_hessg(bool gbasis, int natoms, int* atno, double* coords, vector > &basis, double* Pao, int nrad, int gsa, double* grid, double* lapl, double* hess, int prl); -void compute_lap_hess(int natoms, int* atno, double* coords, vector > &basis, int nrad, int gsa, double* grid, double* Pao, double* hessw, double* lapl, int htype, double* hessp, int prl); -void compute_delt(int natoms, int* atno, double* coords, vector > &basis, double* Pao, int nrad, int gsa, double* grid, double* delt, int prl); -void compute_B_field(int natoms, int* atno, double* coords, vector > &basis, double* Pao, double* Paodt, + +//finite difference Hessian +void get_hessian_fd_grid(const double dx, int gsa, double* grid, double* gridh); +void eval_hessian_fd(double dx, int gsh, double* vals, double* fdvals, int gsa, double* hess); +void eval_grad_hessian_fd(double dx, int gsh, double* vals, double* fdvals, int gsa, double* grad, double* hess); +//finite difference gradient +void get_fd_grid(const double dx, int gsa, double* grid, double* gridh); +void eval_grad_fd(double dx, int gsh, double* fdvals, int gsa, double* valg); + + +void compute_lap_hess(int natoms, int* atno, double* coords, vector > &basis, int nrad, int gsa, double* grid, double* Pao, double* rhohess, double* hessw, double* lapl, int htype, double* hessp, int prl); +void compute_delt(int natoms, int* atno, double* coords, bool gbasis, vector > &basis, double* Pao, int nrad, int gsa, double* grid, double* delt, int prl); +void compute_B_field(int natoms, int* atno, double* coords, bool gbasis, vector > &basis, double* Pao, double* Paodt, int nrad, int nang, double* ang_g, double* ang_w, int gsa, double* grid, double* B, int prl); void compute_fxcd(int natoms, int* atno, double* coords, vector > &basis, bool gga, bool tau, bool need_wt, double* Pao, @@ -58,6 +69,8 @@ void compute_fxc (int natoms, int* atno, double* coords, vector > // float* grid, float* wt, float* vxc, float* vxcs, double* fxc, int prl); void compute_fxc (bool gbasis, int natoms, int* atno, double* coords, vector > &basis, bool need_wt, bool gga, double* Pao, int gsa, float* grid, float* wt, double* vxc, double* vxcs, double* fxc, int prl); +void compute_fxcd(bool gbasis, int natoms, int* atno, double* coords, vector > &basis, bool need_wt, bool gga, + double* Pao, int gs, double* grid, double* wt, double* vxc, double* vxcs, double* fxc, int prl); void compute_delta(int natoms, int* atno, double* coords, vector > &basis1, vector > &basis2, int No, double* jCA, bool gga, bool tau, float* rho, int gsa, float* grid, float* wt, double* diff, int prl); diff --git a/include/cintprep.h b/include/cintprep.h index ab947e3..710211b 100644 --- a/include/cintprep.h +++ b/include/cintprep.h @@ -37,7 +37,7 @@ class CINTPrep { int var_dim_ri; int nbas_ri; int nenv_ri; - + public: CINTPrep(bool doing_ri = false); ~CINTPrep(); @@ -50,8 +50,8 @@ class CINTPrep { void set_atm(int *&atm_in); void set_bas(int *&bas_in); void set_env(double *&env_in); - void assign_coords(int natoms, int *atomlist, double *coords, bool in_bohr = true); - void assign_coords(int natoms, int *atomlist, float *coords, bool in_bohr = true); + void assign_coords(int natoms, int *atomlist, double *coords, bool in_bohr = true); + void assign_coords(int natoms, int *atomlist, float *coords, bool in_bohr = true); //RI funcs int get_var_dim_ri(); @@ -65,11 +65,11 @@ class CINTPrep { void copy_atoms(vector< int > &atoms_copy); void copy_coord(vector< double > &coord_copy); - + unordered_map anum_to_N; }; -// TODO +// TODO // 1. do ri stuff // 2. probably should have read_xyz/read_bas take in filenames // and manage memory associated with atm/env diff --git a/include/cintwrapper.h b/include/cintwrapper.h index fa7a85c..ef600a7 100644 --- a/include/cintwrapper.h +++ b/include/cintwrapper.h @@ -30,7 +30,7 @@ extern "C" { int *atm, int natm, int *bas, int nbas, double *env); int cint2e_sph(double *buf, int *shls, int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *no_opt); - + // gradient integrals // < -ih∇ | Vnuc | -ih∇ > int cint1e_ipnucip_cart(double *buf, int *shls, @@ -71,6 +71,14 @@ extern "C" { int CINTtot_cgto_spheric(const int *bas, const int nbas); FINT CINTcgto_spheric(const FINT n, const FINT *bas); + + //was CACHE_SIZE_T + int64_t int1e_grids_sph(double *out, FINT *dims, FINT *shls, + FINT *atm, FINT natm, FINT *bas, FINT nbas, + double *env, CINTOpt *opt, double *cache); + int64_t int1e_grids_cart(double *out, FINT *dims, FINT *shls, + FINT *atm, FINT natm, FINT *bas, FINT nbas, + double *env, CINTOpt *opt, double *cache); } using namespace std; @@ -92,6 +100,11 @@ class BT { int calc_di(int i, int *bas); +//potential on a grid +void get_vri(double** val, int gs, int N, + int natm, int nbas, int nbas_ri, int nenv, + int* atm, int* bas, double* env); + void get_overlap(double * overlap, int N, int natm, int nbas, int nenv, int *atm, int* bas, double *env); @@ -157,7 +170,7 @@ void contract_dVne(int natm, int N, int nbas, double *grad_term, double *Pao, void contract_d2c2e(int natm, int N, int nbas, int Naux, int nbas_ri, double *grad_term, double *gRS, int *atm, int *bas, double *env); -void contract_d3c2e(int natm, int N, int nbas, int Naux, int nbas_ri, +void contract_d3c2e(int natm, int N, int nbas, int Naux, int nbas_ri, double *grad_term, double *gQmunu, int *atm, int *bas, double *env); #endif diff --git a/include/elements.h b/include/elements.h index cf02ecd..fc44ec1 100644 --- a/include/elements.h +++ b/include/elements.h @@ -50,7 +50,7 @@ const string elem_arr[118] = { }; const map angular_mom = { - {'S', 0}, {'P', 1}, {'D', 2}, {'F',3}, {'G',4}, {'H',5}, {'I',6} + {'S', 0}, {'P', 1}, {'D', 2}, {'F',3}, {'G',4}, {'H',5}, {'I',6}, {'J',7} }; #endif //_ELEMENTS_H_ diff --git a/include/gauss.h b/include/gauss.h index 21e3a87..42c0f59 100644 --- a/include/gauss.h +++ b/include/gauss.h @@ -15,6 +15,7 @@ void eval_ghd_ke(int gs, double* grid, double* val1, int n1, int l1, const doubl //void eval_pdke_gh(int gs, double* grid, double* val1, int n1, int l1, int m1, double norm1, double zeta1); void eval_p_gh(int gs, float* grid, float* val, int n1, int l1, int m1, float norm1, float zeta1); void eval_pd_gh(int gs, double* grid, double* val, int n1, int l1, int m1, double norm1, double zeta1); +void eval_hess_ghd(int gs, double* grid, double* val, int n1, int l1, int m1, double norm1, double zeta1); int eval_gh_full(int gs, float* grid, float** val1, int i1, int natoms, int nbas, int nenv, int N, int* atm, int* bas, double* env); void wf_to_grid_gh_ke(int natoms, int* atno, double* coords, vector > basis, double* jCA, int gs, float* grid, float* wt, double* TL, int prl); void wf_to_grid_gh_ke_2(int natoms, int* atno, double* coords, vector > basis, int nbas, int nenv, int N, int* atm, int* bas, double* env, diff --git a/include/integrals.h b/include/integrals.h index d92ec36..ae15936 100644 --- a/include/integrals.h +++ b/include/integrals.h @@ -79,7 +79,7 @@ void compute_Enp_para(int ngpu, int natoms, int* atno, float* coords, vector > &basis, int nrad, int nang, double* ang_g0, double* ang_w0, float* En, float* pVp, int prl); //electric fields in x,y,z (centered at origin) -void compute_Exyz(int natoms, int* atno, double* coords, vector > &basis, int nrad, int nang, double* ang_g, double* ang_w, double* S, double* E, int prl); +void compute_Exyz(int natoms, int* atno, double* coords, bool gbasis, vector > &basis, int nrad, int nang, double* ang_g, double* ang_w, double* S, double* E, int prl); void compute_ST(int natoms, int* atno, float* coords, vector > &basis, int nrad, int nang, double* ang_g0, double* ang_w0, double* S, double* T, int prl); void compute_ST(int natoms, int* atno, float* coords, vector > &basis, int nrad, int nang, double* ang_g0, double* ang_w0, float* S, float* T, int prl); diff --git a/src/integrals/becke.cpp b/src/integrals/becke.cpp index fc05cbb..39e3b0e 100644 --- a/src/integrals/becke.cpp +++ b/src/integrals/becke.cpp @@ -1376,6 +1376,477 @@ void compute_fxc(bool gbasis, int natoms, int* atno, double* coords, vector > &basis, bool need_wt, bool gga, + double* Pao, int gs, double* grid, double* wt, double* vxc, double* vxcs, double* fxc, int prl) +{ + //need_wt==0 --> wt vxc + //need_wt==1 --> expects vxc to be wt'd already + + if (gga && Pao==NULL) + { + printf("\n ERROR: gga functionals require Pao in compute_fxc \n"); + exit(1); + } + + int gs3 = 3*gs; + int gs6 = 6*gs; + int gs9 = 9*gs; + + int N = basis.size(); + int N2 = N*N; + int* n2i = new int[natoms]; + int imaxN = get_imax_n2i(natoms,N,basis,n2i); + + double* grid1 = new double[gs6]; + double* grid2 = new double[gs6]; + + double* Paon = Pao; + + int iN = imaxN; + double** val1 = new double*[iN]; + double** val2 = new double*[iN]; + for (int i=0;i0) s1 = n2i[m-1]; int s2 = n2i[m]; + + float Z1 = (float)atno[m]; + double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; + + copy_grid(gs,grid1,grid); + recenter_grid_zero(gs,grid1,-A1,-B1,-C1); + + #pragma acc parallel loop collapse(2) present(val1[0:iN][0:gs]) + for (int i1=0;i1 basis1 = basis[i1]; + int l1 = basis1[1]; int m1 = basis1[2]; int ng = basis1[3]; + int in = ig + ng; //index to find norm + + double* valm = val1[ii1]; + double* valmg = NULL; if (gga) valmg = val1p[ii1]; + for (int j=0;j0) s3 = n2i[n-1]; int s4 = n2i[n]; + + float Z2 = (float)atno[m]; + double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; + + #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gs]) + for (int i2=0;i2 basis2 = basis[i2]; + int l2 = basis2[1]; int m2 = basis2[2]; int ng = basis2[3]; + int in = ig + ng; //index to find norm + + double* valn = val2[ii2]; + double* valng = NULL; if (gga) valng = val2p[ii2]; + for (int j=0;j1 && gs<1000) + { + #pragma acc update self(grho[0:gs3]) + printf(" grho: \n"); + for (int j=0;j0) s1 = n2i[m-1]; int s2 = n2i[m]; + + float Z1 = (float)atno[m]; + double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; + + copy_grid(gs,grid1,grid); + recenter_grid_zero(gs,grid1,-A1,-B1,-C1); + + #pragma acc parallel loop collapse(2) present(val1[0:iN][0:gs]) + for (int i1=0;i1 basis1 = basis[i1]; + int l1 = basis1[1]; int m1 = basis1[2]; int ng = basis1[3]; + int in = ig + ng; //index to find norm + + //printf(" (1) basis %i has %2i terms \n",i1,ng); + double* valm = val1[ii1]; + double* valmg = NULL; if (gga) valmg = val1p[ii1]; + for (int j=0;j0) s3 = n2i[n-1]; int s4 = n2i[n]; + + float Z2 = (float)atno[m]; + double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; + + #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gs]) + for (int i2=0;i2 basis2 = basis[i2]; + int l2 = basis2[1]; int m2 = basis2[2]; int ng = basis2[3]; + int in = ig + ng; //index to find norm + + double* valn = val2[ii2]; + double* valng = NULL; if (gga) valng = val2p[ii2]; + for (int j=0;j > &basis, bool need_wt, bool gga, double* Pao, int gs, float* grid, float* wt, double* vxc, double* vxcs, double* fxc, int prl) { @@ -1874,6 +2345,272 @@ void compute_fxc(bool gbasis, int natoms, int* atno, double* coords, vector > &basis, double* Pao, int nrad, int gsa, float* grid, double* rho, double* drho, int prl) { @@ -2091,89 +2828,441 @@ void compute_rho(bool gbasis, int natoms, int* atno, double* coords, vector2) + { + #pragma acc update self(rho[0:gsa]) + printf("\n density: \n"); + print_vec(gsa,grid,rho); + } + + #pragma acc exit data delete(grid1[0:gsa6],grid2[0:gsa6]) + #pragma acc exit data delete(val0[0:gsa],val1[0:iN][0:gsa],val2[0:iN][0:gsa]) + if (need_grad) + { + #pragma acc exit data delete(delt[0:gsa3],val0g[0:gsa3],val1g[0:iN][0:gsa3],val2g[0:iN][0:gsa3]) + } + + delete [] grid1; + delete [] grid2; + delete [] n2i; + + delete [] val0; + for (int i=0;i > &basis, double* Pao, int nrad, int gsa, double* grid, double* lapl, double* hess, int prl) +{ + if (!gbasis) { printf(" ERROR: compute_lap_hessg but not gbasis \n"); exit(-1); } + + bool need_hess = 0; + if (hess!=NULL) need_hess = 1; + bool need_lapl = 0; + if (lapl!=NULL) need_lapl = 1; + + if (!need_hess && !need_lapl) + return; + + if (prl>1) printf(" compute hessg: Gaussian basis \n"); + + for (int j=0;j3) + { + printf("\n ERROR: compute_lap_hessg does not support l>3 \n"); + exit(-1); + } + + //fix this + bool lowmem = 0; + int nbatch = 1; + //if (gsa>1000000) + // lowmem = 1; + if (lowmem && need_hess) + { + printf(" WARNING; low mem mode compute_lap_hessg for Hessian not available \n"); + lowmem = 0; + } + if (lowmem) + printf("\n\n\n WARNING: lowmem mode not ready in compute_lap_hessg \n\n\n"); + + int gsa3 = 3*gsa; + int gsa6 = 6*gsa; + //dimensions of allocated arrays (for batching) + int gsaa = gsa; + int gsaa3 = gsa3; + int gsaa6 = gsa6; + if (lowmem) + { + nbatch = 4; + gsaa = gsa/nbatch; + gsaa3 = 3*gsaa; + gsaa6 = 6*gsaa;; + + printf(" compute L: low memory mode. gsa: %7i gsaa: %7i \n",gsa,gsaa); + } + + int N = basis.size(); + int* n2i = new int[natoms]; + int imaxN = get_imax_n2i(natoms,N,basis,n2i); + + int iN = imaxN; + double** val1 = new double*[iN]; + double** val2 = new double*[iN]; + for (int i=0;i0) s1 = n2i[m-1]; int s2 = n2i[m]; + + double Z1 = (double)atno[m]; + double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; + + if (lowmem) + { + #pragma acc parallel loop present(grid[0:gsa6],grid1[0:gsaa6]) + for (int j=0;j basis1 = basis[i1]; + int l1 = basis1[1]; int m1 = basis1[2]; int ng = basis1[3]; + int in = ig + ng; //index to find norm + + //printf(" (1) basis %i has %2i terms \n",i1,ng); + double* valm = val1[ii1]; + double* valmg = val1g[ii1]; + double* valmh = val1h[ii1]; + + for (int j=0;j0) s3 = n2i[n-1]; int s4 = n2i[n]; + + double Z2 = (double)atno[m]; + double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; + + #pragma acc parallel loop collapse(2) present(val2[0:iN][0:gsaa]) + for (int i2=0;i2 basis2 = basis[i2]; + int l2 = basis2[1]; int m2 = basis2[2]; int ng = basis2[3]; + int in = ig + ng; //index to find norm + + double* valn = val2[ii2]; + double* valng = val2g[ii2]; + double* valnh = val2h[ii2]; + + for (int j=0;j2) + if (!need_hess) { - #pragma acc update self(rho[0:gsa]) - printf("\n density: \n"); - print_vec(gsa,grid,rho); + #pragma acc exit data delete(hess[0:gsaa6]) + delete [] hess; } - #pragma acc exit data delete(grid1[0:gsa6],grid2[0:gsa6]) - #pragma acc exit data delete(val0[0:gsa],val1[0:iN][0:gsa],val2[0:iN][0:gsa]) - if (need_grad) - { - #pragma acc exit data delete(delt[0:gsa3],val0g[0:gsa3],val1g[0:iN][0:gsa3],val2g[0:iN][0:gsa3]) - } + #pragma acc exit data delete(grid1[0:gsaa6],grid2[0:gsaa6]) + #pragma acc exit data delete(val0[0:gsaa],val0g[0:gsaa3],val0h[0:gsaa6]) + #pragma acc exit data delete(val1[0:iN][0:gsaa],val2[0:iN][0:gsaa],val1g[0:iN][0:gsaa3],val2g[0:iN][0:gsaa3],val1h[0:iN][0:gsaa6],val2h[0:iN][0:gsaa6]) delete [] grid1; delete [] grid2; delete [] n2i; delete [] val0; + delete [] val0g; + delete [] val0h; + for (int i=0;i > } //hessw, T, hessp, not already on gpu -void compute_lap_hess(int natoms, int* atno, double* coords, vector > &basis, int nrad, int gsa, double* grid, double* Pao, double* hessw, double* lapl, int htype, double* hessp, int prl) +void compute_lap_hess(int natoms, int* atno, double* coords, vector > &basis, int nrad, int gsa, double* grid, double* Pao, double* rhohess, double* hessw, double* lapl, int htype, double* hessp, int prl) { + bool get_hessw = 0; + bool get_lapl = 0; bool get_hessp = 0; + if (hessw!=NULL) + get_hessw = 1; + if (lapl!=NULL) + get_lapl = 1; if (hessp!=NULL) { printf("\n TESTING phi hessian in compute_lap_hess \n"); get_hessp = 1; } + bool get_rhohess = 0; + if (rhohess!=NULL) + { + printf("\n TESTING Hessian(rho) in compute_lap_hess \n"); + get_rhohess = 1; + } + int gsa3 = 3*gsa; int gsa6 = 6*gsa; int gsa9 = 9*gsa; @@ -3549,10 +4650,29 @@ void compute_lap_hess(int natoms, int* atno, double* coords, vector >& basis, + double* grid, double* grid1, double* grid2, double* Pao1n, double* Pao2n, double** val1, double** val2p, double* tmp, double* delt1, double* delt2) +{ + int N = basis.size(); + int gsa3 = 3*gsa; + int gsa6 = 6*gsa; + + bool have_delt2 = 0; + if (delt2!=NULL && Pao2n!=NULL) + have_delt2 = 1; + + //skip low density matrix elements + const double dthresh = 1.e-10; + + //shouldn't call this async + if (tid>-1) acc_wait_all(); + + const int ig = 10; + + for (int m=0;m0) s1 = n2i[m-1]; int s2 = n2i[m]; + + double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; + + copy_grid(gsa,grid1,grid); + recenter_grid_zero(gsa,grid1,-A1,-B1,-C1); + + #pragma acc parallel loop collapse(2) present(val1[0:iN][0:gsa]) + for (int i1=0;i1 basis1 = basis[i1]; + int l1 = basis1[1]; int m1 = basis1[2]; int ng = basis1[3]; + int in = ig + ng; //index to find norm + + for (int j=0;jdthresh) + #pragma acc parallel loop present(delt1[0:gsa3],valn[0:gsa],valpm[0:gsa3]) + for (int j=0;jdthresh) + #pragma acc parallel loop present(delt2[0:gsa3],valn[0:gsa],valpm[0:gsa3]) + for (int j=0;j0) s3 = n2i[n-1]; int s4 = n2i[n]; + double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; + + #pragma acc parallel loop collapse(2) present(val2p[0:iN][0:gsa3]) + for (int i2=0;i2 basis2 = basis[i2]; + int l2 = basis2[1]; int m2 = basis2[2]; int ng = basis2[3]; + int in = ig + ng; //index to find norm + + for (int j=0;jdthresh) + #pragma acc parallel loop present(delt1[0:gsa3],valn[0:gsa],valpm[0:gsa3]) + for (int j=0;jdthresh) + #pragma acc parallel loop present(delt2[0:gsa3],valn[0:gsa],valpm[0:gsa3]) + for (int j=0;j > &basis, double* Pao, int nrad, int gsa, double* grid, double* delt, int prl) +void compute_delt(int natoms, int* atno, double* coords, bool gbasis, vector > &basis, double* Pao, int nrad, int gsa, double* grid, double* delt, int prl) { int gsa3 = 3*gsa; int gsa6 = 6*gsa; @@ -3836,6 +5133,9 @@ void compute_delt(int natoms, int* atno, double* coords, vector > if (sgs_basis) { printf("\n ERROR: compute_rho for SGS basis does not support drho \n"); exit(-1); } + if (gbasis) + printf(" TESTING: compute_delt for gbasis \n"); + int N = basis.size(); int N2 = N*N; int* n2i = new int[natoms]; @@ -3876,7 +5176,10 @@ void compute_delt(int natoms, int* atno, double* coords, vector > delt[m] = 0.; //real work here - compute_delt_inner(tid,gsa,natoms,coords,Rc,ss_basis,iN,n2i,basis,grid,grid1,grid2,Paon,NULL,val1,val2p,tmp,delt,NULL); + if (!gbasis) + compute_delt_inner(tid,gsa,natoms,coords,Rc,ss_basis,iN,n2i,basis,grid,grid1,grid2,Paon,NULL,val1,val2p,tmp,delt,NULL); + else + compute_delt_inner_g(tid,gsa,natoms,coords,iN,n2i,basis,grid,grid1,grid2,Paon,NULL,val1,val2p,tmp,delt,NULL); //cleanup #pragma acc exit data delete(grid1[0:gsa6],grid2[0:gsa6],tmp[0:gsa3]) @@ -3905,7 +5208,7 @@ void compute_delt(int natoms, int* atno, double* coords, vector > //nrad/nang are for secondary grid, gsa for primary grid //this function does not include the relativistic time contribution -void compute_B_field(int natoms, int* atno, double* coords, vector > &basis, double* Pao, double* Paodt, +void compute_B_field(int natoms, int* atno, double* coords, bool gbasis, vector > &basis, double* Pao, double* Paodt, int nrad, int nang, double* ang_g, double* ang_w, int gsa, double* grid, double* B, int prl) { //grid should be on cpu @@ -3962,10 +5265,10 @@ void compute_B_field(int natoms, int* atno, double* coords, vector1) printf(" compute_B_field. gsa/b: %4i %4i \n",gsa,gsb); + //testing dJ/dt term for (int j=0;j > } //gs is full grid size --> change to gsa -void compute_fxcd(int natoms, int* atno, double* coords, vector > &basis, bool gga, bool tau, bool need_wt, double* Pao, double* vxc, double* vxcs, int nrad, int gs, double* grid, double* wt, double* fxc, int prl) +void compute_fxcd(int natoms, int* atno, double* coords, vector > &basis, bool gga, bool tau, + bool need_wt, double* Pao, double* vxc, double* vxcs, int nrad, int gs, double* grid, double* wt, double* fxc, int prl) { //need_wt==0 --> wt vxc //need_wt==1 --> expects vxc to be wt'd already diff --git a/src/integrals/gauss.cpp b/src/integrals/gauss.cpp index 80031d4..c2a2d6d 100644 --- a/src/integrals/gauss.cpp +++ b/src/integrals/gauss.cpp @@ -912,6 +912,490 @@ int eval_gh_full(int gs, float* grid, float** val1, int i1, int natoms, int nbas return shl_size; } +void eval_hess_ghd(int gs, double* grid, double* val, int n1, int l1, int m1, double norm1, double zeta1) +{ + int gs6 = 6*gs; + int nlm = n1-l1-1; + double nlm2 = nlm*nlm; + double nlmo2 = nlm*0.5; + double ntm = 0.5*nlm-2.; + double zt2 = zeta1*zeta1; + + if (l1==0) + { + #pragma acc parallel loop present(grid[0:gs6],val[0:gs6]) + for (int i=0;i3 not available in eval_hess_ghd \n"); + } + + #pragma acc parallel loop present(val[0:gs6]) + for (int i=0;i4) diff --git a/src/integrals/hess.cpp b/src/integrals/hess.cpp index fb05264..3e642c1 100644 --- a/src/integrals/hess.cpp +++ b/src/integrals/hess.cpp @@ -14,7 +14,7 @@ void get_h_1s(int gs, double* grid, double* val, double zeta) double x2 = x*x; double y2 = y*y; double z2 = z*z; double xy = x*y; double xz = x*z; double yz = y*z; - double zr = zeta*r; double ozr = 1.f+zr; + double zr = zeta*r; double ozr = 1.+zr; //xx, xy, xz, yy, yz, zz val[6*i] *= (-y2-z2+x2*zr)*ezor; @@ -41,15 +41,15 @@ void get_h_2s(int gs, double* grid, double* val, double zeta) double x2 = x*x; double y2 = y*y; double z2 = z*z; double xy = x*y; double xz = x*z; double yz = y*z; - double zr = zeta*r; double ozr = 1.f+zr; + double zr = zeta*r; double ozr = 1.+zr; //xx, xy, xz, yy, yz, zz val[6*i] *= (y2+z2+zt2*x2*r2 - zr*(x2+r2))*ezor; val[6*i+1] *= xy*(-1.f-zr+zt2*r2)*ezor; val[6*i+2] *= xz*(-1.f-zr+zt2*r2)*ezor; - val[6*i+3] *= (z2+zt2*y2*(y2+z2)-zr*(2.f*y2+z2)+x2*(1.f+zt2*y2-zr))*ezor; + val[6*i+3] *= (z2+zt2*y2*(y2+z2)-zr*(2.*y2+z2)+x2*(1.+zt2*y2-zr))*ezor; val[6*i+4] *= yz*(-1.f-zr+zt2*r2)*ezor; - val[6*i+5] *= (y2+x2*(1.f+zt2*z2-zr) + zt2*z2*(y2+z2) - zr*(y2+2.f*z2))*ezor; + val[6*i+5] *= (y2+x2*(1.+zt2*z2-zr) + zt2*z2*(y2+z2) - zr*(y2+2.*z2))*ezor; } return; } @@ -68,10 +68,10 @@ void get_h_2px(int gs, double* grid, double* val, double zeta) double x2 = x*x; double y2 = y*y; double z2 = z*z; double xy = x*y; double xz = x*z; double yz = y*z; - double zr = zeta*r; double ozr = 1.f+zr; + double zr = zeta*r; double ozr = 1.+zr; //xx, xy, xz, yy, yz, zz - val[6*i] *= x*(-3.f*(y2+z2)+x2*(-2.f+zr))*ezor; + val[6*i] *= x*(-3.*(y2+z2)+x2*(-2.+zr))*ezor; val[6*i+1] *= y*(-y2-z2+x2*zr)*ezor; val[6*i+2] *= z*(-y2-z2+x2*zr)*ezor; val[6*i+3] *= x*(-x2-z2 + y2*zr)*ezor; @@ -95,13 +95,13 @@ void get_h_2py(int gs, double* grid, double* val, double zeta) double x2 = x*x; double y2 = y*y; double z2 = z*z; double xy = x*y; double xz = x*z; double yz = y*z; - double zr = zeta*r; double ozr = 1.f+zr; + double zr = zeta*r; double ozr = 1.+zr; //xx, xy, xz, yy, yz, zz val[6*i] *= y*(-y2-z2+x2*zr)*ezor; val[6*i+1] *= x*(-x2-z2+y2*zr)*ezor; val[6*i+2] *= x*y*z*ozr*ezor; - val[6*i+3] *= y*(-3.f*x2-3.f*z2+y2*(-2.f+zr))*ezor; + val[6*i+3] *= y*(-3.*x2-3.*z2+y2*(-2.+zr))*ezor; val[6*i+4] *= z*(-x2-z2+y2*zr)*ezor; val[6*i+5] *= y*(-x2-y2+z2*zr)*ezor; } @@ -122,7 +122,7 @@ void get_h_2pz(int gs, double* grid, double* val, double zeta) double x2 = x*x; double y2 = y*y; double z2 = z*z; double xy = x*y; double xz = x*z; double yz = y*z; - double zr = zeta*r; double ozr = 1.f+zr; + double zr = zeta*r; double ozr = 1.+zr; //xx, xy, xz, yy, yz, zz val[6*i] *= z*(-y2-z2+x2*zr)*ezor; @@ -130,7 +130,7 @@ void get_h_2pz(int gs, double* grid, double* val, double zeta) val[6*i+2] *= x*(-x2-y2+z2*zr)*ezor; val[6*i+3] *= z*(-x2-z2+y2*zr)*ezor; val[6*i+4] *= y*(-x2-y2+z2*zr)*ezor; - val[6*i+5] *= z*(-3.f*(x2+y2)+z2*(-2.f+zr))*ezor; + val[6*i+5] *= z*(-3.*(x2+y2)+z2*(-2.+zr))*ezor; } return; } @@ -479,7 +479,7 @@ void eval_h(int gs, double* grid, double* val, int n1, int l1, int m1, double ze } else if (n1==3) { - printf(" WARNING: n=3 Hessian is being tested \n"); + //printf(" WARNING: n=3 Hessian is being tested \n"); if (l1==2) { if (m1==-2) @@ -536,8 +536,8 @@ void eval_h(int gs, float* grid, float* val, int n1, int l1, int m1, float zeta1 #pragma acc enter data create(gridd[0:gs6],vald[0:gs]) eval_h(gs,gridd,vald,n1,l1,m1,zeta1); - #pragma acc parallel loop present(val[0:gs],vald[0:gs]) - for (int j=0;j > & return; } +void reduce_Exyz(int i1, int i2, int N, int gs, double* val1m, double* val2m, double* grid1m, double A1, double B1, double C1, double* E) +{ + int gs6 = 6*gs; + int N2 = N*N; + + double valx = 0.; double valy = 0.; double valz = 0.; + #pragma acc parallel loop present(val1m[0:gs],val2m[0:gs],grid1m[0:gs6]) reduction(+:valx,valy,valz) + for (int j=0;j > &basis, int nrad, int nang, double* ang_g, double* ang_w, double* S, double* E, int prl) +void compute_Exyz(int natoms, int* atno, double* coords, bool gbasis, vector > &basis, int nrad, int nang, double* ang_g, double* ang_w, double* S, double* E, int prl) { if (prl>1) printf(" beginning compute_E (double precision) \n"); @@ -4266,9 +4324,15 @@ void compute_Exyz(int natoms, int* atno, double* coords, vector > double* val2m = new double[gs]; double* val2n = new double[gs]; + double* tmp = NULL; + if (gbasis) + tmp = new double[gs]; + int* n2i = new int[natoms]; int imaxN = get_imax_n2i(natoms,N,basis,n2i); - //printf(" iN: %i \n",imaxN); + printf(" iN: %i \n",imaxN); + + const int ig = 10; #if USE_ACC #pragma acc enter data copyin(ang_g[0:3*nang],ang_w[0:nang]) @@ -4278,6 +4342,10 @@ void compute_Exyz(int natoms, int* atno, double* coords, vector > #pragma acc enter data create(grid1m[0:gs6],grid1n[0:gs6],wt1[0:gs]) #pragma acc enter data create(grid2m[0:gs6],grid2n[0:gs6],wt2[0:gs]) #pragma acc enter data create(val1m[0:gs],val1n[0:gs],val2m[0:gs],val2n[0:gs]) + if (gbasis) + { + #pragma acc enter data create(tmp[0:gs]) + } #endif acc_assign(3*N2,E,0.); @@ -4301,8 +4369,60 @@ void compute_Exyz(int natoms, int* atno, double* coords, vector > //working on this block of the matrix int s1 = 0; if (m>0) s1 = n2i[m-1]; int s2 = n2i[m]; + double Z1 = atno[m]; double A1 = coords[3*m+0]; double B1 = coords[3*m+1]; double C1 = coords[3*m+2]; + if (gbasis) + { + generate_central_grid_2d(-1,1,grid1m,wt1,Z1,nrad,nang,ang_g,ang_w); + + for (int i1=s1;i1 basis1 = basis[i1]; + int n1 = basis1[0]; int l1 = basis1[1]; int m1 = basis1[2]; int ng1 = basis1[3]; + + vector basis2 = basis[i2]; + int n2 = basis2[0]; int l2 = basis2[1]; int m2 = basis2[2]; int ng2 = basis2[3]; + + #pragma acc parallel loop present(val1m[0:gs],val2m[0:gs]) + for (int k=0;k > eval_shd(ii1,gs,grid1m,val1m,n1,l1,m1,zeta1); //basis 1 eval_shd(ii1,gs,grid1m,val2m,n2,l2,m2,zeta2); //basis 2 - double valx = 0.; double valy = 0.; double valz = 0.; - #pragma acc parallel loop present(val1m[0:gs],val2m[0:gs],grid1m[0:gs6]) reduction(+:valx,valy,valz) - for (int j=0;j > double A2 = coords[3*n+0]; double B2 = coords[3*n+1]; double C2 = coords[3*n+2]; double A12 = A2-A1; double B12 = B2-B1; double C12 = C2-C1; + if (gbasis) + { + //new grid with zeta dependence + generate_central_grid_2d(-1,1,grid1m,wt1,Z1,nrad,nang,ang_g,ang_w); + generate_central_grid_2d(-1,1,grid2m,wt2,Z2,nrad,nang,ang_g,ang_w); + + //grid1 at 0,0,0 now has r1 at 3, r2 at 4 + add_r2_to_grid(gs,grid1m,A12,B12,C12); + recenter_grid(gs,grid2m,A12,B12,C12); + + //optimize this + //becke_weight_2d(gs,grid1m,wt1,grid2m,wt2,zeta1,zeta2,A12,B12,C12); + becke_weight_2d(gs,grid1m,wt1,grid2m,wt2,Z1,Z2,A12,B12,C12); + + copy_grid(gs,grid2n,grid2m); + recenter_grid(gs,grid2n,-A12,-B12,-C12); //grid 2 centered on atom 1 + + copy_grid(gs,grid1n,grid1m); + recenter_grid_zero(gs,grid1n,-A12,-B12,-C12); //grid 1 centered on atom 2 + + //needs to happen after becke weighting + add_r1_to_grid(gs,grid2m,0.,0.,0.); + + for (int i1=s1;i1 basis1 = basis[i1]; + int n1 = basis1[0]; int l1 = basis1[1]; int m1 = basis1[2]; int ng1 = basis1[3]; + + vector basis2 = basis[i2]; + int n2 = basis2[0]; int l2 = basis2[1]; int m2 = basis2[2]; int ng2 = basis2[3]; + + #pragma acc parallel loop present(val1m[0:gs],val1n[0:gs],val2m[0:gs],val2n[0:gs]) + for (int k=0;k > eval_shd(ii1,gs,grid1n,val2m,n2,l2,m2,zeta2); //basis 2 on center 1 eval_shd(ii1,gs,grid2n,val2n,n2,l2,m2,zeta2); //basis 2 on center 2 - double valx = 0.; double valy = 0.; double valz = 0.; - #pragma acc parallel loop present(val1m[0:gs],val1n[0:gs],val2m[0:gs],val2n[0:gs],grid1m[0:gs6],grid2n[0:gs6]) reduction(+:valx,valy,valz) - for (int j=0;jm } //loop m over natoms - double* norm = new double[N]; - for (int i=0;i-1) + printf(" atomic Exa: %8.5f %8.5f %8.5f \n",Exat,Eyat,Ezat); + + double Zfact = 1./Ztot; + Exat *= Zfact; + Eyat *= Zfact; + Ezat *= Zfact; - double Zfact = 1./Ztot; - Exat *= Zfact; - Eyat *= Zfact; - Ezat *= Zfact; #pragma acc parallel loop present(E[0:3*N2],S[0:N2]) for (int j=0;j > for (int j=0;j > #pragma acc exit data delete(val1m[0:gs],val1n[0:gs],val2m[0:gs],val2n[0:gs]) #pragma acc exit data delete(n2i[0:natoms]) #pragma acc exit data delete(coords[0:3*natoms],atno[0:natoms]) + if (gbasis) + { + #pragma acc exit data delete(tmp[0:gs]) + } #endif delete [] n2i; @@ -4515,6 +4687,8 @@ void compute_Exyz(int natoms, int* atno, double* coords, vector > delete [] grid2n; delete [] wt1; delete [] wt2; + if (gbasis) + delete [] tmp; return; } diff --git a/src/integrals/integrals_aux.cpp b/src/integrals/integrals_aux.cpp index 4a52e24..63320b9 100644 --- a/src/integrals/integrals_aux.cpp +++ b/src/integrals/integrals_aux.cpp @@ -1458,7 +1458,7 @@ int get_imax_n2i(int natoms, int N, vector >& basis, int* n2i) int wa2 = basis[i][9]; if (wa2!=wa) { - int cmaxN = i-iprev; + int cmaxN = i-iprev; if (cmaxN>imaxN) imaxN = cmaxN; n2i[wa] = i; wa = wa2; @@ -1493,7 +1493,7 @@ int get_natoms_with_basis(int natoms, int* atno, vector >& basis) else natoms1 = n+1; } - + return natoms1; } diff --git a/src/integrals/sphericald.cpp b/src/integrals/sphericald.cpp index 93b8ac7..df2553f 100644 --- a/src/integrals/sphericald.cpp +++ b/src/integrals/sphericald.cpp @@ -517,7 +517,7 @@ void get_3dz2d(int tid, int gs, double* grid, double* val, double zeta) val[i] *= (2.*z*z-x*x-y*y)*ezr; //ambiguous for which r to use - //val[i] *= 3.*z*z-r*r; + //val[i] *= 3.*z*z-r*r; } } diff --git a/src/integrals/symm.cpp b/src/integrals/symm.cpp index 7be72b5..49e9dc7 100644 --- a/src/integrals/symm.cpp +++ b/src/integrals/symm.cpp @@ -113,7 +113,7 @@ void eval_c2v_mos(int natoms, int gsa, int iN, int N, float* norm, float* tm1, f short y_symm = 0; float vy1 = tm1[0]; float vy2 = tm1[1]; // +/- y on atom 1 if (fabs(vy1)> atom_symbol; if (iss.fail() || elem_2_int.count(atom_symbol) == 0) { - printf("ERROR reading basis set %s at line %d\n", + printf("ERROR reading basis set %s at line %d\n", basfile.c_str(), i); exit(1); } @@ -279,7 +279,7 @@ void CINTPrep::read_bas(string inpbas) { basis_t basis; basis.nuc = atom_num; while (i < lines.size()) { - if (!all_of(lines[i].begin(), lines[i].end(), [](char c) {return isspace(c);}) + if (!all_of(lines[i].begin(), lines[i].end(), [](char c) {return isspace(c);}) && lines[i].c_str()[0] != '*') { iss.str(lines[i]); string ang; @@ -669,4 +669,6 @@ void CINTPrep::prep_env() { env[offset] = 0.; env[offset + 1] = 1.; } // if do_ri + + return; } diff --git a/src/libcintw/cintwrapper.cpp b/src/libcintw/cintwrapper.cpp index e0586d7..db8ddd8 100644 --- a/src/libcintw/cintwrapper.cpp +++ b/src/libcintw/cintwrapper.cpp @@ -22,13 +22,6 @@ using namespace std; -//extern "C" void dgetri(int *N, double **A, int *lda, int *IPIV, -// double *WORK, int *lwork, int *INFO); -//extern "C" void dgetrf(int *M, int *N, double **A, int *lda, int *IPIV, -// int *INFO); -//extern "C" void dsyev(char *jobz, char *uplo, int *n, double *a, int *lda, -// double *w, double * work, int *lwork, int* info); - bool BT::DO_CART = true; int binom(int n, int k) { @@ -43,6 +36,120 @@ int calc_di(int i, int *bas) { return idx_i; } +void get_vri(double** val, int gs, int N, + int natm, int nbas, int nbas_ri, int nenv, + int* atm, int* bas, double* env) +{ + int idx_i = 0; + int di, dj; + int shls[4] = {0, 0, 0, gs}; + + int nbasx = nbas; + int b0 = 0; + int Nx = N; + if (nbas_ri>0) + { + b0 = nbas; + nbasx = nbas_ri; + Nx = nbas_ri; + + printf(" using ri basis in get_vri \n"); + } + + //CPMZ something is off about the cache size estimate + int cache_size = int1e_grids_sph(NULL, NULL, shls, atm, natm, bas, nbas+nbas_ri, env, NULL, NULL); + //printf(" cache_size: %4i \n",cache_size); + cache_size += gs; + cache_size *= 10; + //double* cache = new double[cache_size](); + + //CPMZ just let libcint allocate for itself + double* cache = NULL; + + if (BT::DO_CART) + { + for (int i=0;i