@@ -336,6 +336,7 @@ NEP_Charge::NEP_Charge(const char* file_potential, const int num_atoms)
   nep_data.D_real.resize(num_atoms);
   nep_data.charge.resize(num_atoms);
   nep_data.charge_derivative.resize(num_atoms * annmb.dim);
+  nep_data.bec.resize(num_atoms * 9);
   if (paramb.charge_mode >= 4) {
     nep_data.C6.resize(num_atoms);
     nep_data.C6_derivative.resize(num_atoms * annmb.dim);
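Layout note (my reading of the indexing used by the new kernels below; the diff itself does not state it): nep_data.bec holds the per-atom Born effective charge (BEC) tensors component-major, i.e. component d = 3*alpha + beta of atom n lives at index n + N * d, hence the size of num_atoms * 9. A minimal CUDA sketch of an indexing helper under that assumption; the helper name is hypothetical and not part of the patch:

// Hypothetical helper illustrating the assumed N * 9, component-major BEC layout.
// It reproduces the g_bec[n1 + N * d] indexing used by find_bec_diagonal and the
// pair kernels below; it is not part of the committed code.
__host__ __device__ inline int bec_index(const int n, const int alpha, const int beta, const int N)
{
  return n + N * (3 * alpha + beta);
}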
@@ -787,6 +788,242 @@ static __global__ void zero_total_charge(const int N, float* g_charge)
   }
 }
 
+static __global__ void find_bec_diagonal(const int N, const float* g_q, float* g_bec)
+{
+  int n1 = threadIdx.x + blockIdx.x * blockDim.x;
+  if (n1 < N) {
+    g_bec[n1 + N * 0] = g_q[n1];
+    g_bec[n1 + N * 1] = 0.0f;
+    g_bec[n1 + N * 2] = 0.0f;
+    g_bec[n1 + N * 3] = 0.0f;
+    g_bec[n1 + N * 4] = g_q[n1];
+    g_bec[n1 + N * 5] = 0.0f;
+    g_bec[n1 + N * 6] = 0.0f;
+    g_bec[n1 + N * 7] = 0.0f;
+    g_bec[n1 + N * 8] = g_q[n1];
+  }
+}
+
+static __global__ void find_bec_radial(
+  const NEP_Charge::ParaMB paramb,
+  const NEP_Charge::ANN annmb,
+  const int N,
+  const int N1,
+  const int N2,
+  const Box box,
+  const int* g_NN,
+  const int* g_NL,
+  const int* g_type,
+  const double* g_x,
+  const double* g_y,
+  const double* g_z,
+  const float* g_charge_derivative,
+  float* g_bec)
+{
+  int n1 = blockIdx.x * blockDim.x + threadIdx.x + N1;
+  if (n1 < N2) {
+    int t1 = g_type[n1];
+    double x1 = g_x[n1];
+    double y1 = g_y[n1];
+    double z1 = g_z[n1];
+    for (int i1 = 0; i1 < g_NN[n1]; ++i1) {
+      int n2 = g_NL[n1 + N * i1];
+      int t2 = g_type[n2];
+      double x12double = g_x[n2] - x1;
+      double y12double = g_y[n2] - y1;
+      double z12double = g_z[n2] - z1;
+      apply_mic(box, x12double, y12double, z12double);
+      float r12[3] = {float(x12double), float(y12double), float(z12double)};
+      float d12 = sqrt(r12[0] * r12[0] + r12[1] * r12[1] + r12[2] * r12[2]);
+      float d12inv = 1.0f / d12;
+      float fc12, fcp12;
+      float rc = (paramb.charge_mode >= 4) ? paramb.rc_angular : paramb.rc_radial;
+      if (paramb.use_typewise_cutoff) {
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            ((paramb.charge_mode >= 4) ? paramb.typewise_cutoff_angular_factor : paramb.typewise_cutoff_radial_factor),
+          rc);
+      }
+      float rcinv = 1.0f / rc;
+      find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12);
+      float fn12[MAX_NUM_N];
+      float fnp12[MAX_NUM_N];
+      float f12[3] = {0.0f};
+
+      find_fn_and_fnp(paramb.basis_size_radial, rcinv, d12, fc12, fcp12, fn12, fnp12);
+      for (int n = 0; n <= paramb.n_max_radial; ++n) {
+        float gnp12 = 0.0f;
+        for (int k = 0; k <= paramb.basis_size_radial; ++k) {
+          int c_index = (n * (paramb.basis_size_radial + 1) + k) * paramb.num_types_sq;
+          c_index += t1 * paramb.num_types + t2;
+          gnp12 += fnp12[k] * annmb.c[c_index];
+        }
+        const float tmp12 = g_charge_derivative[n1 + n * N] * gnp12 * d12inv;
+        for (int d = 0; d < 3; ++d) {
+          f12[d] += tmp12 * r12[d];
+        }
+      }
+
+      float bec_xx = 0.5f * (r12[0] * f12[0]);
+      float bec_xy = 0.5f * (r12[0] * f12[1]);
+      float bec_xz = 0.5f * (r12[0] * f12[2]);
+      float bec_yx = 0.5f * (r12[1] * f12[0]);
+      float bec_yy = 0.5f * (r12[1] * f12[1]);
+      float bec_yz = 0.5f * (r12[1] * f12[2]);
+      float bec_zx = 0.5f * (r12[2] * f12[0]);
+      float bec_zy = 0.5f * (r12[2] * f12[1]);
+      float bec_zz = 0.5f * (r12[2] * f12[2]);
+
+      atomicAdd(&g_bec[n1], bec_xx);
+      atomicAdd(&g_bec[n1 + N], bec_xy);
+      atomicAdd(&g_bec[n1 + N * 2], bec_xz);
+      atomicAdd(&g_bec[n1 + N * 3], bec_yx);
+      atomicAdd(&g_bec[n1 + N * 4], bec_yy);
+      atomicAdd(&g_bec[n1 + N * 5], bec_yz);
+      atomicAdd(&g_bec[n1 + N * 6], bec_zx);
+      atomicAdd(&g_bec[n1 + N * 7], bec_zy);
+      atomicAdd(&g_bec[n1 + N * 8], bec_zz);
+
+      atomicAdd(&g_bec[n2], -bec_xx);
+      atomicAdd(&g_bec[n2 + N], -bec_xy);
+      atomicAdd(&g_bec[n2 + N * 2], -bec_xz);
+      atomicAdd(&g_bec[n2 + N * 3], -bec_yx);
+      atomicAdd(&g_bec[n2 + N * 4], -bec_yy);
+      atomicAdd(&g_bec[n2 + N * 5], -bec_yz);
+      atomicAdd(&g_bec[n2 + N * 6], -bec_zx);
+      atomicAdd(&g_bec[n2 + N * 7], -bec_zy);
+      atomicAdd(&g_bec[n2 + N * 8], -bec_zz);
+    }
+  }
+}
+
+static __global__ void find_bec_angular(
+  NEP_Charge::ParaMB paramb,
+  NEP_Charge::ANN annmb,
+  const int N,
+  const int N1,
+  const int N2,
+  const Box box,
+  const int* g_NN_angular,
+  const int* g_NL_angular,
+  const int* g_type,
+  const double* g_x,
+  const double* g_y,
+  const double* g_z,
+  const float* g_charge_derivative,
+  const float* g_sum_fxyz,
+  float* g_bec)
+{
+  int n1 = blockIdx.x * blockDim.x + threadIdx.x + N1;
+  if (n1 < N2) {
+    float Fp[MAX_DIM_ANGULAR] = {0.0f};
+    float sum_fxyz[NUM_OF_ABC * MAX_NUM_N];
+    for (int d = 0; d < paramb.dim_angular; ++d) {
+      Fp[d] = g_charge_derivative[(paramb.n_max_radial + 1 + d) * N + n1];
+    }
+    for (int n = 0; n < paramb.n_max_angular + 1; ++n) {
+      for (int abc = 0; abc < (paramb.L_max + 1) * (paramb.L_max + 1) - 1; ++abc) {
+        sum_fxyz[n * NUM_OF_ABC + abc] =
+          g_sum_fxyz[(n * ((paramb.L_max + 1) * (paramb.L_max + 1) - 1) + abc) * N + n1];
+      }
+    }
+
+    int t1 = g_type[n1];
+    double x1 = g_x[n1];
+    double y1 = g_y[n1];
+    double z1 = g_z[n1];
+    for (int i1 = 0; i1 < g_NN_angular[n1]; ++i1) {
+      int n2 = g_NL_angular[n1 + N * i1];
+      double x12double = g_x[n2] - x1;
+      double y12double = g_y[n2] - y1;
+      double z12double = g_z[n2] - z1;
+      apply_mic(box, x12double, y12double, z12double);
+      float r12[3] = {float(x12double), float(y12double), float(z12double)};
+      float d12 = sqrt(r12[0] * r12[0] + r12[1] * r12[1] + r12[2] * r12[2]);
+      float f12[3] = {0.0f};
+      float fc12, fcp12;
+      int t2 = g_type[n2];
+      float rc = paramb.rc_angular;
+      if (paramb.use_typewise_cutoff) {
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_angular_factor,
+          rc);
+      }
+      float rcinv = 1.0f / rc;
+      find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12);
+
+      float fn12[MAX_NUM_N];
+      float fnp12[MAX_NUM_N];
+      find_fn_and_fnp(paramb.basis_size_angular, rcinv, d12, fc12, fcp12, fn12, fnp12);
+      for (int n = 0; n <= paramb.n_max_angular; ++n) {
+        float gn12 = 0.0f;
+        float gnp12 = 0.0f;
+        for (int k = 0; k <= paramb.basis_size_angular; ++k) {
+          int c_index = (n * (paramb.basis_size_angular + 1) + k) * paramb.num_types_sq;
+          c_index += t1 * paramb.num_types + t2 + paramb.num_c_radial;
+          gn12 += fn12[k] * annmb.c[c_index];
+          gnp12 += fnp12[k] * annmb.c[c_index];
+        }
+        accumulate_f12(
+          paramb.L_max,
+          paramb.num_L,
+          n,
+          paramb.n_max_angular + 1,
+          d12,
+          r12,
+          gn12,
+          gnp12,
+          Fp,
+          sum_fxyz,
+          f12);
+      }
+
+      float bec_xx = 0.5f * (r12[0] * f12[0]);
+      float bec_xy = 0.5f * (r12[0] * f12[1]);
+      float bec_xz = 0.5f * (r12[0] * f12[2]);
+      float bec_yx = 0.5f * (r12[1] * f12[0]);
+      float bec_yy = 0.5f * (r12[1] * f12[1]);
+      float bec_yz = 0.5f * (r12[1] * f12[2]);
+      float bec_zx = 0.5f * (r12[2] * f12[0]);
+      float bec_zy = 0.5f * (r12[2] * f12[1]);
+      float bec_zz = 0.5f * (r12[2] * f12[2]);
+
+      atomicAdd(&g_bec[n1], bec_xx);
+      atomicAdd(&g_bec[n1 + N], bec_xy);
+      atomicAdd(&g_bec[n1 + N * 2], bec_xz);
+      atomicAdd(&g_bec[n1 + N * 3], bec_yx);
+      atomicAdd(&g_bec[n1 + N * 4], bec_yy);
+      atomicAdd(&g_bec[n1 + N * 5], bec_yz);
+      atomicAdd(&g_bec[n1 + N * 6], bec_zx);
+      atomicAdd(&g_bec[n1 + N * 7], bec_zy);
+      atomicAdd(&g_bec[n1 + N * 8], bec_zz);
+
+      atomicAdd(&g_bec[n2], -bec_xx);
+      atomicAdd(&g_bec[n2 + N], -bec_xy);
+      atomicAdd(&g_bec[n2 + N * 2], -bec_xz);
+      atomicAdd(&g_bec[n2 + N * 3], -bec_yx);
+      atomicAdd(&g_bec[n2 + N * 4], -bec_yy);
+      atomicAdd(&g_bec[n2 + N * 5], -bec_yz);
+      atomicAdd(&g_bec[n2 + N * 6], -bec_zx);
+      atomicAdd(&g_bec[n2 + N * 7], -bec_zy);
+      atomicAdd(&g_bec[n2 + N * 8], -bec_zz);
+    }
+  }
+}
+
+static __global__ void scale_bec(const int N, const float* sqrt_epsilon_inf, float* g_bec)
+{
+  int n1 = threadIdx.x + blockIdx.x * blockDim.x;
+  if (n1 < N) {
+    for (int d = 0; d < 9; ++d) {
+      g_bec[n1 + N * d] *= sqrt_epsilon_inf[0];
+    }
+  }
+}
+
 static __global__ void find_force_charge_real_space_only(
   const int N,
   const NEP_Charge::Charge_Para charge_para,
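Reading note on the kernels above (my paraphrase of the code, not wording from the source): find_bec_diagonal initializes each atom's tensor to q_i δ_{αβ}, which also zeroes the off-diagonal slots. For a neighbor pair (i, j) with r_ij = r_j − r_i, find_bec_radial then accumulates

  ΔZ_{i,αβ} = ½ r_{ij,α} Σ_n (∂q_i/∂D_n) g_n'(|r_ij|) r_{ij,β} / |r_ij|,   ΔZ_{j,αβ} = −ΔZ_{i,αβ},

find_bec_angular performs the analogous accumulation for the angular descriptors via accumulate_f12, and scale_bec finally multiplies every component by sqrt(epsilon_inf). Since each pair adds to atom i exactly what it subtracts from atom j, the pair terms cancel in the sum over atoms, leaving only the (zeroed) total charge on the diagonal.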
@@ -1805,6 +2042,59 @@ void NEP_Charge::compute_large_box(
   zero_total_charge<<<1, 1024>>>(N, nep_data.charge.data());
   GPU_CHECK_KERNEL
 
+  if (true) { // TODO
+    // get BEC (the diagonal part)
+    find_bec_diagonal<<<grid_size, BLOCK_SIZE>>>(
+      N,
+      nep_data.charge.data(),
+      nep_data.bec.data());
+    GPU_CHECK_KERNEL
+
+    // get BEC (radial descriptor part)
+    find_bec_radial<<<grid_size, BLOCK_SIZE>>>(
+      paramb,
+      annmb,
+      N,
+      N1,
+      N2,
+      box,
+      (paramb.charge_mode >= 4) ? nep_data.NN_angular.data() : nep_data.NN_radial.data(),
+      (paramb.charge_mode >= 4) ? nep_data.NL_angular.data() : nep_data.NL_radial.data(),
+      type.data(),
+      position_per_atom.data(),
+      position_per_atom.data() + N,
+      position_per_atom.data() + N * 2,
+      nep_data.charge_derivative.data(),
+      nep_data.bec.data());
+    GPU_CHECK_KERNEL
+
+    // get BEC (angular descriptor part)
+    find_bec_angular<<<grid_size, BLOCK_SIZE>>>(
+      paramb,
+      annmb,
+      N,
+      N1,
+      N2,
+      box,
+      nep_data.NN_angular.data(),
+      nep_data.NL_angular.data(),
+      type.data(),
+      position_per_atom.data(),
+      position_per_atom.data() + N,
+      position_per_atom.data() + N * 2,
+      nep_data.charge_derivative.data(),
+      nep_data.sum_fxyz.data(),
+      nep_data.bec.data());
+    GPU_CHECK_KERNEL
+
+    // scale BEC to BEC * sqrt(epsilon_inf)
+    scale_bec<<<grid_size, BLOCK_SIZE>>>(
+      N,
+      annmb.sqrt_epsilon_inf,
+      nep_data.bec.data());
+    GPU_CHECK_KERNEL
+  }
+
   if (paramb.charge_mode == 1 || paramb.charge_mode == 2 || paramb.charge_mode == 4) {
     find_k_and_G(box.cpu_h);
     find_structure_factor<<<(charge_para.num_kpoints_max - 1) / 64 + 1, 64>>>(
@@ -2107,6 +2397,57 @@ void NEP_Charge::compute_small_box(
   zero_total_charge<<<N, 1024>>>(N, nep_data.charge.data());
   GPU_CHECK_KERNEL
 
+  if (true) { // TODO
+    // get BEC (the diagonal part)
+    find_bec_diagonal<<<grid_size, BLOCK_SIZE>>>(
+      N,
+      nep_data.charge.data(),
+      nep_data.bec.data());
+    GPU_CHECK_KERNEL
+
+    // get BEC (radial descriptor part)
+    find_bec_radial_small_box<<<grid_size, BLOCK_SIZE>>>(
+      paramb,
+      annmb,
+      N,
+      N1,
+      N2,
+      (paramb.charge_mode >= 4) ? NN_angular.data() : NN_radial.data(),
+      (paramb.charge_mode >= 4) ? NL_angular.data() : NL_radial.data(),
+      type.data(),
+      (paramb.charge_mode >= 4) ? r12.data() + size_x12 * 3 : r12.data(),
+      (paramb.charge_mode >= 4) ? r12.data() + size_x12 * 4 : r12.data() + size_x12,
+      (paramb.charge_mode >= 4) ? r12.data() + size_x12 * 5 : r12.data() + size_x12 * 2,
+      nep_data.charge_derivative.data(),
+      nep_data.bec.data());
+    GPU_CHECK_KERNEL
+
+    // get BEC (angular descriptor part)
+    find_bec_angular_small_box<<<grid_size, BLOCK_SIZE>>>(
+      paramb,
+      annmb,
+      N,
+      N1,
+      N2,
+      NN_angular.data(),
+      NL_angular.data(),
+      type.data(),
+      r12.data() + size_x12 * 3,
+      r12.data() + size_x12 * 4,
+      r12.data() + size_x12 * 5,
+      nep_data.charge_derivative.data(),
+      nep_data.sum_fxyz.data(),
+      nep_data.bec.data());
+    GPU_CHECK_KERNEL
+
+    // scale BEC to BEC * sqrt(epsilon_inf)
+    scale_bec<<<grid_size, BLOCK_SIZE>>>(
+      N,
+      annmb.sqrt_epsilon_inf,
+      nep_data.bec.data());
+    GPU_CHECK_KERNEL
+  }
+
   if (paramb.charge_mode == 1 || paramb.charge_mode == 2 || paramb.charge_mode == 4) {
     find_k_and_G(box.cpu_h);
     find_structure_factor<<<(charge_para.num_kpoints_max - 1) / 64 + 1, 64>>>(
@@ -2386,3 +2727,5 @@ const GPU_Vector<int>& NEP_Charge::get_NN_radial_ptr() { return nep_data.NN_radi
 const GPU_Vector<int>& NEP_Charge::get_NL_radial_ptr() { return nep_data.NL_radial; }
 
 GPU_Vector<float>& NEP_Charge::get_charge_reference() { return nep_data.charge; }
+
+GPU_Vector<float>& NEP_Charge::get_bec_reference() { return nep_data.bec; }
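A sketch of how a caller could read the tensors back through the new accessor, under the N * 9 component-major layout assumed above (dump_bec, the header path, and the sum-rule printout are illustrative and not part of this patch):

// Illustrative usage sketch only. Assumes component d = 3*alpha + beta of atom n
// is stored at index n + N * d, matching the kernels in this diff.
#include "nep_charge.cuh" // assumed header name for the NEP_Charge class
#include <cuda_runtime.h>
#include <cstdio>
#include <vector>

void dump_bec(NEP_Charge& nep, const int N)
{
  std::vector<float> bec(N * 9);
  cudaMemcpy(
    bec.data(), nep.get_bec_reference().data(), sizeof(float) * N * 9, cudaMemcpyDeviceToHost);

  // Per-component sums over atoms: the pair contributions cancel by construction and
  // the diagonal sums to the (zeroed) total charge, so each of these should be ~0.
  for (int d = 0; d < 9; ++d) {
    double sum = 0.0;
    for (int n = 0; n < N; ++n) {
      sum += bec[n + N * d];
    }
    printf("sum over atoms of BEC component %d = %g\n", d, sum);
  }
}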