Skip to content

Commit ae2abb5

Browse files
authored
Merge pull request brucefan1983#1180 from brucefan1983/bec_gpumd_part
calculate bec in gpumd part
2 parents 6517ebd + 9258a6a commit ae2abb5

File tree

7 files changed

+610
-4
lines changed

7 files changed

+610
-4
lines changed

src/force/nep_charge.cu

Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,7 @@ NEP_Charge::NEP_Charge(const char* file_potential, const int num_atoms)
336336
nep_data.D_real.resize(num_atoms);
337337
nep_data.charge.resize(num_atoms);
338338
nep_data.charge_derivative.resize(num_atoms * annmb.dim);
339+
nep_data.bec.resize(num_atoms * 9);
339340
if (paramb.charge_mode >= 4) {
340341
nep_data.C6.resize(num_atoms);
341342
nep_data.C6_derivative.resize(num_atoms * annmb.dim);
@@ -787,6 +788,242 @@ static __global__ void zero_total_charge(const int N, float* g_charge)
787788
}
788789
}
789790

791+
static __global__ void find_bec_diagonal(const int N, const float* g_q, float* g_bec)
792+
{
793+
int n1 = threadIdx.x + blockIdx.x * blockDim.x;
794+
if (n1 < N) {
795+
g_bec[n1 + N * 0] = g_q[n1];
796+
g_bec[n1 + N * 1] = 0.0f;
797+
g_bec[n1 + N * 2] = 0.0f;
798+
g_bec[n1 + N * 3] = 0.0f;
799+
g_bec[n1 + N * 4] = g_q[n1];
800+
g_bec[n1 + N * 5] = 0.0f;
801+
g_bec[n1 + N * 6] = 0.0f;
802+
g_bec[n1 + N * 7] = 0.0f;
803+
g_bec[n1 + N * 8] = g_q[n1];
804+
}
805+
}
806+
807+
static __global__ void find_bec_radial(
808+
const NEP_Charge::ParaMB paramb,
809+
const NEP_Charge::ANN annmb,
810+
const int N,
811+
const int N1,
812+
const int N2,
813+
const Box box,
814+
const int* g_NN,
815+
const int* g_NL,
816+
const int* g_type,
817+
const double* g_x,
818+
const double* g_y,
819+
const double* g_z,
820+
const float* g_charge_derivative,
821+
float* g_bec)
822+
{
823+
int n1 = blockIdx.x * blockDim.x + threadIdx.x + N1;
824+
if (n1 < N2) {
825+
int t1 = g_type[n1];
826+
double x1 = g_x[n1];
827+
double y1 = g_y[n1];
828+
double z1 = g_z[n1];
829+
for (int i1 = 0; i1 < g_NN[n1]; ++i1) {
830+
int n2 = g_NL[n1 + N * i1];
831+
int t2 = g_type[n2];
832+
double x12double = g_x[n2] - x1;
833+
double y12double = g_y[n2] - y1;
834+
double z12double = g_z[n2] - z1;
835+
apply_mic(box, x12double, y12double, z12double);
836+
float r12[3] = {float(x12double), float(y12double), float(z12double)};
837+
float d12 = sqrt(r12[0] * r12[0] + r12[1] * r12[1] + r12[2] * r12[2]);
838+
float d12inv = 1.0f / d12;
839+
float fc12, fcp12;
840+
float rc = (paramb.charge_mode >= 4) ? paramb.rc_angular : paramb.rc_radial;
841+
if (paramb.use_typewise_cutoff) {
842+
rc = min(
843+
(COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
844+
COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
845+
((paramb.charge_mode >= 4) ? paramb.typewise_cutoff_angular_factor : paramb.typewise_cutoff_radial_factor),
846+
rc);
847+
}
848+
float rcinv = 1.0f / rc;
849+
find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12);
850+
float fn12[MAX_NUM_N];
851+
float fnp12[MAX_NUM_N];
852+
float f12[3] = {0.0f};
853+
854+
find_fn_and_fnp(paramb.basis_size_radial, rcinv, d12, fc12, fcp12, fn12, fnp12);
855+
for (int n = 0; n <= paramb.n_max_radial; ++n) {
856+
float gnp12 = 0.0f;
857+
for (int k = 0; k <= paramb.basis_size_radial; ++k) {
858+
int c_index = (n * (paramb.basis_size_radial + 1) + k) * paramb.num_types_sq;
859+
c_index += t1 * paramb.num_types + t2;
860+
gnp12 += fnp12[k] * annmb.c[c_index];
861+
}
862+
const float tmp12 = g_charge_derivative[n1 + n * N] * gnp12 * d12inv;
863+
for (int d = 0; d < 3; ++d) {
864+
f12[d] += tmp12 * r12[d];
865+
}
866+
}
867+
868+
float bec_xx = 0.5f* (r12[0] * f12[0]);
869+
float bec_xy = 0.5f* (r12[0] * f12[1]);
870+
float bec_xz = 0.5f* (r12[0] * f12[2]);
871+
float bec_yx = 0.5f* (r12[1] * f12[0]);
872+
float bec_yy = 0.5f* (r12[1] * f12[1]);
873+
float bec_yz = 0.5f* (r12[1] * f12[2]);
874+
float bec_zx = 0.5f* (r12[2] * f12[0]);
875+
float bec_zy = 0.5f* (r12[2] * f12[1]);
876+
float bec_zz = 0.5f* (r12[2] * f12[2]);
877+
878+
atomicAdd(&g_bec[n1], bec_xx);
879+
atomicAdd(&g_bec[n1 + N], bec_xy);
880+
atomicAdd(&g_bec[n1 + N * 2], bec_xz);
881+
atomicAdd(&g_bec[n1 + N * 3], bec_yx);
882+
atomicAdd(&g_bec[n1 + N * 4], bec_yy);
883+
atomicAdd(&g_bec[n1 + N * 5], bec_yz);
884+
atomicAdd(&g_bec[n1 + N * 6], bec_zx);
885+
atomicAdd(&g_bec[n1 + N * 7], bec_zy);
886+
atomicAdd(&g_bec[n1 + N * 8], bec_zz);
887+
888+
atomicAdd(&g_bec[n2], -bec_xx);
889+
atomicAdd(&g_bec[n2 + N], -bec_xy);
890+
atomicAdd(&g_bec[n2 + N * 2], -bec_xz);
891+
atomicAdd(&g_bec[n2 + N * 3], -bec_yx);
892+
atomicAdd(&g_bec[n2 + N * 4], -bec_yy);
893+
atomicAdd(&g_bec[n2 + N * 5], -bec_yz);
894+
atomicAdd(&g_bec[n2 + N * 6], -bec_zx);
895+
atomicAdd(&g_bec[n2 + N * 7], -bec_zy);
896+
atomicAdd(&g_bec[n2 + N * 8], -bec_zz);
897+
}
898+
}
899+
}
900+
901+
static __global__ void find_bec_angular(
902+
NEP_Charge::ParaMB paramb,
903+
NEP_Charge::ANN annmb,
904+
const int N,
905+
const int N1,
906+
const int N2,
907+
const Box box,
908+
const int* g_NN_angular,
909+
const int* g_NL_angular,
910+
const int* g_type,
911+
const double* g_x,
912+
const double* g_y,
913+
const double* g_z,
914+
const float* g_charge_derivative,
915+
const float* g_sum_fxyz,
916+
float* g_bec)
917+
{
918+
int n1 = blockIdx.x * blockDim.x + threadIdx.x + N1;
919+
if (n1 < N2) {
920+
float Fp[MAX_DIM_ANGULAR] = {0.0f};
921+
float sum_fxyz[NUM_OF_ABC * MAX_NUM_N];
922+
for (int d = 0; d < paramb.dim_angular; ++d) {
923+
Fp[d] = g_charge_derivative[(paramb.n_max_radial + 1 + d) * N + n1];
924+
}
925+
for (int n = 0; n < paramb.n_max_angular + 1; ++n) {
926+
for (int abc = 0; abc < (paramb.L_max + 1) * (paramb.L_max + 1) - 1; ++abc) {
927+
sum_fxyz[n * NUM_OF_ABC + abc] =
928+
g_sum_fxyz[(n * ((paramb.L_max + 1) * (paramb.L_max + 1) - 1) + abc) * N + n1];
929+
}
930+
}
931+
932+
int t1 = g_type[n1];
933+
double x1 = g_x[n1];
934+
double y1 = g_y[n1];
935+
double z1 = g_z[n1];
936+
for (int i1 = 0; i1 < g_NN_angular[n1]; ++i1) {
937+
int n2 = g_NL_angular[n1 + N * i1];
938+
double x12double = g_x[n2] - x1;
939+
double y12double = g_y[n2] - y1;
940+
double z12double = g_z[n2] - z1;
941+
apply_mic(box, x12double, y12double, z12double);
942+
float r12[3] = {float(x12double), float(y12double), float(z12double)};
943+
float d12 = sqrt(r12[0] * r12[0] + r12[1] * r12[1] + r12[2] * r12[2]);
944+
float f12[3] = {0.0f};
945+
float fc12, fcp12;
946+
int t2 = g_type[n2];
947+
float rc = paramb.rc_angular;
948+
if (paramb.use_typewise_cutoff) {
949+
rc = min(
950+
(COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
951+
COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
952+
paramb.typewise_cutoff_angular_factor,
953+
rc);
954+
}
955+
float rcinv = 1.0f / rc;
956+
find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12);
957+
958+
float fn12[MAX_NUM_N];
959+
float fnp12[MAX_NUM_N];
960+
find_fn_and_fnp(paramb.basis_size_angular, rcinv, d12, fc12, fcp12, fn12, fnp12);
961+
for (int n = 0; n <= paramb.n_max_angular; ++n) {
962+
float gn12 = 0.0f;
963+
float gnp12 = 0.0f;
964+
for (int k = 0; k <= paramb.basis_size_angular; ++k) {
965+
int c_index = (n * (paramb.basis_size_angular + 1) + k) * paramb.num_types_sq;
966+
c_index += t1 * paramb.num_types + t2 + paramb.num_c_radial;
967+
gn12 += fn12[k] * annmb.c[c_index];
968+
gnp12 += fnp12[k] * annmb.c[c_index];
969+
}
970+
accumulate_f12(
971+
paramb.L_max,
972+
paramb.num_L,
973+
n,
974+
paramb.n_max_angular + 1,
975+
d12,
976+
r12,
977+
gn12,
978+
gnp12,
979+
Fp,
980+
sum_fxyz,
981+
f12);
982+
}
983+
984+
float bec_xx = 0.5f* (r12[0] * f12[0]);
985+
float bec_xy = 0.5f* (r12[0] * f12[1]);
986+
float bec_xz = 0.5f* (r12[0] * f12[2]);
987+
float bec_yx = 0.5f* (r12[1] * f12[0]);
988+
float bec_yy = 0.5f* (r12[1] * f12[1]);
989+
float bec_yz = 0.5f* (r12[1] * f12[2]);
990+
float bec_zx = 0.5f* (r12[2] * f12[0]);
991+
float bec_zy = 0.5f* (r12[2] * f12[1]);
992+
float bec_zz = 0.5f* (r12[2] * f12[2]);
993+
994+
atomicAdd(&g_bec[n1], bec_xx);
995+
atomicAdd(&g_bec[n1 + N], bec_xy);
996+
atomicAdd(&g_bec[n1 + N * 2], bec_xz);
997+
atomicAdd(&g_bec[n1 + N * 3], bec_yx);
998+
atomicAdd(&g_bec[n1 + N * 4], bec_yy);
999+
atomicAdd(&g_bec[n1 + N * 5], bec_yz);
1000+
atomicAdd(&g_bec[n1 + N * 6], bec_zx);
1001+
atomicAdd(&g_bec[n1 + N * 7], bec_zy);
1002+
atomicAdd(&g_bec[n1 + N * 8], bec_zz);
1003+
1004+
atomicAdd(&g_bec[n2], -bec_xx);
1005+
atomicAdd(&g_bec[n2 + N], -bec_xy);
1006+
atomicAdd(&g_bec[n2 + N * 2], -bec_xz);
1007+
atomicAdd(&g_bec[n2 + N * 3], -bec_yx);
1008+
atomicAdd(&g_bec[n2 + N * 4], -bec_yy);
1009+
atomicAdd(&g_bec[n2 + N * 5], -bec_yz);
1010+
atomicAdd(&g_bec[n2 + N * 6], -bec_zx);
1011+
atomicAdd(&g_bec[n2 + N * 7], -bec_zy);
1012+
atomicAdd(&g_bec[n2 + N * 8], -bec_zz);
1013+
}
1014+
}
1015+
}
1016+
1017+
static __global__ void scale_bec(const int N, const float* sqrt_epsilon_inf, float* g_bec)
1018+
{
1019+
int n1 = threadIdx.x + blockIdx.x * blockDim.x;
1020+
if (n1 < N) {
1021+
for (int d = 0; d < 9; ++d) {
1022+
g_bec[n1 + N * d] *= sqrt_epsilon_inf[0];
1023+
}
1024+
}
1025+
}
1026+
7901027
static __global__ void find_force_charge_real_space_only(
7911028
const int N,
7921029
const NEP_Charge::Charge_Para charge_para,
@@ -1805,6 +2042,59 @@ void NEP_Charge::compute_large_box(
18052042
zero_total_charge<<<1, 1024>>>(N, nep_data.charge.data());
18062043
GPU_CHECK_KERNEL
18072044

2045+
if (true) { // TODO
2046+
// get BEC (the diagonal part)
2047+
find_bec_diagonal<<<grid_size, BLOCK_SIZE>>>(
2048+
N,
2049+
nep_data.charge.data(),
2050+
nep_data.bec.data());
2051+
GPU_CHECK_KERNEL
2052+
2053+
// get BEC (radial descriptor part)
2054+
find_bec_radial<<<grid_size, BLOCK_SIZE>>>(
2055+
paramb,
2056+
annmb,
2057+
N,
2058+
N1,
2059+
N2,
2060+
box,
2061+
(paramb.charge_mode >= 4) ? nep_data.NN_angular.data() : nep_data.NN_radial.data(),
2062+
(paramb.charge_mode >= 4) ? nep_data.NL_angular.data() : nep_data.NL_radial.data(),
2063+
type.data(),
2064+
position_per_atom.data(),
2065+
position_per_atom.data() + N,
2066+
position_per_atom.data() + N * 2,
2067+
nep_data.charge_derivative.data(),
2068+
nep_data.bec.data());
2069+
GPU_CHECK_KERNEL
2070+
2071+
// get BEC (angular descriptor part)
2072+
find_bec_angular<<<grid_size, BLOCK_SIZE>>>(
2073+
paramb,
2074+
annmb,
2075+
N,
2076+
N1,
2077+
N2,
2078+
box,
2079+
nep_data.NN_angular.data(),
2080+
nep_data.NL_angular.data(),
2081+
type.data(),
2082+
position_per_atom.data(),
2083+
position_per_atom.data() + N,
2084+
position_per_atom.data() + N * 2,
2085+
nep_data.charge_derivative.data(),
2086+
nep_data.sum_fxyz.data(),
2087+
nep_data.bec.data());
2088+
GPU_CHECK_KERNEL
2089+
2090+
// scale q to q * sqrt(epsilon_inf)
2091+
scale_bec<<<grid_size, BLOCK_SIZE>>>(
2092+
N,
2093+
annmb.sqrt_epsilon_inf,
2094+
nep_data.bec.data());
2095+
GPU_CHECK_KERNEL
2096+
}
2097+
18082098
if (paramb.charge_mode == 1 || paramb.charge_mode == 2 || paramb.charge_mode == 4) {
18092099
find_k_and_G(box.cpu_h);
18102100
find_structure_factor<<<(charge_para.num_kpoints_max - 1) / 64 + 1, 64>>>(
@@ -2107,6 +2397,57 @@ void NEP_Charge::compute_small_box(
21072397
zero_total_charge<<<N, 1024>>>(N, nep_data.charge.data());
21082398
GPU_CHECK_KERNEL
21092399

2400+
if (true) { // TODO
2401+
// get BEC (the diagonal part)
2402+
find_bec_diagonal<<<grid_size, BLOCK_SIZE>>>(
2403+
N,
2404+
nep_data.charge.data(),
2405+
nep_data.bec.data());
2406+
GPU_CHECK_KERNEL
2407+
2408+
// get BEC (radial descriptor part)
2409+
find_bec_radial_small_box<<<grid_size, BLOCK_SIZE>>>(
2410+
paramb,
2411+
annmb,
2412+
N,
2413+
N1,
2414+
N2,
2415+
(paramb.charge_mode >= 4) ? NN_angular.data() : NN_radial.data(),
2416+
(paramb.charge_mode >= 4) ? NL_angular.data() : NL_radial.data(),
2417+
type.data(),
2418+
(paramb.charge_mode >= 4) ? r12.data() + size_x12 * 3 : r12.data(),
2419+
(paramb.charge_mode >= 4) ? r12.data() + size_x12 * 4 : r12.data() + size_x12,
2420+
(paramb.charge_mode >= 4) ? r12.data() + size_x12 * 5 : r12.data() + size_x12 * 2,
2421+
nep_data.charge_derivative.data(),
2422+
nep_data.bec.data());
2423+
GPU_CHECK_KERNEL
2424+
2425+
// get BEC (angular descriptor part)
2426+
find_bec_angular_small_box<<<grid_size, BLOCK_SIZE>>>(
2427+
paramb,
2428+
annmb,
2429+
N,
2430+
N1,
2431+
N2,
2432+
NN_angular.data(),
2433+
NL_angular.data(),
2434+
type.data(),
2435+
r12.data() + size_x12 * 3,
2436+
r12.data() + size_x12 * 4,
2437+
r12.data() + size_x12 * 5,
2438+
nep_data.charge_derivative.data(),
2439+
nep_data.sum_fxyz.data(),
2440+
nep_data.bec.data());
2441+
GPU_CHECK_KERNEL
2442+
2443+
// scale q to q * sqrt(epsilon_inf)
2444+
scale_bec<<<grid_size, BLOCK_SIZE>>>(
2445+
N,
2446+
annmb.sqrt_epsilon_inf,
2447+
nep_data.bec.data());
2448+
GPU_CHECK_KERNEL
2449+
}
2450+
21102451
if (paramb.charge_mode == 1 || paramb.charge_mode == 2 || paramb.charge_mode == 4) {
21112452
find_k_and_G(box.cpu_h);
21122453
find_structure_factor<<<(charge_para.num_kpoints_max - 1) / 64 + 1, 64>>>(
@@ -2386,3 +2727,5 @@ const GPU_Vector<int>& NEP_Charge::get_NN_radial_ptr() { return nep_data.NN_radi
23862727
const GPU_Vector<int>& NEP_Charge::get_NL_radial_ptr() { return nep_data.NL_radial; }
23872728

23882729
GPU_Vector<float>& NEP_Charge::get_charge_reference() { return nep_data.charge; }
2730+
2731+
GPU_Vector<float>& NEP_Charge::get_bec_reference() { return nep_data.bec; }

src/force/nep_charge.cuh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ struct NEP_Charge_Data {
5454
GPU_Vector<float> C6; // C6
5555
GPU_Vector<float> D_C6; // related to dynamic C6
5656
GPU_Vector<float> C6_derivative; // dC6/dq
57+
GPU_Vector<float> bec; // BEC
5758
};
5859

5960
class NEP_Charge : public Potential
@@ -143,6 +144,8 @@ public:
143144

144145
GPU_Vector<float>& get_charge_reference();
145146

147+
GPU_Vector<float>& get_bec_reference();
148+
146149
private:
147150
ParaMB paramb;
148151
ANN annmb;

0 commit comments

Comments
 (0)