Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 42 additions & 15 deletions src/alge/cs_gradient_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1838,7 +1838,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// b_face_cells);


// _compute_reconstruct_v_b_face_v2<<<get_gridsize(n_b_faces, blocksize) * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_v2<<<get_gridsize(n_b_faces * 3, blocksize), blocksize, 0, stream>>>
// ( n_b_faces * 3,
// coefb_d,
// coefa_d,
Expand All @@ -1849,7 +1849,34 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// grad_d,
// b_f_face_normal,
// b_face_cells);

// _compute_reconstruct_v_b_face_new<<<get_gridsize(n_b_faces, blocksize), blocksize, 0, stream>>>
// ( n_b_faces,
// coefb_d,
// coefa_d,
// pvar_d,
// inc,
// diipb,
// r_grad_d,
// grad_d,
// b_f_face_normal,
// b_face_cells,
// cell_b_faces_idx);


_compute_reconstruct_v_b_face_new_v2<<<get_gridsize(n_b_faces * 3, blocksize), blocksize, 0, stream>>>
( n_b_faces * 3,
coefb_d,
coefa_d,
pvar_d,
inc,
diipb,
r_grad_d,
grad_d,
b_f_face_normal,
b_face_cells,
cell_b_faces_idx);

/*************************************Kernels Scatter conflict free************************************/
// _compute_reconstruct_v_b_face_cf<<<get_gridsize(n_b_faces, blocksize), blocksize, 0, stream>>>
// ( n_b_faces,
Expand All @@ -1863,7 +1890,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// b_f_face_normal,
// b_face_cells);

// _compute_reconstruct_v_b_face_v2_cf<<<get_gridsize(n_b_faces, blocksize) * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_v2_cf<<<get_gridsize(n_b_faces * 3, blocksize), blocksize, 0, stream>>>
// ( n_b_faces * 3,
// coefb_d,
// coefa_d,
Expand Down Expand Up @@ -1938,19 +1965,19 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,


/*************************************Kernels Gather shared memory***************************************/
_compute_reconstruct_v_b_face_gather_v5<<<get_gridsize(n_b_cells, blocksize), blocksize, 0, stream>>>
( n_b_cells,
coefb_d,
coefa_d,
pvar_d,
inc,
diipb,
r_grad_d,
grad_d,
b_f_face_normal,
b_cells,
cell_b_faces,
cell_b_faces_idx);
// _compute_reconstruct_v_b_face_gather_v5<<<get_gridsize(n_b_cells, blocksize), blocksize, 0, stream>>>
// ( n_b_cells,
// coefb_d,
// coefa_d,
// pvar_d,
// inc,
// diipb,
// r_grad_d,
// grad_d,
// b_f_face_normal,
// b_cells,
// cell_b_faces,
// cell_b_faces_idx);


CS_CUDA_CHECK(cudaEventRecord(b_faces_2, stream));
Expand Down
120 changes: 120 additions & 0 deletions src/alge/cs_reconstruct_vector_gradient_scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,126 @@ _compute_reconstruct_v_b_face(cs_lnum_t n_b_faces,
}
}



__global__ static void
_compute_reconstruct_v_b_face_new(cs_lnum_t n_b_faces,
const cs_real_33_t *restrict coefbv,
const cs_real_3_t *restrict coefav,
const cs_real_3_t *restrict pvar,
int inc,
const cs_real_3_t *restrict diipb,
const cs_real_33_t *restrict r_grad,
cs_real_33_t *restrict grad,
const cs_real_3_t *restrict b_f_face_normal,
const cs_lnum_t *restrict b_face_cells,
const cs_lnum_t *restrict cell_b_faces_idx)
{
cs_lnum_t f_id = blockIdx.x * blockDim.x + threadIdx.x;

if(f_id >= n_b_faces){
return;
}
cs_lnum_t c_id;
cs_real_t pfac, rfac, vecfac, g_cij;

c_id = b_face_cells[f_id];

int need_atomic = cell_b_faces_idx[c_id+1] - cell_b_faces_idx[c_id] - 1;

for (cs_lnum_t i = 0; i < 3; i++) {

pfac = inc*coefav[f_id][i];

for (cs_lnum_t k = 0; k < 3; k++){
pfac += coefbv[f_id][i][k] * pvar[c_id][k];
}

pfac -= pvar[c_id][i];

/* Reconstruction part */
rfac = 0.;
for (cs_lnum_t k = 0; k < 3; k++) {
vecfac = r_grad[c_id][k][0] * diipb[f_id][0]
+ r_grad[c_id][k][1] * diipb[f_id][1]
+ r_grad[c_id][k][2] * diipb[f_id][2];
rfac += coefbv[f_id][i][k] * vecfac;
}
for (cs_lnum_t j = 0; j < 3; j++){
g_cij = (pfac + rfac) * b_f_face_normal[f_id][j];
if (need_atomic) {
atomicAdd(&grad[c_id][i][j], g_cij);
}
else {
grad[c_id][i][j] += g_cij;
}
}
}
}



__global__ static void
_compute_reconstruct_v_b_face_new_v2(cs_lnum_t n_b_faces,
const cs_real_33_t *restrict coefbv,
const cs_real_3_t *restrict coefav,
const cs_real_3_t *restrict pvar,
int inc,
const cs_real_3_t *restrict diipb,
const cs_real_33_t *restrict r_grad,
cs_real_33_t *restrict grad,
const cs_real_3_t *restrict b_f_face_normal,
const cs_lnum_t *restrict b_face_cells,
const cs_lnum_t *restrict cell_b_faces_idx)
{
cs_lnum_t f_idt = blockIdx.x * blockDim.x + threadIdx.x;

if(f_idt >= n_b_faces){
return;
}

size_t f_id = f_idt / 3;
size_t i = f_idt % 3;

cs_lnum_t c_id;
cs_real_t pond, ktpond, pfac, rfac, vecfac, g_cij;

// if (coupled_faces[f_id * cpl_stride])
// return;

c_id = b_face_cells[f_id];

int need_atomic = cell_b_faces_idx[c_id+1] - cell_b_faces_idx[c_id] - 1;

pfac = inc*coefav[f_id][i];

for (cs_lnum_t k = 0; k < 3; k++){
pfac += coefbv[f_id][i][k] * pvar[c_id][k];
}

pfac -= pvar[c_id][i];

/* Reconstruction part */
rfac = 0.;
for (cs_lnum_t k = 0; k < 3; k++) {
vecfac = r_grad[c_id][k][0] * diipb[f_id][0]
+ r_grad[c_id][k][1] * diipb[f_id][1]
+ r_grad[c_id][k][2] * diipb[f_id][2];
rfac += coefbv[f_id][i][k] * vecfac;
}

for (cs_lnum_t j = 0; j < 3; j++){
g_cij = (pfac + rfac) * b_f_face_normal[f_id][j];
if (need_atomic) {
atomicAdd(&grad[c_id][i][j], g_cij);
}
else {
grad[c_id][i][j] += g_cij;
}
}

}

__global__ static void
_compute_reconstruct_correction(cs_lnum_t n_cells,
cs_lnum_t has_dc,
Expand Down