-
Notifications
You must be signed in to change notification settings - Fork 39
Expand file tree
/
Copy pathsimple_2D_cuda.cu
More file actions
119 lines (106 loc) · 5.29 KB
/
simple_2D_cuda.cu
File metadata and controls
119 lines (106 loc) · 5.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/*! \file simple_2D_cuda.cu
* \brief Definitions of the cuda 2D simple algorithm functions. */
#include <math.h>
#include <stdio.h>
#include "../global/global.h"
#include "../global/global_cuda.h"
#include "../hydro/hydro_cuda.h"
#include "../integrators/simple_2D_cuda.h"
#include "../reconstruction/plm_cuda.h"
#include "../reconstruction/ppm_cuda.h"
#include "../reconstruction/reconstruction.h"
#include "../riemann_solvers/exact_cuda.h"
#include "../riemann_solvers/hllc_cuda.h"
#include "../riemann_solvers/roe_cuda.h"
#include "../utils/gpu.hpp"
void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy,
Real xbound, Real ybound, Real dt, int n_fields, int custom_grav)
{
// Here, *dev_conserved contains the entire
// set of conserved variables on the grid
// concatenated into a 1-d array
int n_cells = nx * ny;
[[maybe_unused]] int nz = 1;
int ngrid = (n_cells + TPB - 1) / TPB;
// set values for GPU kernels
// number of blocks per 1D grid
dim3 dim2dGrid(ngrid, 1, 1);
// number of threads per 1D block
dim3 dim1dBlock(TPB, 1, 1);
if (!memory_allocated) {
// allocate memory on the GPU
dev_conserved = d_conserved;
// GPU_Error_Check( cudaMalloc((void**)&dev_conserved,
// n_fields*n_cells*sizeof(Real)) );
GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real)));
GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real)));
GPU_Error_Check(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real)));
GPU_Error_Check(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real)));
GPU_Error_Check(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real)));
GPU_Error_Check(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real)));
// If memory is single allocated: memory_allocated becomes true and
// successive timesteps won't allocate memory. If the memory is not single
// allocated: memory_allocated remains Null and memory is allocated every
// timestep.
memory_allocated = true;
}
// Step 1: Do the reconstruction
#if defined(PLMP) or defined(PLMC)
hipLaunchKernelGGL(PLM_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama);
hipLaunchKernelGGL(PLM_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama);
#endif // PLMP or PLMC
#if defined(PPMP) or defined(PPMC)
hipLaunchKernelGGL(PPM_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama);
hipLaunchKernelGGL(PPM_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama);
#endif
GPU_Error_Check();
// Step 2: Calculate the fluxes
#ifdef EXACT
hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Exact_Fluxes_CUDA<reconstruction::Kind::chosen, 0>), dim2dGrid,
dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, n_fields);
hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Exact_Fluxes_CUDA<reconstruction::Kind::chosen, 1>), dim2dGrid,
dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, n_fields);
#endif
#ifdef ROE
hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA<reconstruction::Kind::chosen, 0>), dim2dGrid, dim1dBlock,
0, 0, dev_conserved, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, n_fields);
hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA<reconstruction::Kind::chosen, 1>), dim2dGrid, dim1dBlock,
0, 0, dev_conserved, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, n_fields);
#endif
#ifdef HLLC
hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_HLLC_Fluxes_CUDA<reconstruction::Kind::chosen, 0>), dim2dGrid,
dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, n_fields);
hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_HLLC_Fluxes_CUDA<reconstruction::Kind::chosen, 1>), dim2dGrid,
dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, n_fields);
#endif
GPU_Error_Check();
#ifdef DE
// Compute the divergence of Vel before updating the conserved array, this
// solves synchronization issues when adding this term on
// Update_Conserved_Variables
hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx,
Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields);
#endif
// Step 3: Update the conserved variable array
hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off,
y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields, custom_grav);
GPU_Error_Check();
// Synchronize the total and internal energy
#ifdef DE
hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, n_fields);
hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, gama, n_fields);
GPU_Error_Check();
#endif
return;
}
void Free_Memory_Simple_2D()
{
// free the GPU memory
cudaFree(dev_conserved);
cudaFree(Q_Lx);
cudaFree(Q_Rx);
cudaFree(Q_Ly);
cudaFree(Q_Ry);
cudaFree(F_x);
cudaFree(F_y);
}