cholla/src/integrators/VL_2D_cuda.cu at f07c6b799effd442c5933845083afcdbb9a940a7 · cholla-hydro/cholla · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
/*! \file VL_2D_cuda.cu
 *  \brief Definitions of the cuda 2D VL algorithm functions. */

#ifdef VL

  #include <math.h>
  #include <stdio.h>

  #include "../global/global.h"
  #include "../global/global_cuda.h"
  #include "../hydro/hydro_cuda.h"
  #include "../integrators/VL_2D_cuda.h"
  #include "../reconstruction/plm_cuda.h"
  #include "../reconstruction/ppm_cuda.h"
  #include "../reconstruction/reconstruction.h"
  #include "../riemann_solvers/exact_cuda.h"
  #include "../riemann_solvers/hllc_cuda.h"
  #include "../riemann_solvers/roe_cuda.h"
  #include "../utils/gpu.hpp"

__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x,
                                                   Real *dev_F_y, int nx, int ny, int n_ghost, Real dx, Real dy,
                                                   Real dt, Real gamma, int n_fields);

void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy,
                          Real xbound, Real ybound, Real dt, int n_fields, int custom_grav)
{
  // Here, *dev_conserved contains the entire
  // set of conserved variables on the grid
  // concatenated into a 1-d array

  int n_cells             = nx * ny;
  [[maybe_unused]] int nz = 1;
  int ngrid               = (n_cells + TPB - 1) / TPB;

  // set values for GPU kernels
  // number of blocks per 1D grid
  dim3 dim2dGrid(ngrid, 1, 1);
  // number of threads per 1D block
  dim3 dim1dBlock(TPB, 1, 1);

  if (!memory_allocated) {
    // allocate GPU arrays
    // GPU_Error_Check( cudaMalloc((void**)&dev_conserved,
    // n_fields*n_cells*sizeof(Real)) );
    dev_conserved = d_conserved;
    GPU_Error_Check(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real)));
    GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real)));
    GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real)));
    GPU_Error_Check(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real)));
    GPU_Error_Check(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real)));
    GPU_Error_Check(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real)));
    GPU_Error_Check(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real)));

    // If memory is single allocated: memory_allocated becomes true and
    // successive timesteps won't allocate memory. If the memory is not single
    // allocated: memory_allocated remains Null and memory is allocated every
    // timestep.
    memory_allocated = true;
  }

  // Step 1: Use PCM reconstruction to put conserved variables into interface arrays
  // This step has been fused into the Riemann solver kernels

  // Step 2: Calculate first-order upwind fluxes
  #ifdef EXACT
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Exact_Fluxes_CUDA<reconstruction::Kind::pcm, 0>), dim2dGrid, dim1dBlock,
                     0, 0, dev_conserved, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, n_fields);
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Exact_Fluxes_CUDA<reconstruction::Kind::pcm, 1>), dim2dGrid, dim1dBlock,
                     0, 0, dev_conserved, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, n_fields);
  #endif
  #ifdef ROE
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA<reconstruction::Kind::pcm, 0>), dim2dGrid, dim1dBlock, 0,
                     0, dev_conserved, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, n_fields);
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA<reconstruction::Kind::pcm, 1>), dim2dGrid, dim1dBlock, 0,
                     0, dev_conserved, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, n_fields);
  #endif
  #ifdef HLLC
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_HLLC_Fluxes_CUDA<reconstruction::Kind::pcm, 0>), dim2dGrid, dim1dBlock,
                     0, 0, dev_conserved, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, n_fields);
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_HLLC_Fluxes_CUDA<reconstruction::Kind::pcm, 1>), dim2dGrid, dim1dBlock,
                     0, 0, dev_conserved, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, n_fields);
  #endif
  GPU_Error_Check();

  // Step 3: Update the conserved variables half a timestep
  hipLaunchKernelGGL(Update_Conserved_Variables_2D_half, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half,
                     F_x, F_y, nx, ny, n_ghost, dx, dy, 0.5 * dt, gama, n_fields);
  GPU_Error_Check();

  // Step 4: Construct left and right interface values using updated conserved
  // variables
  #if defined(PLMP) or defined(PLMC)
  hipLaunchKernelGGL(PLM_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt,
                     gama);
  hipLaunchKernelGGL(PLM_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt,
                     gama);
  #endif  // PLMP or PLMC
  #if defined(PPMP) or defined(PPMC)
  hipLaunchKernelGGL(PPM_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt,
                     gama);
  hipLaunchKernelGGL(PPM_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt,
                     gama);
  #endif  // PPMC
  GPU_Error_Check();

  // Step 5: Calculate the fluxes again
  #ifdef EXACT
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Exact_Fluxes_CUDA<reconstruction::Kind::chosen, 0>), dim2dGrid,
                     dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, n_fields);
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Exact_Fluxes_CUDA<reconstruction::Kind::chosen, 1>), dim2dGrid,
                     dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, n_fields);
  #endif
  #ifdef ROE
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA<reconstruction::Kind::chosen, 0>), dim2dGrid, dim1dBlock,
                     0, 0, dev_conserved, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, n_fields);
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA<reconstruction::Kind::chosen, 1>), dim2dGrid, dim1dBlock,
                     0, 0, dev_conserved, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, n_fields);
  #endif
  #ifdef HLLC
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_HLLC_Fluxes_CUDA<reconstruction::Kind::chosen, 0>), dim2dGrid,
                     dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, n_fields);
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_HLLC_Fluxes_CUDA<reconstruction::Kind::chosen, 1>), dim2dGrid,
                     dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, n_fields);
  #endif
  GPU_Error_Check();

  #ifdef DE
  // Compute the divergence of velocity before updating the conserved array,
  // this solves synchronization issues when adding this term on
  // Update_Conserved_Variables
  hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx,
                     Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields);
  #endif

  // Step 6: Update the conserved variable array
  hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off,
                     y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields, custom_grav);
  GPU_Error_Check();

  #ifdef DE
  hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, n_fields);
  hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, gama, n_fields);
  GPU_Error_Check();
  #endif

  return;
}

void Free_Memory_VL_2D()
{
  // free the GPU memory
  cudaFree(dev_conserved);
  cudaFree(dev_conserved_half);
  cudaFree(Q_Lx);
  cudaFree(Q_Rx);
  cudaFree(Q_Ly);
  cudaFree(Q_Ry);
  cudaFree(F_x);
  cudaFree(F_y);
}

__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x,
                                                   Real *dev_F_y, int nx, int ny, int n_ghost, Real dx, Real dy,
                                                   Real dt, Real gamma, int n_fields)
{
  int id, xid, yid, n_cells;
  int imo, jmo;

  Real dtodx = dt / dx;
  Real dtody = dt / dy;

  n_cells = nx * ny;

  // get a global thread ID
  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
  id          = threadIdx.x + blockId * blockDim.x;
  yid         = id / nx;
  xid         = id - yid * nx;

  #ifdef DE
  Real d, d_inv, vx, vy, vz;
  Real vx_imo, vx_ipo, vy_jmo, vy_jpo, P;
  int ipo, jpo;
  #endif

  // all threads but one outer ring of ghost cells
  if (xid > 0 && xid < nx - 1 && yid > 0 && yid < ny - 1) {
    imo = xid - 1 + yid * nx;
    jmo = xid + (yid - 1) * nx;
  #ifdef DE
    d     = dev_conserved[id];
    d_inv = 1.0 / d;
    vx    = dev_conserved[1 * n_cells + id] * d_inv;
    vy    = dev_conserved[2 * n_cells + id] * d_inv;
    vz    = dev_conserved[3 * n_cells + id] * d_inv;
    P     = (dev_conserved[4 * n_cells + id] - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0);
    // if (d < 0.0 || d != d) printf("Negative density before half step
    // update.\n"); if (P < 0.0) printf("%d Negative pressure before half step
    // update.\n", id);
    ipo    = xid + 1 + yid * nx;
    jpo    = xid + (yid + 1) * nx;
    vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo];
    vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo];
    vy_jmo = dev_conserved[2 * n_cells + jmo] / dev_conserved[jmo];
    vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo];
  #endif
    // update the conserved variable array
    dev_conserved_half[id] =
        dev_conserved[id] + dtodx * (dev_F_x[imo] - dev_F_x[id]) + dtody * (dev_F_y[jmo] - dev_F_y[id]);
    dev_conserved_half[n_cells + id] = dev_conserved[n_cells + id] +
                                       dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) +
                                       dtody * (dev_F_y[n_cells + jmo] - dev_F_y[n_cells + id]);
    dev_conserved_half[2 * n_cells + id] = dev_conserved[2 * n_cells + id] +
                                           dtodx * (dev_F_x[2 * n_cells + imo] - dev_F_x[2 * n_cells + id]) +
                                           dtody * (dev_F_y[2 * n_cells + jmo] - dev_F_y[2 * n_cells + id]);
    dev_conserved_half[3 * n_cells + id] = dev_conserved[3 * n_cells + id] +
                                           dtodx * (dev_F_x[3 * n_cells + imo] - dev_F_x[3 * n_cells + id]) +
                                           dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]);
    dev_conserved_half[4 * n_cells + id] = dev_conserved[4 * n_cells + id] +
                                           dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) +
                                           dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]);
  #ifdef SCALAR
    for (int i = 0; i < NSCALARS; i++) {
      dev_conserved_half[(5 + i) * n_cells + id] =
          dev_conserved[(5 + i) * n_cells + id] +
          dtodx * (dev_F_x[(5 + i) * n_cells + imo] - dev_F_x[(5 + i) * n_cells + id]) +
          dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]);
    }
  #endif
  #ifdef DE
    dev_conserved_half[(n_fields - 1) * n_cells + id] =
        dev_conserved[(n_fields - 1) * n_cells + id] +
        dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - dev_F_x[(n_fields - 1) * n_cells + id]) +
        dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]) +
        0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo));
  #endif
  }
}

#endif  // VL