From 2fcbb5682ce8c280c276a3a9d8775e8569183f42 Mon Sep 17 00:00:00 2001 From: alserene Date: Tue, 15 Oct 2024 12:38:30 +1100 Subject: [PATCH 001/145] First pass at filtering CUDA kernel. --- build_cffi.py | 13 +- src/py21cmfast/src/InitialConditions.c | 4 + src/py21cmfast/src/filtering.c | 12 +- src/py21cmfast/src/filtering.cu | 219 +++++++++++++++++++++++++ src/py21cmfast/src/filtering.h | 1 + src/py21cmfast/src/hello_world.cu | 11 ++ src/py21cmfast/src/hello_world.h | 6 + 7 files changed, 257 insertions(+), 9 deletions(-) create mode 100644 src/py21cmfast/src/filtering.cu create mode 100644 src/py21cmfast/src/hello_world.cu create mode 100644 src/py21cmfast/src/hello_world.h diff --git a/build_cffi.py b/build_cffi.py index 75ee9b8e2..134220815 100755 --- a/build_cffi.py +++ b/build_cffi.py @@ -27,6 +27,10 @@ if f.endswith(".c") ] +# compiled cuda code +extra_objects = [os.path.join(CLOC, "hello_world.o"), os.path.join(CLOC, "filtering.o")] +extra_link_args = ["-lcudart"] + # Set the C-code logging level. # If DEBUG is set, we default to the highest level, but if not, # we set it to the level just above no logging at all. 
@@ -72,9 +76,14 @@ libraries = ["m", "gsl", "gslcblas", "fftw3f_omp", "fftw3f"] +# GPU fft libraries +# if True: +# libraries += ["cufft", "cufftw"] + # stuff for gperftools if "PROFILE" in os.environ: - libraries += ["profiler", "tcmalloc"] + # libraries += ["profiler", "tcmalloc"] + libraries += ["profiler"] # we need this even if DEBUG is off extra_compile_args += ["-g"] @@ -110,6 +119,8 @@ library_dirs=library_dirs, libraries=libraries, extra_compile_args=extra_compile_args, + extra_objects=extra_objects, + extra_link_args=extra_link_args, ) # Header files containing types, globals and function prototypes diff --git a/src/py21cmfast/src/InitialConditions.c b/src/py21cmfast/src/InitialConditions.c index 046d2f671..fbbd5dc98 100644 --- a/src/py21cmfast/src/InitialConditions.c +++ b/src/py21cmfast/src/InitialConditions.c @@ -19,6 +19,7 @@ #include "indexing.h" #include "dft.h" #include "filtering.h" +#include "hello_world.h" #include "InitialConditions.h" @@ -153,6 +154,9 @@ int ComputeInitialConditions( int status; + printf("Begin computing initial conditions"); + call_cuda(); + Try{ // This Try wraps the entire function so we don't indent. 
// Makes the parameter structs visible to a variety of functions/macros diff --git a/src/py21cmfast/src/filtering.c b/src/py21cmfast/src/filtering.c index 65c48912e..49827bbb7 100644 --- a/src/py21cmfast/src/filtering.c +++ b/src/py21cmfast/src/filtering.c @@ -81,11 +81,11 @@ void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_p switch(RES) { case 0: dimension = user_params_global->DIM; - midpoint = MIDDLE; + midpoint = MIDDLE; // DIM / 2 break; case 1: dimension = user_params_global->HII_DIM; - midpoint = HII_MIDDLE; + midpoint = HII_MIDDLE; // HII_DIM / 2 break; default: LOG_ERROR("Resolution for filter functions must be 0(DIM) or 1(HII_DIM)"); @@ -105,6 +105,7 @@ void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_p int n_x, n_z, n_y; float k_x, k_y, k_z, k_mag_sq, kR; unsigned long long grid_index; + #pragma omp for for (n_x=0; n_xmidpoint) {k_x =(n_x-dimension) * DELTA_K;} @@ -118,6 +119,7 @@ void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_p k_z = n_z * DELTA_K_PARA; k_mag_sq = k_x*k_x + k_y*k_y + k_z*k_z; + // Get index of flattened 3d array grid_index = RES==1 ? HII_C_INDEX(n_x, n_y, n_z) : C_INDEX(n_x, n_y, n_z); if (filter_type == 0){ // real space top-hat @@ -125,20 +127,14 @@ void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_p box[grid_index] *= real_tophat_filter(kR); } else if (filter_type == 1){ // k-space top hat - //NOTE: why was this commented???? 
- // This is actually (kR^2) but since we zero the value and find kR > 1 this is more computationally efficient - // kR = 0.17103765852*( k_x*k_x + k_y*k_y + k_z*k_z )*R*R; kR = sqrt(k_mag_sq)*R; box[grid_index] *= sharp_k_filter(kR); } else if (filter_type == 2){ // gaussian - // This is actually (kR^2) but since we zero the value and find kR > 1 this is more computationally efficient kR = k_mag_sq*R*R; box[grid_index] *= gaussian_filter(kR); } - //The next two filters are not given by the HII_FILTER global, but used for specific grids else if (filter_type == 3){ // exponentially decaying tophat, param == scale of decay (MFP) - //NOTE: This should be optimized, I havne't looked at it in a while box[grid_index] *= exp_mfp_filter(sqrt(k_mag_sq),R,R_param,R_const); } else if (filter_type == 4){ //spherical shell, R_param == inner radius diff --git a/src/py21cmfast/src/filtering.cu b/src/py21cmfast/src/filtering.cu new file mode 100644 index 000000000..b6361405b --- /dev/null +++ b/src/py21cmfast/src/filtering.cu @@ -0,0 +1,219 @@ +#include +#include +#include +#include +#include + +// GPU +#include +#include +#include +// #include +// #include + +#include "cexcept.h" +#include "exceptions.h" +#include "logger.h" +#include "Constants.h" +#include "InputParameters.h" +#include "indexing.h" +#include "dft.h" + +__device__ inline double real_tophat_filter(double kR) { + // Second order taylor expansion around kR==0 + if (kR < 1e-4) + return 1 - kR*kR/10; + return 3.0*pow(kR, -3) * (sin(kR) - cos(kR)*kR); +} + +__device__ inline double sharp_k_filter(double kR) { + if (kR * 0.413566994 > 1) + return 0.; + return 1; +} + +__device__ inline double gaussian_filter(double kR_squared) { + return exp(-0.643 * 0.643 * kR_squared / 2.); +} + +__device__ inline double exp_mfp_filter(double k, double R, double mfp, double exp_term) { + double f; + double kR = k * R; + double ratio = mfp / R; + + // Second order taylor expansion around kR==0 + if (kR < 1e-4) { + double ts_0 = 6 * 
pow(ratio, 3) - exp_term * (6 * pow(ratio, 3) + 6 * pow(ratio, 2) + 3 * ratio); + return ts_0 + (exp_term * (2 * pow(ratio, 2) + 0.5 * ratio) - 2 * ts_0 * pow(ratio, 2)) * kR * kR; + } + // Davies & Furlanetto MFP-eps(r) window function + f = (kR * kR * pow(ratio, 2) + 2 * ratio + 1) * ratio * cos(kR); + f += (kR * kR * (pow(ratio, 2) - pow(ratio, 3)) + ratio + 1) * sin(kR) / kR; + f *= exp_term; + f -= 2 * pow(ratio, 2); + f *= -3 * ratio/pow(pow(kR * ratio, 2) + 1, 2); + return f; +} + +__device__ inline double spherical_shell_filter(double k, double R_outer, double R_inner) { + double kR_inner = k * R_inner; + double kR_outer = k * R_outer; + + // Second order taylor expansion around kR_outer==0 + if (kR_outer < 1e-4) + return 1. - kR_outer*kR_outer / 10 * \ + (pow(R_inner / R_outer, 5) - 1) / \ + (pow(R_inner / R_outer, 3) - 1); + + return 3.0 / (pow(kR_outer, 3) - pow(kR_inner, 3)) \ + * (sin(kR_outer) - cos(kR_outer) * kR_outer \ + - sin(kR_inner) + cos(kR_inner) * kR_inner); +} + +// __global__ void filter_box_kernel(fftwf_complex *box, int dimension, int midpoint, int midpoint_para, double delta_k, float R, float R_param, double R_const, int filter_type) { +__global__ void filter_box_kernel(cuFloatComplex *box, int dimension, int midpoint, int midpoint_para, double delta_k, float R, float R_param, double R_const, int filter_type) { + + // Get index of d_box (flattened k-box) + unsigned long long idx = blockIdx.x * blockDim.x + threadIdx.x; + + float kR; + + // Compute the 3D indices (n_x, n_y, n_z) for the k-box from the flattened index (idx) + int n_z = idx % (midpoint_para + 1); + unsigned long long remaining = idx / (midpoint_para + 1); // Calculate remaining index + int n_y = remaining % dimension; + int n_x = remaining / dimension; + + // Compute wave vector components + float k_x = (n_x - dimension * (n_x > midpoint)) * delta_k; // Wrap around midpoint + float k_y = (n_y - dimension * (n_y > midpoint)) * delta_k; + float k_z = n_z * delta_k; + + // 
Compute squared magnitude of wave vector + float k_mag_sq = k_x*k_x + k_y*k_y + k_z*k_z; + + if (filter_type == 0) { // real space top-hat + kR = sqrt(k_mag_sq) * R; + // box[idx] *= real_tophat_filter(kR); + // box[idx] = cuCmul(box[idx], real_tophat_filter(kR)); + box[idx] = cuCmulf(box[idx], make_cuFloatComplex((float)real_tophat_filter(kR), 0.f)); + } + else if (filter_type == 1) { // k-space top hat + kR = sqrt(k_mag_sq) * R; + // box[idx] *= sharp_k_filter(kR); + box[idx] = cuCmulf(box[idx], make_cuFloatComplex((float)sharp_k_filter(kR), 0.f)); + } + else if (filter_type == 2) { // gaussian + kR = k_mag_sq * R * R; + // box[idx] *= gaussian_filter(kR); + box[idx] = cuCmulf(box[idx], make_cuFloatComplex((float)gaussian_filter(kR), 0.f)); + } + else if (filter_type == 3) { // exponentially decaying tophat + // box[idx] *= exp_mfp_filter(sqrt(k_mag_sq), R, R_param, R_const); + box[idx] = cuCmulf(box[idx], make_cuFloatComplex((float)exp_mfp_filter(sqrt(k_mag_sq), R, R_param, R_const), 0.f)); + } + else if (filter_type == 4) { //spherical shell + // box[idx] *= spherical_shell_filter(sqrt(k_mag_sq), R, R_param); + box[idx] = cuCmulf(box[idx], make_cuFloatComplex((float)spherical_shell_filter(sqrt(k_mag_sq), R, R_param), 0.f)); + } + else { + if (idx == 0) { + LOG_WARNING("Filter type %i is undefined. 
Box is unfiltered.", filter_type); + } + } + +} + +// *box is a pointer, so only memory address is passed, not entire array +void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_param) { + + // Get required values + int dimension, midpoint, midpoint_para, num_pixels; + switch(RES) { + case 0: + dimension = user_params_global->DIM; + midpoint = MIDDLE; // DIM / 2 + midpoint_para = MID_PARA; // NON_CUBIC_FACTOR * HII_DIM / 2 + num_pixels = KSPACE_NUM_PIXELS; + break; + case 1: + dimension = user_params_global->HII_DIM; + midpoint = HII_MIDDLE; // HII_DIM / 2 + midpoint_para = HII_MID_PARA; // NON_CUBIC_FACTOR * HII_DIM / 2 + num_pixels = HII_KSPACE_NUM_PIXELS; + break; + default: + LOG_ERROR("Resolution for filter functions must be 0(DIM) or 1(HII_DIM)"); + Throw(ValueError); + break; + } + double delta_k = DELTA_K; + double R_const; + if (filter_type == 3) { + R_const = exp(-R / R_param); + } + + // Get size of flattened array + size_t size = num_pixels * sizeof(fftwf_complex); + + // Allocate device memory + fftwf_complex* d_box; + cudaMalloc(&d_box, size); + + // Copy array from host to device + cudaMemcpy(d_box, box, size, cudaMemcpyHostToDevice); + + // Invoke kernel + int threadsPerBlock = 256; + int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; + filter_box_kernel<<>>(reinterpret_cast(d_box), dimension, midpoint, midpoint_para, delta_k, R, R_param, R_const, filter_type); + + // Copy results from device to host + cudaMemcpy(box, d_box, size, cudaMemcpyDeviceToHost); + + // Deallocate device memory + cudaFree(d_box); +} + +// Test function to filter a box without computing a whole output box +int test_filter(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options + , float *input_box, double R, double R_param, int filter_flag, double *result) { + int i,j,k; + unsigned long long int ii; + + Broadcast_struct_global_all(user_params, cosmo_params, astro_params, 
flag_options); + + //setup the box + fftwf_complex *box_unfiltered = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*HII_KSPACE_NUM_PIXELS); + fftwf_complex *box_filtered = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*HII_KSPACE_NUM_PIXELS); + + for (i=0; iHII_DIM; i++) + for (j=0; jHII_DIM; j++) + for (k=0; kUSE_FFTW_WISDOM, user_params->HII_DIM, HII_D_PARA, user_params->N_THREADS, box_unfiltered); + + float num_pixels = HII_TOT_NUM_PIXELS; + for(ii=0;iiUSE_FFTW_WISDOM, user_params->HII_DIM, HII_D_PARA, user_params->N_THREADS, box_filtered); + + for (i=0; iHII_DIM; i++) + for (j=0; jHII_DIM; j++) + for (k=0; k +#include void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_param); int test_filter(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options diff --git a/src/py21cmfast/src/hello_world.cu b/src/py21cmfast/src/hello_world.cu new file mode 100644 index 000000000..0d9e4a0fa --- /dev/null +++ b/src/py21cmfast/src/hello_world.cu @@ -0,0 +1,11 @@ +#include + +__global__ void hello_kernel() { + printf("Hello World from GPU! BlockIdx: %d, ThreadIdx: %d\n", blockIdx.x, threadIdx.x); +} + +int call_cuda() { + hello_kernel<<<3, 3>>>(); + cudaDeviceSynchronize(); + return 0; +} \ No newline at end of file diff --git a/src/py21cmfast/src/hello_world.h b/src/py21cmfast/src/hello_world.h new file mode 100644 index 000000000..d11e30a1a --- /dev/null +++ b/src/py21cmfast/src/hello_world.h @@ -0,0 +1,6 @@ +#ifndef _HELLO_WORLD_H +#define _HELLO_WORLD_H + +int call_cuda(); + +#endif // HELLO_WORLD_H \ No newline at end of file From 1ad9912a63ffe0e6487f1a71e4193c307cdd8f70 Mon Sep 17 00:00:00 2001 From: alserene Date: Thu, 17 Oct 2024 13:19:33 +1100 Subject: [PATCH 002/145] Wrap header files in extern C for C linkage. 
--- build_cffi.py | 1 + src/py21cmfast/src/BrightnessTemperatureBox.h | 6 +++ src/py21cmfast/src/HaloBox.h | 6 +++ src/py21cmfast/src/HaloField.h | 6 +++ src/py21cmfast/src/InitialConditions.h | 6 +++ src/py21cmfast/src/InputParameters.h | 6 +++ src/py21cmfast/src/IonisationBox.h | 6 +++ src/py21cmfast/src/LuminosityFunction.h | 6 +++ src/py21cmfast/src/PerturbField.h | 6 +++ src/py21cmfast/src/PerturbHaloField.h | 6 +++ src/py21cmfast/src/SpinTemperatureBox.h | 6 +++ src/py21cmfast/src/Stochasticity.h | 6 +++ src/py21cmfast/src/_inputparams_wrapper.h | 8 ++++ src/py21cmfast/src/_outputstructs_wrapper.h | 7 ++++ src/py21cmfast/src/bubble_helper_progs.h | 6 +++ src/py21cmfast/src/cosmology.h | 6 +++ src/py21cmfast/src/debugging.h | 6 +++ src/py21cmfast/src/dft.c | 6 +++ src/py21cmfast/src/dft.h | 6 +++ src/py21cmfast/src/elec_interp.h | 7 ++++ src/py21cmfast/src/exceptions.h | 6 +++ src/py21cmfast/src/filtering.c | 25 ++++++++++-- src/py21cmfast/src/filtering.cu | 38 ++++++++++++------- src/py21cmfast/src/filtering.h | 12 ++++++ src/py21cmfast/src/heating_helper_progs.h | 6 +++ src/py21cmfast/src/hello_world.h | 7 ++++ src/py21cmfast/src/hmf.h | 6 +++ src/py21cmfast/src/interp_tables.h | 6 +++ src/py21cmfast/src/interpolation.h | 6 +++ src/py21cmfast/src/logger.h | 7 ++++ src/py21cmfast/src/photoncons.h | 6 +++ src/py21cmfast/src/recombinations.h | 6 +++ src/py21cmfast/src/subcell_rsds.h | 6 +++ src/py21cmfast/src/thermochem.h | 6 +++ 34 files changed, 245 insertions(+), 17 deletions(-) diff --git a/build_cffi.py b/build_cffi.py index 134220815..4767a518f 100755 --- a/build_cffi.py +++ b/build_cffi.py @@ -28,6 +28,7 @@ ] # compiled cuda code +# extra_objects = [os.path.join(CLOC, "hello_world.o")] extra_objects = [os.path.join(CLOC, "hello_world.o"), os.path.join(CLOC, "filtering.o")] extra_link_args = ["-lcudart"] diff --git a/src/py21cmfast/src/BrightnessTemperatureBox.h b/src/py21cmfast/src/BrightnessTemperatureBox.h index 449f2c775..17875177e 100644 --- 
a/src/py21cmfast/src/BrightnessTemperatureBox.h +++ b/src/py21cmfast/src/BrightnessTemperatureBox.h @@ -5,9 +5,15 @@ #include "InputParameters.h" #include "OutputStructs.h" +#ifdef __cplusplus +extern "C" { +#endif int ComputeBrightnessTemp(float redshift, UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, TsBox *spin_temp, IonizedBox *ionized_box, PerturbedField *perturb_field, BrightnessTemp *box); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/HaloBox.h b/src/py21cmfast/src/HaloBox.h index e368d2e46..dbf47a7c3 100644 --- a/src/py21cmfast/src/HaloBox.h +++ b/src/py21cmfast/src/HaloBox.h @@ -11,6 +11,9 @@ #include "InputParameters.h" #include "OutputStructs.h" +#ifdef __cplusplus +extern "C" { +#endif //Compute the HaloBox Object int ComputeHaloBox(double redshift, UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions * flag_options, InitialConditions *ini_boxes, PerturbedField * perturbed_field, PerturbHaloField *halos, @@ -21,4 +24,7 @@ int test_halo_props(double redshift, UserParams *user_params, CosmoParams *cosmo FlagOptions * flag_options, float * vcb_grid, float *J21_LW_grid, float *z_re_grid, float *Gamma12_ion_grid, PerturbHaloField *halos, float *halo_props_out); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/HaloField.h b/src/py21cmfast/src/HaloField.h index 09ce507d4..a3116cd6d 100644 --- a/src/py21cmfast/src/HaloField.h +++ b/src/py21cmfast/src/HaloField.h @@ -5,9 +5,15 @@ #include "InputParameters.h" #include "OutputStructs.h" +#ifdef __cplusplus +extern "C" { +#endif int ComputeHaloField(float redshift_desc, float redshift, UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, InitialConditions *boxes, unsigned long long int random_seed, HaloField * halos_desc, HaloField *halos); +#ifdef __cplusplus +} +#endif #endif diff --git 
a/src/py21cmfast/src/InitialConditions.h b/src/py21cmfast/src/InitialConditions.h index 181bd0868..a3f8a5f00 100644 --- a/src/py21cmfast/src/InitialConditions.h +++ b/src/py21cmfast/src/InitialConditions.h @@ -6,6 +6,9 @@ #include "OutputStructs.h" #include +#ifdef __cplusplus +extern "C" { +#endif int ComputeInitialConditions( unsigned long long random_seed, UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes @@ -14,4 +17,7 @@ int ComputeInitialConditions( void seed_rng_threads(gsl_rng * rng_arr[], unsigned long long int seed); void free_rng_threads(gsl_rng * rng_arr[]); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/InputParameters.h b/src/py21cmfast/src/InputParameters.h index b4c104197..c9e979c14 100644 --- a/src/py21cmfast/src/InputParameters.h +++ b/src/py21cmfast/src/InputParameters.h @@ -6,7 +6,13 @@ // Since it is unguarded, make sure to ONLY include this file from here #include "_inputparams_wrapper.h" +#ifdef __cplusplus +extern "C" { +#endif void Broadcast_struct_global_all(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options); void Broadcast_struct_global_noastro(UserParams *user_params, CosmoParams *cosmo_params); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/IonisationBox.h b/src/py21cmfast/src/IonisationBox.h index 302e69fa3..03fe6ed8b 100644 --- a/src/py21cmfast/src/IonisationBox.h +++ b/src/py21cmfast/src/IonisationBox.h @@ -4,6 +4,9 @@ #include "InputParameters.h" #include "OutputStructs.h" +#ifdef __cplusplus +extern "C" { +#endif int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, PerturbedField *perturbed_field, PerturbedField *previous_perturbed_field, @@ -11,4 +14,7 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para HaloBox *halos, InitialConditions *ini_boxes, IonizedBox 
*box); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/LuminosityFunction.h b/src/py21cmfast/src/LuminosityFunction.h index cf7fb8417..e639f2e45 100644 --- a/src/py21cmfast/src/LuminosityFunction.h +++ b/src/py21cmfast/src/LuminosityFunction.h @@ -3,7 +3,13 @@ #include "InputParameters.h" +#ifdef __cplusplus +extern "C" { +#endif int ComputeLF(int nbins, UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, int component, int NUM_OF_REDSHIFT_FOR_LF, float *z_LF, float *M_TURNs, double *M_uv_z, double *M_h_z, double *log10phi); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/PerturbField.h b/src/py21cmfast/src/PerturbField.h index 2a2da997d..50550aec1 100644 --- a/src/py21cmfast/src/PerturbField.h +++ b/src/py21cmfast/src/PerturbField.h @@ -4,9 +4,15 @@ #include "InputParameters.h" #include "OutputStructs.h" +#ifdef __cplusplus +extern "C" { +#endif int ComputePerturbField( float redshift, UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes, PerturbedField *perturbed_field ); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/PerturbHaloField.h b/src/py21cmfast/src/PerturbHaloField.h index dfc9b6c1b..16f82e0f6 100644 --- a/src/py21cmfast/src/PerturbHaloField.h +++ b/src/py21cmfast/src/PerturbHaloField.h @@ -4,9 +4,15 @@ #include "InputParameters.h" #include "OutputStructs.h" +#ifdef __cplusplus +extern "C" { +#endif int ComputePerturbHaloField(float redshift, UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, InitialConditions *boxes, HaloField *halos, PerturbHaloField *halos_perturbed); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/SpinTemperatureBox.h b/src/py21cmfast/src/SpinTemperatureBox.h index 2d081ee37..2ace07e90 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.h +++ b/src/py21cmfast/src/SpinTemperatureBox.h @@ -4,6 +4,9 @@ #include 
"InputParameters.h" #include "OutputStructs.h" +#ifdef __cplusplus +extern "C" { +#endif int ComputeTsBox(float redshift, float prev_redshift, UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, float perturbed_field_redshift, short cleanup, @@ -14,4 +17,7 @@ int UpdateXraySourceBox(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, HaloBox *halobox, double R_inner, double R_outer, int R_ct, XraySourceBox *source_box); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/Stochasticity.h b/src/py21cmfast/src/Stochasticity.h index be76ee7af..f5de71e57 100644 --- a/src/py21cmfast/src/Stochasticity.h +++ b/src/py21cmfast/src/Stochasticity.h @@ -4,6 +4,9 @@ #include "InputParameters.h" #include "OutputStructs.h" +#ifdef __cplusplus +extern "C" { +#endif int stochastic_halofield(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options , unsigned long long int seed, float redshift_desc, float redshift, float *dens_field, float *halo_overlap_box, HaloField *halos_desc, HaloField *halos); @@ -19,4 +22,7 @@ double expected_nhalo(double redshift, UserParams *user_params, CosmoParams *cos //used in HaloField.c to assign rng to DexM halos int add_properties_cat(unsigned long long int seed, float redshift, HaloField *halos); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/_inputparams_wrapper.h b/src/py21cmfast/src/_inputparams_wrapper.h index 67a2285c0..7d519fb32 100644 --- a/src/py21cmfast/src/_inputparams_wrapper.h +++ b/src/py21cmfast/src/_inputparams_wrapper.h @@ -4,6 +4,10 @@ //WARNING: DO NOT #include THIS FILE IN THE C CODE EXCEPT FOR IN InputParameters.h +// #ifdef __cplusplus +// extern "C" { +// #endif + typedef struct CosmoParams{ float SIGMA_8; @@ -210,3 +214,7 @@ extern AstroParams *astro_params_global; extern FlagOptions *flag_options_global; extern GlobalParams 
global_params; + +// #ifdef __cplusplus +// } +// #endif diff --git a/src/py21cmfast/src/_outputstructs_wrapper.h b/src/py21cmfast/src/_outputstructs_wrapper.h index 43924f52c..1d84aaf06 100644 --- a/src/py21cmfast/src/_outputstructs_wrapper.h +++ b/src/py21cmfast/src/_outputstructs_wrapper.h @@ -3,6 +3,9 @@ */ //WARNING: DO NOT #include THIS FILE IN THE C CODE EXCEPT FOR IN OutputStructs.h +// #ifdef __cplusplus +// extern "C" { +// #endif typedef struct InitialConditions{ float *lowres_density, *lowres_vx, *lowres_vy, *lowres_vz, *lowres_vx_2LPT, *lowres_vy_2LPT, *lowres_vz_2LPT; float *hires_density, *hires_vx, *hires_vy, *hires_vz, *hires_vx_2LPT, *hires_vy_2LPT, *hires_vz_2LPT; //cw addition @@ -91,3 +94,7 @@ typedef struct IonizedBox{ typedef struct BrightnessTemp{ float *brightness_temp; } BrightnessTemp; + +// #ifdef __cplusplus +// } +// #endif diff --git a/src/py21cmfast/src/bubble_helper_progs.h b/src/py21cmfast/src/bubble_helper_progs.h index 9a580724f..3ba2b3913 100644 --- a/src/py21cmfast/src/bubble_helper_progs.h +++ b/src/py21cmfast/src/bubble_helper_progs.h @@ -2,7 +2,13 @@ #ifndef _BUBBLEHELP_H #define _BUBBLEHELP_H +#ifdef __cplusplus +extern "C" { +#endif //NOTE: This file is only used for the old bubble finding algorithm which updates the whole sphere void update_in_sphere(float * box, int dimensions, int dimensions_ncf, float R, float xf, float yf, float zf); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/cosmology.h b/src/py21cmfast/src/cosmology.h index 244b6b922..ef1839e53 100644 --- a/src/py21cmfast/src/cosmology.h +++ b/src/py21cmfast/src/cosmology.h @@ -1,6 +1,9 @@ #ifndef _PS_H #define _PS_H +#ifdef __cplusplus +extern "C" { +#endif double init_ps(); double dicke(double z); double sigma_z0(double M); @@ -29,4 +32,7 @@ double hubble(float z); double t_hubble(float z); double M_J_WDM(); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/debugging.h b/src/py21cmfast/src/debugging.h index 
a6ae388a0..8793faf35 100644 --- a/src/py21cmfast/src/debugging.h +++ b/src/py21cmfast/src/debugging.h @@ -6,6 +6,9 @@ #include "InputParameters.h" #include "OutputStructs.h" +#ifdef __cplusplus +extern "C" { +#endif //Input debugging void writeFlagOptions(FlagOptions *p); void writeUserParams(UserParams *p); @@ -24,4 +27,7 @@ int SomethingThatCatches(bool sub_func); int FunctionThatCatches(bool sub_func, bool pass, double* result); void FunctionThatThrows(); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/dft.c b/src/py21cmfast/src/dft.c index 3486d8ba1..d1ddaead3 100644 --- a/src/py21cmfast/src/dft.c +++ b/src/py21cmfast/src/dft.c @@ -15,6 +15,9 @@ #include "dft.h" +// #ifdef __cplusplus +// extern "C" +// #endif int dft_c2r_cube(bool use_wisdom, int dim, int dim_los, int n_threads, fftwf_complex *box){ char wisdom_filename[500]; unsigned flag = FFTW_ESTIMATE; @@ -43,6 +46,9 @@ int dft_c2r_cube(bool use_wisdom, int dim, int dim_los, int n_threads, fftwf_com return(0); } +// #ifdef __cplusplus +// extern "C" +// #endif int dft_r2c_cube(bool use_wisdom, int dim, int dim_los, int n_threads, fftwf_complex *box){ char wisdom_filename[500]; unsigned flag = FFTW_ESTIMATE; diff --git a/src/py21cmfast/src/dft.h b/src/py21cmfast/src/dft.h index 55815d2da..f5cccd4fe 100644 --- a/src/py21cmfast/src/dft.h +++ b/src/py21cmfast/src/dft.h @@ -8,8 +8,14 @@ #include "InputParameters.h" +#ifdef __cplusplus +extern "C" { +#endif int dft_c2r_cube(bool use_wisdom, int dim, int dim_los, int n_threads, fftwf_complex *box); int dft_r2c_cube(bool use_wisdom, int dim, int dim_los, int n_threads, fftwf_complex *box); int CreateFFTWWisdoms(UserParams *user_params, CosmoParams *cosmo_params); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/elec_interp.h b/src/py21cmfast/src/elec_interp.h index 77ba7932c..d75f138a6 100644 --- a/src/py21cmfast/src/elec_interp.h +++ b/src/py21cmfast/src/elec_interp.h @@ -5,6 +5,10 @@ #define x_int_NXHII 14 #define 
x_int_NENERGY 258 +#ifdef __cplusplus +extern "C" { +#endif + void initialize_interp_arrays(); // Primary functions to compute heating fractions and number of Lya photons or ionization produced, @@ -25,4 +29,7 @@ int locate_xHII_index(float xHII_call); // TODO: remove it and make it static in elec_interp.c extern float x_int_XHII[x_int_NXHII]; +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/exceptions.h b/src/py21cmfast/src/exceptions.h index 195cdbbd0..45e258134 100644 --- a/src/py21cmfast/src/exceptions.h +++ b/src/py21cmfast/src/exceptions.h @@ -3,6 +3,9 @@ #include "cexcept.h" +#ifdef __cplusplus +extern "C" { +#endif define_exception_type(int); //NOTE: declaration here, definition in debugging.c @@ -22,4 +25,7 @@ extern struct exception_context the_exception_context[1]; #define CATCH_GSL_ERROR(status) if(status>0) {LOG_ERROR("GSL Error Encountered (Code = %d): %s", status, gsl_strerror(status)); Throw(GSLError);} +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/filtering.c b/src/py21cmfast/src/filtering.c index 49827bbb7..e8afd337d 100644 --- a/src/py21cmfast/src/filtering.c +++ b/src/py21cmfast/src/filtering.c @@ -16,6 +16,7 @@ #include "InputParameters.h" #include "indexing.h" #include "dft.h" +#include "filtering.h" double real_tophat_filter(double kR){ //Second order taylor expansion around kR==0 @@ -76,7 +77,7 @@ double spherical_shell_filter(double k, double R_outer, double R_inner){ - sin(kR_inner) + cos(kR_inner)*kR_inner); } -void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_param){ +void filter_box_cpu(fftwf_complex *box, int RES, int filter_type, float R, float R_param){ int dimension, midpoint; //TODO: figure out why defining as ULL breaks this switch(RES) { case 0: @@ -152,8 +153,17 @@ void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_p return; } +void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_param){ + if (1) { + 
filter_box_gpu(box, RES, filter_type, R, R_param); + } else { + // Call the CPU version + filter_box_cpu(box, RES, filter_type, R, R_param); + } +} + //Test function to filter a box without computing a whole output box -int test_filter(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options +int test_filter_cpu(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options , float *input_box, double R, double R_param, int filter_flag, double *result){ int i,j,k; unsigned long long int ii; @@ -177,7 +187,7 @@ int test_filter(UserParams *user_params, CosmoParams *cosmo_params, AstroParams memcpy(box_filtered,box_unfiltered,sizeof(fftwf_complex) * HII_KSPACE_NUM_PIXELS); - filter_box(box_filtered,1,filter_flag,R,R_param); + filter_box_cpu(box_filtered,1,filter_flag,R,R_param); dft_c2r_cube(user_params->USE_FFTW_WISDOM, user_params->HII_DIM, HII_D_PARA, user_params->N_THREADS, box_filtered); @@ -191,3 +201,12 @@ int test_filter(UserParams *user_params, CosmoParams *cosmo_params, AstroParams return 0; } + +int test_filter(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, + float *input_box, double R, double R_param, int filter_flag, double *result) { + if (1) { + return test_filter_gpu(user_params, cosmo_params, astro_params, flag_options, input_box, R, R_param, filter_flag, result); + } else { + return test_filter_cpu(user_params, cosmo_params, astro_params, flag_options, input_box, R, R_param, filter_flag, result); + } +} diff --git a/src/py21cmfast/src/filtering.cu b/src/py21cmfast/src/filtering.cu index b6361405b..58c98abd1 100644 --- a/src/py21cmfast/src/filtering.cu +++ b/src/py21cmfast/src/filtering.cu @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -14,10 +15,12 @@ #include "cexcept.h" #include "exceptions.h" #include "logger.h" + #include "Constants.h" #include "InputParameters.h" #include 
"indexing.h" #include "dft.h" +#include "filtering.h" __device__ inline double real_tophat_filter(double kR) { // Second order taylor expansion around kR==0 @@ -73,12 +76,11 @@ __device__ inline double spherical_shell_filter(double k, double R_outer, double // __global__ void filter_box_kernel(fftwf_complex *box, int dimension, int midpoint, int midpoint_para, double delta_k, float R, float R_param, double R_const, int filter_type) { __global__ void filter_box_kernel(cuFloatComplex *box, int dimension, int midpoint, int midpoint_para, double delta_k, float R, float R_param, double R_const, int filter_type) { - // Get index of d_box (flattened k-box) + // Get index of box (flattened k-box) unsigned long long idx = blockIdx.x * blockDim.x + threadIdx.x; - float kR; - // Compute the 3D indices (n_x, n_y, n_z) for the k-box from the flattened index (idx) + // Based on convenience macros in indexing.h int n_z = idx % (midpoint_para + 1); unsigned long long remaining = idx / (midpoint_para + 1); // Calculate remaining index int n_y = remaining % dimension; @@ -92,10 +94,10 @@ __global__ void filter_box_kernel(cuFloatComplex *box, int dimension, int midpoi // Compute squared magnitude of wave vector float k_mag_sq = k_x*k_x + k_y*k_y + k_z*k_z; + float kR; if (filter_type == 0) { // real space top-hat kR = sqrt(k_mag_sq) * R; // box[idx] *= real_tophat_filter(kR); - // box[idx] = cuCmul(box[idx], real_tophat_filter(kR)); box[idx] = cuCmulf(box[idx], make_cuFloatComplex((float)real_tophat_filter(kR), 0.f)); } else if (filter_type == 1) { // k-space top hat @@ -116,16 +118,20 @@ __global__ void filter_box_kernel(cuFloatComplex *box, int dimension, int midpoi // box[idx] *= spherical_shell_filter(sqrt(k_mag_sq), R, R_param); box[idx] = cuCmulf(box[idx], make_cuFloatComplex((float)spherical_shell_filter(sqrt(k_mag_sq), R, R_param), 0.f)); } - else { - if (idx == 0) { - LOG_WARNING("Filter type %i is undefined. 
Box is unfiltered.", filter_type); - } - } + // This doesn't work from device + // else { + // if (idx == 0) { + // LOG_WARNING("Filter type %i is undefined. Box is unfiltered.", filter_type); + // } + // } } // *box is a pointer, so only memory address is passed, not entire array -void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_param) { +// #ifdef __cplusplus +// extern "C" +// #endif +void filter_box_gpu(fftwf_complex *box, int RES, int filter_type, float R, float R_param) { // Get required values int dimension, midpoint, midpoint_para, num_pixels; @@ -167,6 +173,7 @@ void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_p int threadsPerBlock = 256; int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; filter_box_kernel<<>>(reinterpret_cast(d_box), dimension, midpoint, midpoint_para, delta_k, R, R_param, R_const, filter_type); + // filter_box_kernel<<>>((cuFloatComplex *)d_box, dimension, midpoint, midpoint_para, delta_k, R, R_param, R_const, filter_type); // Copy results from device to host cudaMemcpy(box, d_box, size, cudaMemcpyDeviceToHost); @@ -176,7 +183,7 @@ void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_p } // Test function to filter a box without computing a whole output box -int test_filter(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options +int test_filter_gpu(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options , float *input_box, double R, double R_param, int filter_flag, double *result) { int i,j,k; unsigned long long int ii; @@ -194,16 +201,19 @@ int test_filter(UserParams *user_params, CosmoParams *cosmo_params, AstroParams dft_r2c_cube(user_params->USE_FFTW_WISDOM, user_params->HII_DIM, HII_D_PARA, user_params->N_THREADS, box_unfiltered); - float num_pixels = HII_TOT_NUM_PIXELS; + cuFloatComplex* box_unfiltered_cu = 
reinterpret_cast(box_unfiltered); + + // float num_pixels = HII_TOT_NUM_PIXELS; for(ii=0;iiUSE_FFTW_WISDOM, user_params->HII_DIM, HII_D_PARA, user_params->N_THREADS, box_filtered); diff --git a/src/py21cmfast/src/filtering.h b/src/py21cmfast/src/filtering.h index 53a6dc787..f82961565 100644 --- a/src/py21cmfast/src/filtering.h +++ b/src/py21cmfast/src/filtering.h @@ -4,8 +4,20 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_param); +void filter_box_cpu(fftwf_complex *box, int RES, int filter_type, float R, float R_param); +void filter_box_gpu(fftwf_complex *box, int RES, int filter_type, float R, float R_param); int test_filter(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options , float *input_box, double R, double R_param, int filter_flag, double *result); +int test_filter_cpu(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options + , float *input_box, double R, double R_param, int filter_flag, double *result); +int test_filter_gpu(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options + , float *input_box, double R, double R_param, int filter_flag, double *result); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/heating_helper_progs.h b/src/py21cmfast/src/heating_helper_progs.h index dd8f33954..62ddbb2de 100644 --- a/src/py21cmfast/src/heating_helper_progs.h +++ b/src/py21cmfast/src/heating_helper_progs.h @@ -1,6 +1,9 @@ #ifndef _HEATHELPER_H #define _HEATHELPER_H +#ifdef __cplusplus +extern "C" { +#endif // * initialization routine * // int init_heat(); @@ -50,4 +53,7 @@ double nu_tau_one(double zp, double zpp, double x_e, double HI_filling_factor_zp //xray heating integrals over frequency double integrate_over_nu(double zp, double local_x_e, double lower_int_limit, int FLAG); +#ifdef __cplusplus 
+} +#endif #endif diff --git a/src/py21cmfast/src/hello_world.h b/src/py21cmfast/src/hello_world.h index d11e30a1a..a4a096113 100644 --- a/src/py21cmfast/src/hello_world.h +++ b/src/py21cmfast/src/hello_world.h @@ -1,6 +1,13 @@ #ifndef _HELLO_WORLD_H #define _HELLO_WORLD_H +#ifdef __cplusplus +extern "C" { +#endif + int call_cuda(); +#ifdef __cplusplus +} +#endif #endif // HELLO_WORLD_H \ No newline at end of file diff --git a/src/py21cmfast/src/hmf.h b/src/py21cmfast/src/hmf.h index f225e1f69..af7b92f40 100644 --- a/src/py21cmfast/src/hmf.h +++ b/src/py21cmfast/src/hmf.h @@ -7,6 +7,9 @@ #define MAX_DELTAC_FRAC (float)0.99 //max delta/deltac for the mass function integrals #define DELTA_MIN -1 //minimum delta for Lagrangian mass function integrals +#ifdef __cplusplus +extern "C" { +#endif //Parameters used for gsl integral on the mass function struct parameters_gsl_MF_integrals{ //parameters for all MF integrals @@ -67,4 +70,7 @@ double sheth_delc_dexm(double del, double sig); float Mass_limit_bisection(float Mmin, float Mmax, float PL, float FRAC); double euler_to_lagrangian_delta(double delta); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/interp_tables.h b/src/py21cmfast/src/interp_tables.h index 87e8fc4c6..3c62c09af 100644 --- a/src/py21cmfast/src/interp_tables.h +++ b/src/py21cmfast/src/interp_tables.h @@ -6,6 +6,9 @@ //Functions within interp_tables.c need the parameter structures, but we don't want to pass them all down the chain, so we broadcast them //TODO: in future it would be better to use a context struct. 
See `HaloBox.c` +#ifdef __cplusplus +extern "C" { +#endif void initialise_SFRD_spline(int Nbin, float zmin, float zmax, float Alpha_star, float Alpha_star_mini, float Fstar10, float Fstar7_MINI, float mturn_a_const, bool minihalos); double EvaluateSFRD(double redshift, double Mlim_Fstar); @@ -62,4 +65,7 @@ void free_conditional_tables(); void free_global_tables(); void free_dNdM_tables(); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/interpolation.h b/src/py21cmfast/src/interpolation.h index 2fc9315ac..e356c0966 100644 --- a/src/py21cmfast/src/interpolation.h +++ b/src/py21cmfast/src/interpolation.h @@ -3,6 +3,9 @@ #include +#ifdef __cplusplus +extern "C" { +#endif typedef struct RGTable1D{ int n_bin; double x_min; @@ -58,4 +61,7 @@ double EvaluateRGTable2D(double x, double y, RGTable2D *table); double EvaluateRGTable1D_f(double x, RGTable1D_f *table); double EvaluateRGTable2D_f(double x, double y, RGTable2D_f *table); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/logger.h b/src/py21cmfast/src/logger.h index 596a40b81..4db098611 100644 --- a/src/py21cmfast/src/logger.h +++ b/src/py21cmfast/src/logger.h @@ -42,6 +42,10 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + // === auxiliary functions static inline char *timenow(); @@ -132,4 +136,7 @@ static inline char *timenow() { return buffer; } +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/photoncons.h b/src/py21cmfast/src/photoncons.h index 30f253ab0..06791993b 100644 --- a/src/py21cmfast/src/photoncons.h +++ b/src/py21cmfast/src/photoncons.h @@ -4,6 +4,9 @@ #include #include "InputParameters.h" +#ifdef __cplusplus +extern "C" { +#endif //This is directly accessed in the wrapper currently //TODO: remove this global declaration and make an internal checking function extern bool photon_cons_allocated; @@ -29,4 +32,7 @@ int ObtainPhotonConsData(double *z_at_Q_data, double *Q_data, int *Ndata_analyti void set_alphacons_params(double 
norm, double slope); double get_fesc_fit(double redshift); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/recombinations.h b/src/py21cmfast/src/recombinations.h index a2ffc7cf5..e3cebd13f 100644 --- a/src/py21cmfast/src/recombinations.h +++ b/src/py21cmfast/src/recombinations.h @@ -1,7 +1,13 @@ #ifndef _RECOMB_H #define _RECOMB_H +#ifdef __cplusplus +extern "C" { +#endif double splined_recombination_rate(double z_eff, double gamma12_bg); void init_MHR(); /*initializes the lookup table for the PDF density integral in MHR00 model at redshift z*/ +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/subcell_rsds.h b/src/py21cmfast/src/subcell_rsds.h index 409c5251c..6d69e17d0 100644 --- a/src/py21cmfast/src/subcell_rsds.h +++ b/src/py21cmfast/src/subcell_rsds.h @@ -4,6 +4,9 @@ #include "InputParameters.h" #include "OutputStructs.h" +#ifdef __cplusplus +extern "C" { +#endif double apply_subcell_rsds( UserParams *user_params, CosmoParams *cosmo_params, @@ -18,4 +21,7 @@ double apply_subcell_rsds( float H ); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/thermochem.h b/src/py21cmfast/src/thermochem.h index 8b3156fcd..90e7a6f40 100644 --- a/src/py21cmfast/src/thermochem.h +++ b/src/py21cmfast/src/thermochem.h @@ -3,6 +3,9 @@ #include "InputParameters.h" +#ifdef __cplusplus +extern "C" { +#endif float ComputeTau(UserParams *user_params, CosmoParams *cosmo_params, int Npoints, float *redshifts, float *global_xHI); double molecular_cooling_threshold(float z); double atomic_cooling_threshold(float z); @@ -21,4 +24,7 @@ double HeII_ion_crosssec(double nu); double HI_ion_crosssec(double nu); double neutral_fraction(double density, double T4, double gamma, int usecaseB); +#ifdef __cplusplus +} +#endif #endif From 8d130631fc21bbbb10fe696925397b9fcc21a950 Mon Sep 17 00:00:00 2001 From: alserene Date: Thu, 17 Oct 2024 15:49:22 +1100 Subject: [PATCH 003/145] Add synchronisation for error catching in development. 
--- src/py21cmfast/src/exceptions.h | 3 +++ src/py21cmfast/src/filtering.cu | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/src/py21cmfast/src/exceptions.h b/src/py21cmfast/src/exceptions.h index 45e258134..00ddd2bf7 100644 --- a/src/py21cmfast/src/exceptions.h +++ b/src/py21cmfast/src/exceptions.h @@ -22,8 +22,11 @@ extern struct exception_context the_exception_context[1]; #define InfinityorNaNError 7 #define MassDepZetaError 8 #define MemoryAllocError 9 +#define CudaError 10 #define CATCH_GSL_ERROR(status) if(status>0) {LOG_ERROR("GSL Error Encountered (Code = %d): %s", status, gsl_strerror(status)); Throw(GSLError);} +#define CATCH_CUDA_ERROR(err) if(err != cudaSuccess) {LOG_ERROR("CUDA Error Encountered: %s", cudaGetErrorString(err)); Throw(CudaError);} + #ifdef __cplusplus } diff --git a/src/py21cmfast/src/filtering.cu b/src/py21cmfast/src/filtering.cu index 58c98abd1..d2c19e6be 100644 --- a/src/py21cmfast/src/filtering.cu +++ b/src/py21cmfast/src/filtering.cu @@ -175,6 +175,19 @@ void filter_box_gpu(fftwf_complex *box, int RES, int filter_type, float R, float filter_box_kernel<<>>(reinterpret_cast(d_box), dimension, midpoint, midpoint_para, delta_k, R, R_param, R_const, filter_type); // filter_box_kernel<<>>((cuFloatComplex *)d_box, dimension, midpoint, midpoint_para, delta_k, R, R_param, R_const, filter_type); + // Only use during development! 
+ cudaError_t err = cudaDeviceSynchronize(); + CATCH_CUDA_ERROR(err); + // if (err != cudaSuccess) { + // LOG_ERROR("cudaDeviceSynchronize error: %s", cudaGetErrorString(err)); + // Throw(RuntimeError); + // } + err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); + Throw(CudaError); // Or the appropriate exception type + } + // Copy results from device to host cudaMemcpy(box, d_box, size, cudaMemcpyDeviceToHost); From 4f4f10c39d7bdde91915984e003133223c242d89 Mon Sep 17 00:00:00 2001 From: alserene Date: Mon, 21 Oct 2024 19:08:55 +1100 Subject: [PATCH 004/145] Initial (uncompleted) GPU implementation. --- src/py21cmfast/src/PerturbField.c | 19 +- src/py21cmfast/src/PerturbField.cu | 883 +++++++++++++++++++++++++++++ src/py21cmfast/src/PerturbField.h | 8 + 3 files changed, 909 insertions(+), 1 deletion(-) create mode 100644 src/py21cmfast/src/PerturbField.cu diff --git a/src/py21cmfast/src/PerturbField.c b/src/py21cmfast/src/PerturbField.c index 02f453a3c..499199d67 100644 --- a/src/py21cmfast/src/PerturbField.c +++ b/src/py21cmfast/src/PerturbField.c @@ -157,7 +157,7 @@ void compute_perturbed_velocities( } -int ComputePerturbField( +int ComputePerturbField_cpu( float redshift, UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes, PerturbedField *perturbed_field ){ @@ -770,3 +770,20 @@ int ComputePerturbField( return(0); } + +int ComputePerturbField( + float redshift, UserParams *user_params, CosmoParams *cosmo_params, + InitialConditions *boxes, PerturbedField *perturbed_field + ){ + if (1) { + ComputePerturbField_gpu(redshift, user_params, cosmo_params, boxes, perturbed_field) + } else { + ComputePerturbField_cpu(redshift, user_params, cosmo_params, boxes, perturbed_field) + } + // switch(GPU) { + // case 0: + // filter_box_cpu(box, RES, filter_type, R, R_param); + // case 1: + // filter_box_gpu(box, RES, filter_type, R, R_param); + // } + } diff --git 
a/src/py21cmfast/src/PerturbField.cu b/src/py21cmfast/src/PerturbField.cu new file mode 100644 index 000000000..92bd2e4bd --- /dev/null +++ b/src/py21cmfast/src/PerturbField.cu @@ -0,0 +1,883 @@ +// Re-write of perturb_field.c for being accessible within the MCMC +#include +#include +#include +#include +#include +#include + +#include "cexcept.h" +#include "exceptions.h" +#include "logger.h" +#include "Constants.h" +#include "indexing.h" +#include "InputParameters.h" +#include "OutputStructs.h" +#include "cosmology.h" +#include "dft.h" +#include "debugging.h" +#include "filtering.h" + +#include "PerturbField.h" + +void compute_perturbed_velocities( + unsigned short axis, + UserParams *user_params, + fftwf_complex *HIRES_density_perturb, + fftwf_complex *HIRES_density_perturb_saved, + fftwf_complex *LOWRES_density_perturb, + fftwf_complex *LOWRES_density_perturb_saved, + float dDdt_over_D, + int dimension, + int switch_mid, + float f_pixel_factor, + float *velocity +){ + + float k_x, k_y, k_z, k_sq; + unsigned long long int n_x, n_y, n_z; + unsigned long long int i,j,k; + + // ALICE: 3D vector for k-space coords + float kvec[3]; + + if(user_params->PERTURB_ON_HIGH_RES) { + // We are going to generate the velocity field on the high-resolution perturbed + // density grid + // ALICE: Copy the saved k-space density field to HIRES_density_perturb. + memcpy( + HIRES_density_perturb, + HIRES_density_perturb_saved, + sizeof(fftwf_complex)*KSPACE_NUM_PIXELS + ); + } + else { + // We are going to generate the velocity field on the low-resolution perturbed density grid + // ALICE: Copy the saved k-space density field to LOWRES_density_perturb. 
+ memcpy( + LOWRES_density_perturb, + LOWRES_density_perturb_saved, + sizeof(fftwf_complex)*HII_KSPACE_NUM_PIXELS + ); + LOG_SUPER_DEBUG("dDdt_over_D=%.6e, dimension=%d, switch_mid=%d, f_pixel_factor=%f", dDdt_over_D, dimension, switch_mid, f_pixel_factor); + } + + // ALICE: Compute wave numbers (k_x, k_y, k_z) + compute velocity based on density perturbations. + // ALICE: Wave numbers == frequencies of spatial oscillations (higher wave number=faster oscillations) + #pragma omp parallel \ + shared(LOWRES_density_perturb,HIRES_density_perturb,dDdt_over_D,dimension,switch_mid) \ + private(n_x,n_y,n_z,k_x,k_y,k_z,k_sq, kvec) \ + num_threads(user_params->N_THREADS) + { + #pragma omp for + for (n_x=0; n_x switch_mid) + k_x = (n_x-dimension) * DELTA_K; // wrap around for FFT convention + else + k_x = n_x * DELTA_K; + + for (n_y=0; n_y switch_mid) + k_y = (n_y-dimension) * DELTA_K; + else + k_y = n_y * DELTA_K; + + for (n_z=0; n_z<=(unsigned long long)(user_params->NON_CUBIC_FACTOR*switch_mid); n_z++){ + k_z = n_z * DELTA_K_PARA; + + kvec[0] = k_x; + kvec[1] = k_y; + kvec[2] = k_z; + + k_sq = k_x*k_x + k_y*k_y + k_z*k_z; + + // now set the velocities + if ((n_x==0) && (n_y==0) && (n_z==0)) { // DC mode + if(user_params->PERTURB_ON_HIGH_RES) { + HIRES_density_perturb[0] = 0; + } + else { + LOWRES_density_perturb[0] = 0; + } + } + else{ + if(user_params->PERTURB_ON_HIGH_RES) { + HIRES_density_perturb[C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]*I/k_sq/(TOT_NUM_PIXELS+0.0); + } + else { + LOWRES_density_perturb[HII_C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]*I/k_sq/(HII_TOT_NUM_PIXELS+0.0); + } + } + } + } + } + } + + LOG_SUPER_DEBUG("density_perturb after modification by dDdt: "); + debugSummarizeBoxComplex(LOWRES_density_perturb, user_params->HII_DIM, user_params->NON_CUBIC_FACTOR, " "); + + // ALICE: density field was already in k-space when passed in, so now filter (top-hat), inverse fft and copy to velocity field. 
+ if(user_params->PERTURB_ON_HIGH_RES) { + + // smooth the high resolution field ready for resampling + // ALICE: RES=0 (dimension=DIM, midpoint=MIDDLE), filter_type=0 (real space top-hat filtering) + if (user_params->DIM != user_params->HII_DIM) + filter_box(HIRES_density_perturb, 0, 0, L_FACTOR*user_params->BOX_LEN/(user_params->HII_DIM+0.0)); + + dft_c2r_cube(user_params->USE_FFTW_WISDOM, user_params->DIM, D_PARA, user_params->N_THREADS, HIRES_density_perturb); + + // ALICE: Copy computed velocities to velocity field. + #pragma omp parallel \ + shared(velocity,HIRES_density_perturb,f_pixel_factor) \ + private(i,j,k) \ + num_threads(user_params->N_THREADS) + { + #pragma omp for + for (i=0; iHII_DIM; i++){ + for (j=0; jHII_DIM; j++){ + for (k=0; k no top hat filtering, just inverse fft and copy to velocity field. + else { + dft_c2r_cube(user_params->USE_FFTW_WISDOM, user_params->HII_DIM, HII_D_PARA, user_params->N_THREADS, LOWRES_density_perturb); + + #pragma omp parallel \ + shared(velocity,LOWRES_density_perturb) \ + private(i,j,k) \ + num_threads(user_params->N_THREADS) + { + #pragma omp for + for (i=0; iHII_DIM; i++){ + for (j=0; jHII_DIM; j++){ + for (k=0; kHII_DIM, user_params->NON_CUBIC_FACTOR, " "); + +} + +__device__ inline double compute_R_INDEX(int i, int j, int k, int DIM, int D_PARA) { + return k + D_PARA * (j + DIM * i) +} + +__device__ inline double compute_HII_R_INDEX(int i, int j, int k, int DIM, int MID_PARA) { + return k + 2 * (MID_PARA + 1) * (j + DIM * i) +} + +__global__ void perturb_density_field_kernel( + double *resampled_box, int dimension, int DIM, int D_PARA, int MID_PARA, + int NON_CUBIC_FACTOR, float f_pixel_factor, float init_growth_factor, + bool PERTURB_ON_HIGH_RES, bool USE_2LPT, + ) { + + unsigned long long idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Get index of density cell + int i = idx / (D_PARA * DIM); + int j = (idx / D_PARA) % DIM; + int k = idx % D_PARA; + + // Map index to location in units of box size + float xf 
= (i + 0.5) / DIM; + float yf = (j + 0.5) / DIM; + float zf = (k + 0.5) / D_PARA; + + // Update locations + if (PERTURB_ON_HIGH_RES) { + xf += hires_vx[compute_R_INDEX(i, j, k)]; + yf += hires_vy[compute_R_INDEX(i, j, k)]; + zf += hires_vz[compute_R_INDEX(i, j, k)]; + } + else { + unsigned long long HII_i = (unsigned long long)(i / f_pixel_factor); + unsigned long long HII_j = (unsigned long long)(j / f_pixel_factor); + unsigned long long HII_k = (unsigned long long)(k / f_pixel_factor); + xf += lowres_vx[compute_HII_R_INDEX(HII_i, HII_j, HII_k)]; + yf += lowres_vy[compute_HII_R_INDEX(HII_i, HII_j, HII_k)]; + zf += lowres_vz[compute_HII_R_INDEX(HII_i, HII_j, HII_k)]; + } + + // 2LPT (add second order corrections) + if (USE_2LPT) { + if (PERTURB_ON_HIGH_RES) { + xf -= hires_vx_2LPT[compute_R_INDEX(i, j, k)]; + yf -= hires_vy_2LP[compute_R_INDEX(i, j, k)]; + zf -= hires_vz_2LPT[compute_R_INDEX(i, j, k)]; + } + else { + xf -= lowres_vx_2LPT[compute_HII_R_INDEX(HII_i, HII_j, HII_k)]; + yf -= lowres_vy_2LPT[compute_HII_R_INDEX(HII_i, HII_j, HII_k)]; + zf -= lowres_vz_2LPT[compute_HII_R_INDEX(HII_i, HII_j, HII_k)]; + } + } + + // Scale coordinates back to grid size + xf *= (double)(dimension); + yf *= (double)(dimension); + zf *= (double)((unsigned long long)(NON_CUBIC_FACTOR * dimension)); + + // Wrap coordinates to keep them within valid boundaries + xf = fmod(fmod(xf, dimension) + dimension, dimension); + yf = fmod(fmod(yf, dimension) + dimension, dimension); + zf = fmod(fmod(zf, dimension * NCF) + dimension * NCF, dimension * NCF); + + // Get integer values for indices from floating point values + int xi = xf; + int yi = yf; + int zi = zf; + + // Wrap index coordinates to ensure no out-of-bounds array access will be attempted + xi = (xi % dimension + dimension) % dimension; + yi = (yi % dimension + dimension) % dimension; + zi = (zi % dimension * NCF + dimension * NCF) % dimension * NCF; + + // Determine the fraction of the perturbed cell which overlaps with the 8 
nearest grid cells, + // based on the grid cell which contains the centre of the perturbed cell + float d_x = fabs(xf - (double)(xi + 0.5)); // Absolute distances from grid cell centre to perturbed cell centre + float d_y = fabs(yf - (double)(yi + 0.5)); // (also) The fractions of mass which will be moved to neighbouring cells + float d_z = fabs(zf - (double)(zi + 0.5)); + + // 8 neighbour cells-of-interest will be shifted left/down/behind if perturbed midpoint is in left/bottom/back corner of cell. + if (xf < (double)(xi + 0.5)) { + // If perturbed cell centre is less than the mid-point then update fraction + // of mass in the cell and determine the cell centre of neighbour to be the + // lowest grid point index + d_x = 1. - d_x; + xi -= 1; + xi += (xi + dimension) % dimension; // Only this critera is possible as iterate back by one (we cannot exceed DIM) + } + if(yf < (double)(yi + 0.5)) { + d_y = 1. - d_y; + yi -= 1; + yi += (yi + dimension) % dimension; + } + if(zf < (double)(zi + 0.5)) { + d_z = 1. - d_z; + zi -= 1; + zi += (zi + (unsigned long long)(NON_CUBIC_FACTOR * dimension)) % (unsigned long long)(NON_CUBIC_FACTOR * dimension); + } + // The fractions of mass which will remain with perturbed cell + float t_x = 1. - d_x; + float t_y = 1. - d_y; + float t_z = 1. - d_z; + + // Determine the grid coordinates of the 8 neighbouring cells. 
+ // Neighbours will be in positive direction; front/right/above cells (-> 2x2 cube, with perturbed cell bottom/left/back) + // Takes into account the offset based on cell centre determined above + int xp1 = (xi + 1) % dimension; + int yp1 = (yi + 1) % dimension; + int zp1 = (zi + 1) % (unsigned long long)(NON_CUBIC_FACTOR * dimension); + + if (PERTURB_ON_HIGH_RES) { + // Redistribute the mass over the 8 neighbouring cells according to cloud in cell + // Cell mass = (1 + init_growth_factor * orig_density) * (proportion of mass to distribute) + atomicAdd(&resampled_box[compute_R_INDEX(xi, yi, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * t_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yi, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * t_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xi, yp1, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * d_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yp1, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * d_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xi, yi, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * t_y * d_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yi, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * t_y * d_z); + atomicAdd(&resampled_box[compute_R_INDEX(xi, yp1, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * d_y * d_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yp1, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * d_y * d_z); + } + else { + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yi, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * t_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, 
yi, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * t_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yp1, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * d_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yp1, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * d_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yi, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * t_y * d_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yi, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * t_y * d_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yp1, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * d_y * d_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yp1, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * d_y * d_z); + } +} + +int ComputePerturbField_gpu( + float redshift, UserParams *user_params, CosmoParams *cosmo_params, + InitialConditions *boxes, PerturbedField *perturbed_field +) { + /* + ComputePerturbField uses the first-order Langragian displacement field to move the + masses in the cells of the density field. The high-res density field is extrapolated + to some high-redshift (global_params.INITIAL_REDSHIFT), then uses the zeldovich + approximation to move the grid "particles" onto the lower-res grid we use for the + maps. Then we recalculate the velocity fields on the perturbed grid. + */ + + int status; + Try{ // This Try{} wraps the whole function, so we don't indent. 
+ + // Makes the parameter structs visible to a variety of functions/macros + // Do each time to avoid Python garbage collection issues + Broadcast_struct_global_noastro(user_params,cosmo_params); + + omp_set_num_threads(user_params->N_THREADS); + + fftwf_complex *HIRES_density_perturb, *HIRES_density_perturb_saved; + fftwf_complex *LOWRES_density_perturb, *LOWRES_density_perturb_saved; + + float growth_factor, displacement_factor_2LPT, init_growth_factor, init_displacement_factor_2LPT; + float mass_factor, dDdt, f_pixel_factor, velocity_displacement_factor, velocity_displacement_factor_2LPT; + int i, j, k, dimension, switch_mid; + + // Function for deciding the dimensions of loops when we could + // use either the low or high resolution grids. + switch(user_params->PERTURB_ON_HIGH_RES) { + case 0: + dimension = user_params->HII_DIM; + switch_mid = HII_MIDDLE; + break; + case 1: + dimension = user_params->DIM; + switch_mid = MIDDLE; + break; + } + + // *************** BEGIN INITIALIZATION ************************** // + + // perform a very rudimentary check to see if we are underresolved and not using the linear approx + if ((user_params->BOX_LEN > user_params->DIM) && !(global_params.EVOLVE_DENSITY_LINEARLY)){ + LOG_WARNING("Resolution is likely too low for accurate evolved density fields\n \ + It is recommended that you either increase the resolution (DIM/BOX_LEN) or set the EVOLVE_DENSITY_LINEARLY flag to 1\n"); + } + + growth_factor = dicke(redshift); + displacement_factor_2LPT = -(3.0/7.0) * growth_factor*growth_factor; // 2LPT eq. D8 + + dDdt = ddickedt(redshift); // time derivative of the growth factor (1/s) + init_growth_factor = dicke(global_params.INITIAL_REDSHIFT); + init_displacement_factor_2LPT = -(3.0/7.0) * init_growth_factor*init_growth_factor; // 2LPT eq. 
D8 + + // find factor of HII pixel size / deltax pixel size + f_pixel_factor = user_params->DIM/(float)(user_params->HII_DIM); + mass_factor = pow(f_pixel_factor, 3); + + // allocate memory for the updated density, and initialize + LOWRES_density_perturb = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*HII_KSPACE_NUM_PIXELS); + LOWRES_density_perturb_saved = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*HII_KSPACE_NUM_PIXELS); + + if(user_params->PERTURB_ON_HIGH_RES) { + HIRES_density_perturb = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*KSPACE_NUM_PIXELS); + HIRES_density_perturb_saved = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*KSPACE_NUM_PIXELS); + } + + double *resampled_box; + + //TODO: debugSummarizeIC is bugged when not all the fields are in memory + // debugSummarizeIC(boxes, user_params->HII_DIM, user_params->DIM, user_params->NON_CUBIC_FACTOR); + LOG_SUPER_DEBUG("growth_factor=%f, displacemet_factor_2LPT=%f, dDdt=%f, init_growth_factor=%f, init_displacement_factor_2LPT=%f, mass_factor=%f", + growth_factor, displacement_factor_2LPT, dDdt, init_growth_factor, init_displacement_factor_2LPT, mass_factor); + + // check if the linear evolution flag was set + if (global_params.EVOLVE_DENSITY_LINEARLY){ + + LOG_DEBUG("Linearly evolve density field"); + +#pragma omp parallel shared(growth_factor,boxes,LOWRES_density_perturb,HIRES_density_perturb,dimension) private(i,j,k) num_threads(user_params->N_THREADS) + { +#pragma omp for + for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ + if(user_params->PERTURB_ON_HIGH_RES) { + *((float *)HIRES_density_perturb + R_FFT_INDEX(i,j,k)) = growth_factor*boxes->hires_density[R_INDEX(i,j,k)]; + } + else { + *((float *)LOWRES_density_perturb + HII_R_FFT_INDEX(i,j,k)) = growth_factor*boxes->lowres_density[HII_R_INDEX(i,j,k)]; + } + } + } + } + } + } + else { + // Apply Zel'dovich/2LPT correction + LOG_DEBUG("Apply Zel'dovich"); + +#pragma omp parallel 
shared(LOWRES_density_perturb,HIRES_density_perturb,dimension) private(i,j,k) num_threads(user_params->N_THREADS) + { +#pragma omp for + for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ + if(user_params->PERTURB_ON_HIGH_RES) { + *((float *)HIRES_density_perturb + R_FFT_INDEX(i,j,k)) = 0.; + } + else { + *((float *)LOWRES_density_perturb + HII_R_FFT_INDEX(i,j,k)) = 0.; + } + + } + } + } + } + + velocity_displacement_factor = (growth_factor-init_growth_factor) / user_params->BOX_LEN; + + // now add the missing factor of D +#pragma omp parallel shared(boxes,velocity_displacement_factor,dimension) private(i,j,k) num_threads(user_params->N_THREADS) + { +#pragma omp for + for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ + if(user_params->PERTURB_ON_HIGH_RES) { + boxes->hires_vx[R_INDEX(i,j,k)] *= velocity_displacement_factor; // this is now comoving displacement in units of box size + boxes->hires_vy[R_INDEX(i,j,k)] *= velocity_displacement_factor; // this is now comoving displacement in units of box size + boxes->hires_vz[R_INDEX(i,j,k)] *= (velocity_displacement_factor/user_params->NON_CUBIC_FACTOR); // this is now comoving displacement in units of box size + } + else { + boxes->lowres_vx[HII_R_INDEX(i,j,k)] *= velocity_displacement_factor; // this is now comoving displacement in units of box size + boxes->lowres_vy[HII_R_INDEX(i,j,k)] *= velocity_displacement_factor; // this is now comoving displacement in units of box size + boxes->lowres_vz[HII_R_INDEX(i,j,k)] *= (velocity_displacement_factor/user_params->NON_CUBIC_FACTOR); // this is now comoving displacement in units of box size + } + } + } + } + } + + // * ************************************************************************* * // + // * BEGIN 2LPT PART * // + // * ************************************************************************* * // + // reference: reference: Scoccimarro R., 1998, MNRAS, 299, 1097-1118 Appendix D + if(user_params->USE_2LPT){ + LOG_DEBUG("Apply 2LPT"); + + // allocate memory for the 
velocity boxes and read them in + velocity_displacement_factor_2LPT = (displacement_factor_2LPT - init_displacement_factor_2LPT) / user_params->BOX_LEN; + + // now add the missing factor in eq. D9 +#pragma omp parallel shared(boxes,velocity_displacement_factor_2LPT,dimension) private(i,j,k) num_threads(user_params->N_THREADS) + { +#pragma omp for + for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ + if(user_params->PERTURB_ON_HIGH_RES) { + boxes->hires_vx_2LPT[R_INDEX(i,j,k)] *= velocity_displacement_factor_2LPT; // this is now comoving displacement in units of box size + boxes->hires_vy_2LPT[R_INDEX(i,j,k)] *= velocity_displacement_factor_2LPT; // this is now comoving displacement in units of box size + boxes->hires_vz_2LPT[R_INDEX(i,j,k)] *= (velocity_displacement_factor_2LPT/user_params->NON_CUBIC_FACTOR); // this is now comoving displacement in units of box size + } + else { + boxes->lowres_vx_2LPT[HII_R_INDEX(i,j,k)] *= velocity_displacement_factor_2LPT; // this is now comoving displacement in units of box size + boxes->lowres_vy_2LPT[HII_R_INDEX(i,j,k)] *= velocity_displacement_factor_2LPT; // this is now comoving displacement in units of box size + boxes->lowres_vz_2LPT[HII_R_INDEX(i,j,k)] *= (velocity_displacement_factor_2LPT/user_params->NON_CUBIC_FACTOR); // this is now comoving displacement in units of box size + } + } + } + } + } + } + + + // * ************************************************************************* * // + // * END 2LPT PART * // + // * ************************************************************************* * // + + // ************ END INITIALIZATION **************************** // + + + // Box shapes from outputs.py and convenience macros + if(user_params->PERTURB_ON_HIGH_RES) { + int num_pixels = TOT_NUM_PIXELS; + size_t size = TOT_NUM_PIXELS * sizeof(double); + } + else { + int num_pixels = HII_TOT_NUM_PIXELS; + size_t size = HII_TOT_NUM_PIXELS * sizeof(double); + } + + // Allocate device memory for output box + double* d_box; + 
cudaMalloc(&d_box, size); + cudaMemset(d_box, 0, sizeof(double) * size); + + // Allocate device memory for density field + float* hires_density; + cudaMalloc(&hires_density); + cudaMemcpy(hires_density, boxes->hires_density, size, cudaMemcpyHostToDevice); + + // Allocate device memory and copy arrays to device as per user_params + if (user_params->PERTURB_ON_HIGH_RES) { + float* hires_vx; + float* hires_vy; + float* hires_vz; + cudaMalloc(&hires_vx); + cudaMalloc(&hires_vy); + cudaMalloc(&hires_vz); + cudaMemcpy(hires_vx, boxes->hires_vx, size, cudaMemcpyHostToDevice); + cudaMemcpy(hires_vy, boxes->hires_vy, size, cudaMemcpyHostToDevice); + cudaMemcpy(hires_vz, boxes->hires_vz, size, cudaMemcpyHostToDevice); + } + else { + float* lowres_vx; + float* lowres_vy; + float* lowres_vz; + cudaMalloc(&lowres_vx); + cudaMalloc(&lowres_vy); + cudaMalloc(&lowres_vz); + cudaMemcpy(lowres_vx, boxes->lowres_vx, size, cudaMemcpyHostToDevice); + cudaMemcpy(lowres_vy, boxes->lowres_vy, size, cudaMemcpyHostToDevice); + cudaMemcpy(lowres_vz, boxes->lowres_vz, size, cudaMemcpyHostToDevice); + } + if (user_params->USE_2LPT) { + if (user_params->PERTURB_ON_HIGH_RES) { + float* hires_vx_2LPT; + float* hires_vy_2LPT; + float* hires_vz_2LPT; + cudaMalloc(&hires_vx_2LPT); + cudaMalloc(&hires_vy_2LPT); + cudaMalloc(&hires_vz_2LPT); + cudaMemcpy(hires_vx_2LPT, boxes->hires_vx_2LPT, size, cudaMemcpyHostToDevice); + cudaMemcpy(hires_vy_2LPT, boxes->hires_vy_2LPT, size, cudaMemcpyHostToDevice); + cudaMemcpy(hires_vz_2LPT, boxes->hires_vz_2LPT, size, cudaMemcpyHostToDevice); + } + else { + float* lowres_vx_2LPT; + float* lowres_vy_2LPT; + float* lowres_vz_2LPT; + cudaMalloc(&lowres_vx_2LPT); + cudaMalloc(&lowres_vy_2LPT); + cudaMalloc(&lowres_vz_2LPT); + cudaMemcpy(lowres_vx_2LPT, boxes->lowres_vx_2LPT, size, cudaMemcpyHostToDevice); + cudaMemcpy(lowres_vy_2LPT, boxes->lowres_vy_2LPT, size, cudaMemcpyHostToDevice); + cudaMemcpy(lowres_vz_2LPT, boxes->lowres_vz_2LPT, size, cudaMemcpyHostToDevice); 
+ } + } + + // Invoke kernel + int threadsPerBlock = 256; + int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; + perturb_density_field_kernel<<>>( + d_box, dimension, user_params->DIM, D_PARA, MID_PARA, user_params->NON_CUBIC_FACTOR, + f_pixel_factor, init_growth_factor, user_params->PERTURB_ON_HIGH_RES, user_params->USE_2LPT, + ); + + // Only use during development! + cudaError_t err = cudaDeviceSynchronize(); + CATCH_CUDA_ERROR(err); + err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); + Throw(CudaError); // Or the appropriate exception type + } + + // Copy results from device to host + double *resampled_box; + // resampled_box = (double *)calloc(num_pixels, sizeof(double)); // is this needed? + cudaMemcpy(resampled_box, d_box, size, cudaMemcpyDeviceToHost); + + // Deallocate device memory + cudaFree(d_box); + cudaFree(hires_density); + + if (user_params->PERTURB_ON_HIGH_RES) { + cudaFree(hires_vx); + cudaFree(hires_vy); + cudaFree(hires_vz); + } + else { + cudaFree(lowres_vx); + cudaFree(lowres_vy); + cudaFree(lowres_vz); + } + if (user_params->USE_2LPT) { + if (user_params->PERTURB_ON_HIGH_RES) { + cudaFree(hires_vx_2LPT); + cudaFree(hires_vy_2LPT); + cudaFree(hires_vz_2LPT); + } + else { + cudaFree(lowres_vx_2LPT); + cudaFree(lowres_vy_2LPT); + cudaFree(lowres_vz_2LPT); + } + } + + LOG_SUPER_DEBUG("resampled_box: "); + debugSummarizeBoxDouble(resampled_box, dimension, user_params->NON_CUBIC_FACTOR, " "); + + // Resample back to a float for remaining algorithm + #pragma omp parallel \ + shared(LOWRES_density_perturb,HIRES_density_perturb,resampled_box,dimension) \ + private(i,j,k) \ + num_threads(user_params->N_THREADS) + { + #pragma omp for + for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ + if(user_params->PERTURB_ON_HIGH_RES) { + *( (float *)HIRES_density_perturb + R_FFT_INDEX(i,j,k) ) = (float)resampled_box[R_INDEX(i,j,k)]; + } + else { + *( (float *)LOWRES_density_perturb 
+ HII_R_FFT_INDEX(i,j,k) ) = (float)resampled_box[HII_R_INDEX(i,j,k)]; + } + } + } + } + } + free(resampled_box); + LOG_DEBUG("Finished perturbing the density field"); + + LOG_SUPER_DEBUG("density_perturb: "); + if(user_params->PERTURB_ON_HIGH_RES){ + debugSummarizeBoxComplex(HIRES_density_perturb, dimension, user_params->NON_CUBIC_FACTOR, " "); + }else{ + debugSummarizeBoxComplex(LOWRES_density_perturb, dimension, user_params->NON_CUBIC_FACTOR, " "); + } + + // deallocate +#pragma omp parallel shared(boxes,velocity_displacement_factor,dimension) private(i,j,k) num_threads(user_params->N_THREADS) + { +#pragma omp for + for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ + if(user_params->PERTURB_ON_HIGH_RES) { + boxes->hires_vx[R_INDEX(i,j,k)] /= velocity_displacement_factor; // convert back to z = 0 quantity + boxes->hires_vy[R_INDEX(i,j,k)] /= velocity_displacement_factor; // convert back to z = 0 quantity + boxes->hires_vz[R_INDEX(i,j,k)] /= (velocity_displacement_factor/user_params->NON_CUBIC_FACTOR); // convert back to z = 0 quantity + } + else { + boxes->lowres_vx[HII_R_INDEX(i,j,k)] /= velocity_displacement_factor; // convert back to z = 0 quantity + boxes->lowres_vy[HII_R_INDEX(i,j,k)] /= velocity_displacement_factor; // convert back to z = 0 quantity + boxes->lowres_vz[HII_R_INDEX(i,j,k)] /= (velocity_displacement_factor/user_params->NON_CUBIC_FACTOR); // convert back to z = 0 quantity + } + } + } + } + } + + if(user_params->USE_2LPT){ +#pragma omp parallel shared(boxes,velocity_displacement_factor_2LPT,dimension) private(i,j,k) num_threads(user_params->N_THREADS) + { +#pragma omp for + for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ + if(user_params->PERTURB_ON_HIGH_RES) { + boxes->hires_vx_2LPT[R_INDEX(i,j,k)] /= velocity_displacement_factor_2LPT; // convert back to z = 0 quantity + boxes->hires_vy_2LPT[R_INDEX(i,j,k)] /= velocity_displacement_factor_2LPT; // convert back to z = 0 quantity + boxes->hires_vz_2LPT[R_INDEX(i,j,k)] /= 
(velocity_displacement_factor_2LPT/user_params->NON_CUBIC_FACTOR); // convert back to z = 0 quantity + } + else { + boxes->lowres_vx_2LPT[HII_R_INDEX(i,j,k)] /= velocity_displacement_factor_2LPT; // convert back to z = 0 quantity + boxes->lowres_vy_2LPT[HII_R_INDEX(i,j,k)] /= velocity_displacement_factor_2LPT; // convert back to z = 0 quantity + boxes->lowres_vz_2LPT[HII_R_INDEX(i,j,k)] /= (velocity_displacement_factor_2LPT/user_params->NON_CUBIC_FACTOR); // convert back to z = 0 quantity + } + } + } + } + } + } + LOG_DEBUG("Cleanup velocities for perturb"); + } + + // Now, if I still have the high resolution density grid (HIRES_density_perturb) I need to downsample it to the low-resolution grid + if(user_params->PERTURB_ON_HIGH_RES) { + + LOG_DEBUG("Downsample the high-res perturbed density"); + + // Transform to Fourier space to sample (filter) the box + dft_r2c_cube(user_params->USE_FFTW_WISDOM, user_params->DIM, D_PARA, user_params->N_THREADS, HIRES_density_perturb); + + // Need to save a copy of the high-resolution unfiltered density field for the velocities + memcpy(HIRES_density_perturb_saved, HIRES_density_perturb, sizeof(fftwf_complex)*KSPACE_NUM_PIXELS); + + // Now filter the box + // ALICE: RES=0 (dimension=DIM, midpoint=MIDDLE), filter_type=0 (real space top-hat filtering) + if (user_params->DIM != user_params->HII_DIM) { + filter_box(HIRES_density_perturb, 0, 0, L_FACTOR*user_params->BOX_LEN/(user_params->HII_DIM+0.0)); + } + + // FFT back to real space + dft_c2r_cube(user_params->USE_FFTW_WISDOM, user_params->DIM, D_PARA, user_params->N_THREADS, HIRES_density_perturb); + + // Renormalise the FFT'd box +#pragma omp parallel shared(HIRES_density_perturb,LOWRES_density_perturb,f_pixel_factor,mass_factor) private(i,j,k) num_threads(user_params->N_THREADS) + { +#pragma omp for + for (i=0; iHII_DIM; i++){ + for (j=0; jHII_DIM; j++){ + for (k=0; kN_THREADS) + { +#pragma omp for + for (i=0; iHII_DIM; i++){ + for (j=0; jHII_DIM; j++){ + for (k=0; kHII_DIM, 
user_params->NON_CUBIC_FACTOR, " "); + + // transform to k-space + dft_r2c_cube(user_params->USE_FFTW_WISDOM, user_params->HII_DIM, HII_D_PARA, user_params->N_THREADS, LOWRES_density_perturb); + + // smooth the field + // ALICE: RES=1 (dimension=HII_DIM, midpoint=HII_MIDDLE), filter_type=2 (Gaussian filtering) + if (!global_params.EVOLVE_DENSITY_LINEARLY && global_params.SMOOTH_EVOLVED_DENSITY_FIELD){ + filter_box(LOWRES_density_perturb, 1, 2, global_params.R_smooth_density*user_params->BOX_LEN/(float)user_params->HII_DIM); + } + + LOG_SUPER_DEBUG("LOWRES_density_perturb after smoothing: "); + debugSummarizeBoxComplex(LOWRES_density_perturb, user_params->HII_DIM, user_params->NON_CUBIC_FACTOR, " "); + + // save a copy of the k-space density field + memcpy(LOWRES_density_perturb_saved, LOWRES_density_perturb, sizeof(fftwf_complex)*HII_KSPACE_NUM_PIXELS); + + dft_c2r_cube(user_params->USE_FFTW_WISDOM, user_params->HII_DIM, HII_D_PARA, user_params->N_THREADS, LOWRES_density_perturb); + + LOG_SUPER_DEBUG("LOWRES_density_perturb back in real space: "); + debugSummarizeBoxComplex(LOWRES_density_perturb, user_params->HII_DIM, user_params->NON_CUBIC_FACTOR, " "); + + // normalize after FFT + // ALICE: divide by total pixels; if result < -1 changed it to just above -1. 
+ int bad_count=0; +#pragma omp parallel shared(LOWRES_density_perturb) private(i,j,k) num_threads(user_params->N_THREADS) reduction(+: bad_count) + { +#pragma omp for + for(i=0; iHII_DIM; i++){ + for(j=0; jHII_DIM; j++){ + for(k=0; k=5) LOG_WARNING("Total number of bad indices for LOW_density_perturb: %d", bad_count); + LOG_SUPER_DEBUG("LOWRES_density_perturb back in real space (normalized): "); + debugSummarizeBoxComplex(LOWRES_density_perturb, user_params->HII_DIM, user_params->NON_CUBIC_FACTOR, " "); + +// ALICE: copy LOWRES_density_perturb cell values to density cells +#pragma omp parallel shared(perturbed_field,LOWRES_density_perturb) private(i,j,k) num_threads(user_params->N_THREADS) + { +#pragma omp for + for (i=0; iHII_DIM; i++){ + for (j=0; jHII_DIM; j++){ + for (k=0; kdensity + HII_R_INDEX(i,j,k)) = *((float *)LOWRES_density_perturb + HII_R_FFT_INDEX(i,j,k)); + } + } + } + } + + // **** Convert to velocities ***** // + LOG_DEBUG("Generate velocity fields"); + + float dDdt_over_D; + + dDdt_over_D = dDdt/growth_factor; + + + if (user_params->KEEP_3D_VELOCITIES){ + compute_perturbed_velocities( + 0, + user_params, + HIRES_density_perturb, + HIRES_density_perturb_saved, + LOWRES_density_perturb, + LOWRES_density_perturb_saved, + dDdt_over_D, + dimension, + switch_mid, + f_pixel_factor, + perturbed_field->velocity_x + ); + compute_perturbed_velocities( + 1, + user_params, + HIRES_density_perturb, + HIRES_density_perturb_saved, + LOWRES_density_perturb, + LOWRES_density_perturb_saved, + dDdt_over_D, + dimension, + switch_mid, + f_pixel_factor, + perturbed_field->velocity_y + ); + } + + compute_perturbed_velocities( + 2, + user_params, + HIRES_density_perturb, + HIRES_density_perturb_saved, + LOWRES_density_perturb, + LOWRES_density_perturb_saved, + dDdt_over_D, + dimension, + switch_mid, + f_pixel_factor, + perturbed_field->velocity_z + ); + + fftwf_cleanup_threads(); + fftwf_cleanup(); + fftwf_forget_wisdom(); + + // deallocate + 
fftwf_free(LOWRES_density_perturb); + fftwf_free(LOWRES_density_perturb_saved); + if(user_params->PERTURB_ON_HIGH_RES) { + fftwf_free(HIRES_density_perturb); + fftwf_free(HIRES_density_perturb_saved); + } + fftwf_cleanup(); + + } // End of Try{} + Catch(status){ + return(status); + } + + return(0); +} diff --git a/src/py21cmfast/src/PerturbField.h b/src/py21cmfast/src/PerturbField.h index 50550aec1..718906ecd 100644 --- a/src/py21cmfast/src/PerturbField.h +++ b/src/py21cmfast/src/PerturbField.h @@ -11,6 +11,14 @@ int ComputePerturbField( float redshift, UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes, PerturbedField *perturbed_field ); +int ComputePerturbField_cpu( + float redshift, UserParams *user_params, CosmoParams *cosmo_params, + InitialConditions *boxes, PerturbedField *perturbed_field +); +int ComputePerturbField_gpu( + float redshift, UserParams *user_params, CosmoParams *cosmo_params, + InitialConditions *boxes, PerturbedField *perturbed_field +); #ifdef __cplusplus } From a1384c89f9da17d8b7c2c51a19dfa22caa3a6b63 Mon Sep 17 00:00:00 2001 From: alserene Date: Mon, 21 Oct 2024 19:10:48 +1100 Subject: [PATCH 005/145] Add TODO comments. 
--- src/py21cmfast/src/filtering.c | 6 ++++++ src/py21cmfast/src/filtering.cu | 17 +++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/py21cmfast/src/filtering.c b/src/py21cmfast/src/filtering.c index e8afd337d..de2551eae 100644 --- a/src/py21cmfast/src/filtering.c +++ b/src/py21cmfast/src/filtering.c @@ -160,6 +160,12 @@ void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_p // Call the CPU version filter_box_cpu(box, RES, filter_type, R, R_param); } + // switch(GPU) { + // case 0: + // filter_box_cpu(box, RES, filter_type, R, R_param); + // case 1: + // filter_box_gpu(box, RES, filter_type, R, R_param); + // } } //Test function to filter a box without computing a whole output box diff --git a/src/py21cmfast/src/filtering.cu b/src/py21cmfast/src/filtering.cu index d2c19e6be..1c42e0e5b 100644 --- a/src/py21cmfast/src/filtering.cu +++ b/src/py21cmfast/src/filtering.cu @@ -74,11 +74,14 @@ __device__ inline double spherical_shell_filter(double k, double R_outer, double } // __global__ void filter_box_kernel(fftwf_complex *box, int dimension, int midpoint, int midpoint_para, double delta_k, float R, float R_param, double R_const, int filter_type) { -__global__ void filter_box_kernel(cuFloatComplex *box, int dimension, int midpoint, int midpoint_para, double delta_k, float R, float R_param, double R_const, int filter_type) { +__global__ void filter_box_kernel(cuFloatComplex *box, size_t size, int dimension, int midpoint, int midpoint_para, double delta_k, float R, float R_param, double R_const, int filter_type) { // Get index of box (flattened k-box) unsigned long long idx = blockIdx.x * blockDim.x + threadIdx.x; + // TODO: Add bound check (in case number of threads != multiple of block size) + // while (idx < size) { + // Compute the 3D indices (n_x, n_y, n_z) for the k-box from the flattened index (idx) // Based on convenience macros in indexing.h int n_z = idx % (midpoint_para + 1); @@ -86,11 +89,17 @@ 
__global__ void filter_box_kernel(cuFloatComplex *box, int dimension, int midpoi int n_y = remaining % dimension; int n_x = remaining / dimension; + // TODO: Alternative + // array(int) cell_coords = (idx % (midpoint_para + 1), (idx / (midpoint_para + 1)) % dimension, (idx / (midpoint_para + 1)) / dimension) + // Compute wave vector components float k_x = (n_x - dimension * (n_x > midpoint)) * delta_k; // Wrap around midpoint float k_y = (n_y - dimension * (n_y > midpoint)) * delta_k; float k_z = n_z * delta_k; + // TODO: Alternative + // (as above and * delta_k to vector at end) + // Compute squared magnitude of wave vector float k_mag_sq = k_x*k_x + k_y*k_y + k_z*k_z; @@ -127,10 +136,6 @@ __global__ void filter_box_kernel(cuFloatComplex *box, int dimension, int midpoi } -// *box is a pointer, so only memory address is passed, not entire array -// #ifdef __cplusplus -// extern "C" -// #endif void filter_box_gpu(fftwf_complex *box, int RES, int filter_type, float R, float R_param) { // Get required values @@ -172,7 +177,7 @@ void filter_box_gpu(fftwf_complex *box, int RES, int filter_type, float R, float // Invoke kernel int threadsPerBlock = 256; int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; - filter_box_kernel<<>>(reinterpret_cast(d_box), dimension, midpoint, midpoint_para, delta_k, R, R_param, R_const, filter_type); + filter_box_kernel<<>>(reinterpret_cast(d_box), size, dimension, midpoint, midpoint_para, delta_k, R, R_param, R_const, filter_type); // filter_box_kernel<<>>((cuFloatComplex *)d_box, dimension, midpoint, midpoint_para, delta_k, R, R_param, R_const, filter_type); // Only use during development! From 641620132696d5f81bafb067afe48b698f24503d Mon Sep 17 00:00:00 2001 From: alserene Date: Tue, 22 Oct 2024 12:18:18 +1100 Subject: [PATCH 006/145] Add array handling. 
--- src/py21cmfast/src/PerturbField.cu | 91 ++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 31 deletions(-) diff --git a/src/py21cmfast/src/PerturbField.cu b/src/py21cmfast/src/PerturbField.cu index 92bd2e4bd..5c96967a4 100644 --- a/src/py21cmfast/src/PerturbField.cu +++ b/src/py21cmfast/src/PerturbField.cu @@ -166,16 +166,34 @@ void compute_perturbed_velocities( } +// ---------------------------------------------------------------------------------------------------------------------------- + +// #define R_INDEX(x,y,z)((unsigned long long)((z)+D_PARA*((y)+D*(x)))) __device__ inline double compute_R_INDEX(int i, int j, int k, int DIM, int D_PARA) { return k + D_PARA * (j + DIM * i) } -__device__ inline double compute_HII_R_INDEX(int i, int j, int k, int DIM, int MID_PARA) { - return k + 2 * (MID_PARA + 1) * (j + DIM * i) +// #define HII_R_INDEX(x,y,z)((unsigned long long)((z)+HII_D_PARA*((y)+HII_D*(x)))) +__device__ inline double compute_HII_R_INDEX(int i, int j, int k, int HII_D_PARA, int HII_D) { + return k + HII_D_PARA * (j + HII_D * i) } __global__ void perturb_density_field_kernel( - double *resampled_box, int dimension, int DIM, int D_PARA, int MID_PARA, + double *resampled_box, + const float* __restrict__ hires_density, // Is const needed as well as __restrict__? 
+ const float* __restrict__ hires_vx, + const float* __restrict__ hires_vy, + const float* __restrict__ hires_vz, + const float* __restrict__ lowres_vx, + const float* __restrict__ lowres_vy, + const float* __restrict__ lowres_vz, + const float* __restrict__ hires_vx_2LPT, + const float* __restrict__ hires_vy_2LPT, + const float* __restrict__ hires_vz_2LPT, + const float* __restrict__ lowres_vx_2LPT, + const float* __restrict__ lowres_vy_2LPT, + const float* __restrict__ lowres_vz_2LPT, + int dimension, int DIM, int D_PARA, int MID_PARA, int NON_CUBIC_FACTOR, float f_pixel_factor, float init_growth_factor, bool PERTURB_ON_HIGH_RES, bool USE_2LPT, ) { @@ -186,6 +204,8 @@ __global__ void perturb_density_field_kernel( int i = idx / (D_PARA * DIM); int j = (idx / D_PARA) % DIM; int k = idx % D_PARA; + + int r_index = compute_R_INDEX(i, j, k); // Map index to location in units of box size float xf = (i + 0.5) / DIM; @@ -194,30 +214,31 @@ __global__ void perturb_density_field_kernel( // Update locations if (PERTURB_ON_HIGH_RES) { - xf += hires_vx[compute_R_INDEX(i, j, k)]; - yf += hires_vy[compute_R_INDEX(i, j, k)]; - zf += hires_vz[compute_R_INDEX(i, j, k)]; + xf += __ldg(&hires_vx[r_index]); + yf += __ldg(&hires_vy[r_index]); + zf += __ldg(&hires_vz[r_index]); } else { unsigned long long HII_i = (unsigned long long)(i / f_pixel_factor); unsigned long long HII_j = (unsigned long long)(j / f_pixel_factor); unsigned long long HII_k = (unsigned long long)(k / f_pixel_factor); - xf += lowres_vx[compute_HII_R_INDEX(HII_i, HII_j, HII_k)]; - yf += lowres_vy[compute_HII_R_INDEX(HII_i, HII_j, HII_k)]; - zf += lowres_vz[compute_HII_R_INDEX(HII_i, HII_j, HII_k)]; + int HII_index = compute_HII_R_INDEX(HII_i, HII_j, HII_k, HII_D_PARA, HII_D); + xf += __ldg(&lowres_vx[HII_index]); + yf += __ldg(&lowres_vy[HII_index]); + zf += __ldg(&lowres_vz[HII_index]); } // 2LPT (add second order corrections) if (USE_2LPT) { if (PERTURB_ON_HIGH_RES) { - xf -= hires_vx_2LPT[compute_R_INDEX(i, j, 
k)]; - yf -= hires_vy_2LP[compute_R_INDEX(i, j, k)]; - zf -= hires_vz_2LPT[compute_R_INDEX(i, j, k)]; + xf -= __ldg(&hires_vx_2LPT[r_index]); + yf -= __ldg(&hires_vy_2LP[r_index]); + zf -= __ldg(&hires_vz_2LPT[r_index]); } else { - xf -= lowres_vx_2LPT[compute_HII_R_INDEX(HII_i, HII_j, HII_k)]; - yf -= lowres_vy_2LPT[compute_HII_R_INDEX(HII_i, HII_j, HII_k)]; - zf -= lowres_vz_2LPT[compute_HII_R_INDEX(HII_i, HII_j, HII_k)]; + xf -= __ldg(&lowres_vx_2LPT[HII_index]); + yf -= __ldg(&lowres_vy_2LPT[HII_index]); + zf -= __ldg(&lowres_vz_2LPT[HII_index]); } } @@ -278,30 +299,34 @@ __global__ void perturb_density_field_kernel( int yp1 = (yi + 1) % dimension; int zp1 = (zi + 1) % (unsigned long long)(NON_CUBIC_FACTOR * dimension); + double scaled_density = 1 + init_growth_factor * __ldg(&hires_density[r_index]); + if (PERTURB_ON_HIGH_RES) { // Redistribute the mass over the 8 neighbouring cells according to cloud in cell // Cell mass = (1 + init_growth_factor * orig_density) * (proportion of mass to distribute) - atomicAdd(&resampled_box[compute_R_INDEX(xi, yi, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * t_y * t_z); - atomicAdd(&resampled_box[compute_R_INDEX(xp1, yi, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * t_y * t_z); - atomicAdd(&resampled_box[compute_R_INDEX(xi, yp1, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * d_y * t_z); - atomicAdd(&resampled_box[compute_R_INDEX(xp1, yp1, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * d_y * t_z); - atomicAdd(&resampled_box[compute_R_INDEX(xi, yi, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * t_y * d_z); - atomicAdd(&resampled_box[compute_R_INDEX(xp1, yi, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * t_y * d_z); - atomicAdd(&resampled_box[compute_R_INDEX(xi, yp1, zp1)], 
(double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * d_y * d_z); - atomicAdd(&resampled_box[compute_R_INDEX(xp1, yp1, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * d_y * d_z); + atomicAdd(&resampled_box[compute_R_INDEX(xi, yi, zi)], scaled_density * t_x * t_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yi, zi)], scaled_density * d_x * t_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xi, yp1, zi)], scaled_density * t_x * d_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yp1, zi)], scaled_density * d_x * d_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xi, yi, zp1)], scaled_density * t_x * t_y * d_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yi, zp1)], scaled_density * d_x * t_y * d_z); + atomicAdd(&resampled_box[compute_R_INDEX(xi, yp1, zp1)], scaled_density * t_x * d_y * d_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yp1, zp1)], scaled_density * d_x * d_y * d_z); } else { - atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yi, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * t_y * t_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yi, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * t_y * t_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yp1, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * d_y * t_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yp1, zi)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * d_y * t_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yi, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * t_y * d_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yi, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * t_y * d_z); - 
atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yp1, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * t_x * d_y * d_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yp1, zp1)], (double)(1 + init_growth_factor * hires_density[compute_R_INDEX(i, j, k)]) * d_x * d_y * d_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yi, zi)], scaled_density * t_x * t_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yi, zi)], scaled_density * d_x * t_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yp1, zi)], scaled_density * t_x * d_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yp1, zi)], scaled_density * d_x * d_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yi, zp1)], scaled_density * t_x * t_y * d_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yi, zp1)], scaled_density * d_x * t_y * d_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yp1, zp1)], scaled_density * t_x * d_y * d_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yp1, zp1)], scaled_density * d_x * d_y * d_z); } } +// ------------------------------------------------------------------------------------------------------------------------------------------------------------------ + int ComputePerturbField_gpu( float redshift, UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes, PerturbedField *perturbed_field @@ -487,6 +512,8 @@ int ComputePerturbField_gpu( // ************ END INITIALIZATION **************************** // + // ---------------------------------------------------------------------------------------------------------------------------- + // Box shapes from outputs.py and convenience macros if(user_params->PERTURB_ON_HIGH_RES) { int num_pixels = TOT_NUM_PIXELS; @@ -604,6 +631,8 @@ int ComputePerturbField_gpu( } } + // ---------------------------------------------------------------------------------------------------------------------------- + 
LOG_SUPER_DEBUG("resampled_box: "); debugSummarizeBoxDouble(resampled_box, dimension, user_params->NON_CUBIC_FACTOR, " "); From 6fdfdd222db116961c6a89670e5e3ac46ed48f57 Mon Sep 17 00:00:00 2001 From: alserene Date: Tue, 22 Oct 2024 15:29:23 +1100 Subject: [PATCH 007/145] Remove comment. --- src/py21cmfast/src/filtering.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py21cmfast/src/filtering.cu b/src/py21cmfast/src/filtering.cu index 1c42e0e5b..d1ac81ed3 100644 --- a/src/py21cmfast/src/filtering.cu +++ b/src/py21cmfast/src/filtering.cu @@ -190,7 +190,7 @@ void filter_box_gpu(fftwf_complex *box, int RES, int filter_type, float R, float err = cudaGetLastError(); if (err != cudaSuccess) { LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); - Throw(CudaError); // Or the appropriate exception type + Throw(CudaError); } // Copy results from device to host From 24d5509c45d717a3636ed842738fa7eef6b8556f Mon Sep 17 00:00:00 2001 From: alserene Date: Tue, 22 Oct 2024 15:29:41 +1100 Subject: [PATCH 008/145] Tidy types. 
--- src/py21cmfast/src/PerturbField.cu | 46 +++++++++++++++++++----------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/src/py21cmfast/src/PerturbField.cu b/src/py21cmfast/src/PerturbField.cu index 5c96967a4..e2ff0af25 100644 --- a/src/py21cmfast/src/PerturbField.cu +++ b/src/py21cmfast/src/PerturbField.cu @@ -169,12 +169,12 @@ void compute_perturbed_velocities( // ---------------------------------------------------------------------------------------------------------------------------- // #define R_INDEX(x,y,z)((unsigned long long)((z)+D_PARA*((y)+D*(x)))) -__device__ inline double compute_R_INDEX(int i, int j, int k, int DIM, int D_PARA) { +__device__ inline unsigned long long compute_R_INDEX(int i, int j, int k, int DIM, int D_PARA) { return k + D_PARA * (j + DIM * i) } // #define HII_R_INDEX(x,y,z)((unsigned long long)((z)+HII_D_PARA*((y)+HII_D*(x)))) -__device__ inline double compute_HII_R_INDEX(int i, int j, int k, int HII_D_PARA, int HII_D) { +__device__ inline unsigned long long compute_HII_R_INDEX(int i, int j, int k, int HII_D_PARA, int HII_D) { return k + HII_D_PARA * (j + HII_D * i) } @@ -205,12 +205,12 @@ __global__ void perturb_density_field_kernel( int j = (idx / D_PARA) % DIM; int k = idx % D_PARA; - int r_index = compute_R_INDEX(i, j, k); + unsigned long long r_index = compute_R_INDEX(i, j, k); // Map index to location in units of box size - float xf = (i + 0.5) / DIM; - float yf = (j + 0.5) / DIM; - float zf = (k + 0.5) / D_PARA; + double xf = (i + 0.5) / DIM; + double yf = (j + 0.5) / DIM; + double zf = (k + 0.5) / D_PARA; // Update locations if (PERTURB_ON_HIGH_RES) { @@ -222,7 +222,7 @@ __global__ void perturb_density_field_kernel( unsigned long long HII_i = (unsigned long long)(i / f_pixel_factor); unsigned long long HII_j = (unsigned long long)(j / f_pixel_factor); unsigned long long HII_k = (unsigned long long)(k / f_pixel_factor); - int HII_index = compute_HII_R_INDEX(HII_i, HII_j, HII_k, HII_D_PARA, HII_D); + unsigned 
long long HII_index = compute_HII_R_INDEX(HII_i, HII_j, HII_k, HII_D_PARA, HII_D); xf += __ldg(&lowres_vx[HII_index]); yf += __ldg(&lowres_vy[HII_index]); zf += __ldg(&lowres_vz[HII_index]); @@ -242,17 +242,27 @@ __global__ void perturb_density_field_kernel( } } + // TODO: shared between threads? + // Convert once to reduce overhead of multiple casts + double dimension_double = (double)(dimension); + double dimension_factored_double = dimension_double * (double)(NON_CUBIC_FACTOR); + int dimension_factored = dimension * NON_CUBIC_FACTOR; + // Scale coordinates back to grid size - xf *= (double)(dimension); - yf *= (double)(dimension); - zf *= (double)((unsigned long long)(NON_CUBIC_FACTOR * dimension)); + xf *= dimension_double; + yf *= dimension_double; + zf *= dimension_factored_double; // Wrap coordinates to keep them within valid boundaries - xf = fmod(fmod(xf, dimension) + dimension, dimension); - yf = fmod(fmod(yf, dimension) + dimension, dimension); - zf = fmod(fmod(zf, dimension * NCF) + dimension * NCF, dimension * NCF); + xf = fmod(fmod(xf, dimension_double) + dimension_double, dimension_double); + yf = fmod(fmod(yf, dimension_double) + dimension_double, dimension_double); + zf = fmod(fmod(zf, dimension_factored_double) + dimension_factored_double, dimension_factored_double); + + // FROM NVIDIA DOCS: + // __device__ doublenearbyint(double x) // Round the input argument to the nearest integer. + // There are SO many double-to-int conversion intrinsics. How to know if should use any? 
- // Get integer values for indices from floating point values + // Get integer values for indices from double precision values int xi = xf; int yi = yf; int zi = zf; @@ -260,7 +270,7 @@ __global__ void perturb_density_field_kernel( // Wrap index coordinates to ensure no out-of-bounds array access will be attempted xi = (xi % dimension + dimension) % dimension; yi = (yi % dimension + dimension) % dimension; - zi = (zi % dimension * NCF + dimension * NCF) % dimension * NCF; + zi = (zi % dimension_factored + dimension_factored) % dimension_factored; // Determine the fraction of the perturbed cell which overlaps with the 8 nearest grid cells, // based on the grid cell which contains the centre of the perturbed cell @@ -586,7 +596,9 @@ int ComputePerturbField_gpu( int threadsPerBlock = 256; int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; perturb_density_field_kernel<<>>( - d_box, dimension, user_params->DIM, D_PARA, MID_PARA, user_params->NON_CUBIC_FACTOR, + d_box, hires_density, hires_vx, hires_vy, hires_vz, lowres_vx, lowres_vy, lowres_vz, + hires_vx_2LPT, hires_vy_2LPT, hires_vz_2LPT, lowres_vx_2LPT, lowres_vy_2LPT, lowres_vz_2LPT + dimension, user_params->DIM, D_PARA, MID_PARA, user_params->NON_CUBIC_FACTOR, f_pixel_factor, init_growth_factor, user_params->PERTURB_ON_HIGH_RES, user_params->USE_2LPT, ); @@ -596,7 +608,7 @@ int ComputePerturbField_gpu( err = cudaGetLastError(); if (err != cudaSuccess) { LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); - Throw(CudaError); // Or the appropriate exception type + Throw(CudaError); } // Copy results from device to host From f171e236d0a75f974021b4b145719585ebddc364 Mon Sep 17 00:00:00 2001 From: alserene Date: Wed, 23 Oct 2024 10:05:25 +1100 Subject: [PATCH 009/145] Remove outdated comment. 
--- src/py21cmfast/src/filtering.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/py21cmfast/src/filtering.c b/src/py21cmfast/src/filtering.c index de2551eae..0d65c2494 100644 --- a/src/py21cmfast/src/filtering.c +++ b/src/py21cmfast/src/filtering.c @@ -1,7 +1,3 @@ - -//filter_box, filter_box_annulus and filter_box_mfp should be combined in a better way, they require different inputs -//and they are run on different subsets of the boxes but they contain a lot of the same math - #include #include #include @@ -157,15 +153,8 @@ void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_p if (1) { filter_box_gpu(box, RES, filter_type, R, R_param); } else { - // Call the CPU version filter_box_cpu(box, RES, filter_type, R, R_param); } - // switch(GPU) { - // case 0: - // filter_box_cpu(box, RES, filter_type, R, R_param); - // case 1: - // filter_box_gpu(box, RES, filter_type, R, R_param); - // } } //Test function to filter a box without computing a whole output box From 3d5366258846112a35ea3e0f8efafb8f9fa97759 Mon Sep 17 00:00:00 2001 From: alserene Date: Wed, 23 Oct 2024 10:05:37 +1100 Subject: [PATCH 010/145] Tidy code. --- src/py21cmfast/src/filtering.cu | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/py21cmfast/src/filtering.cu b/src/py21cmfast/src/filtering.cu index d1ac81ed3..90a99d0ac 100644 --- a/src/py21cmfast/src/filtering.cu +++ b/src/py21cmfast/src/filtering.cu @@ -79,26 +79,24 @@ __global__ void filter_box_kernel(cuFloatComplex *box, size_t size, int dimensio // Get index of box (flattened k-box) unsigned long long idx = blockIdx.x * blockDim.x + threadIdx.x; - // TODO: Add bound check (in case number of threads != multiple of block size) + // TODO: Do we need a bound check? 
(in case number of threads != multiple of block size) // while (idx < size) { // Compute the 3D indices (n_x, n_y, n_z) for the k-box from the flattened index (idx) // Based on convenience macros in indexing.h int n_z = idx % (midpoint_para + 1); - unsigned long long remaining = idx / (midpoint_para + 1); // Calculate remaining index + unsigned long long remaining = idx / (midpoint_para + 1); int n_y = remaining % dimension; int n_x = remaining / dimension; - // TODO: Alternative - // array(int) cell_coords = (idx % (midpoint_para + 1), (idx / (midpoint_para + 1)) % dimension, (idx / (midpoint_para + 1)) / dimension) - // Compute wave vector components float k_x = (n_x - dimension * (n_x > midpoint)) * delta_k; // Wrap around midpoint float k_y = (n_y - dimension * (n_y > midpoint)) * delta_k; float k_z = n_z * delta_k; - // TODO: Alternative - // (as above and * delta_k to vector at end) + // TODO: Try alternative vectorised coords & wave vector components? + // int *cell_coords = (int[]) {idx % (midpoint_para + 1), (idx / (midpoint_para + 1)) % dimension, (idx / (midpoint_para + 1)) / dimension)}; // (as above and * delta_k to vector at end) + // int *wave_vector = (float[]) { ... 
} // Compute squared magnitude of wave vector float k_mag_sq = k_x*k_x + k_y*k_y + k_z*k_z; @@ -143,14 +141,14 @@ void filter_box_gpu(fftwf_complex *box, int RES, int filter_type, float R, float switch(RES) { case 0: dimension = user_params_global->DIM; - midpoint = MIDDLE; // DIM / 2 - midpoint_para = MID_PARA; // NON_CUBIC_FACTOR * HII_DIM / 2 + midpoint = MIDDLE; // midpoint of x,y = DIM / 2 + midpoint_para = MID_PARA; // midpoint of z = NON_CUBIC_FACTOR * HII_DIM / 2 num_pixels = KSPACE_NUM_PIXELS; break; case 1: dimension = user_params_global->HII_DIM; - midpoint = HII_MIDDLE; // HII_DIM / 2 - midpoint_para = HII_MID_PARA; // NON_CUBIC_FACTOR * HII_DIM / 2 + midpoint = HII_MIDDLE; // midpoint of x,y = HII_DIM / 2 + midpoint_para = HII_MID_PARA; // midpoint of z = NON_CUBIC_FACTOR * HII_DIM / 2 num_pixels = HII_KSPACE_NUM_PIXELS; break; default: @@ -219,13 +217,11 @@ int test_filter_gpu(UserParams *user_params, CosmoParams *cosmo_params, AstroPar dft_r2c_cube(user_params->USE_FFTW_WISDOM, user_params->HII_DIM, HII_D_PARA, user_params->N_THREADS, box_unfiltered); + // Convert to CUDA complex type cuFloatComplex* box_unfiltered_cu = reinterpret_cast(box_unfiltered); - // float num_pixels = HII_TOT_NUM_PIXELS; for(ii=0;ii Date: Thu, 24 Oct 2024 10:37:03 +1100 Subject: [PATCH 011/145] Minor changes to resolve error messages. 
--- src/py21cmfast/src/PerturbField.c | 6 - src/py21cmfast/src/PerturbField.cu | 196 ++++++++++++++++------------- src/py21cmfast/src/PerturbField.h | 2 + 3 files changed, 113 insertions(+), 91 deletions(-) diff --git a/src/py21cmfast/src/PerturbField.c b/src/py21cmfast/src/PerturbField.c index 499199d67..b4886b2da 100644 --- a/src/py21cmfast/src/PerturbField.c +++ b/src/py21cmfast/src/PerturbField.c @@ -780,10 +780,4 @@ int ComputePerturbField( } else { ComputePerturbField_cpu(redshift, user_params, cosmo_params, boxes, perturbed_field) } - // switch(GPU) { - // case 0: - // filter_box_cpu(box, RES, filter_type, R, R_param); - // case 1: - // filter_box_gpu(box, RES, filter_type, R, R_param); - // } } diff --git a/src/py21cmfast/src/PerturbField.cu b/src/py21cmfast/src/PerturbField.cu index e2ff0af25..4476f3ae8 100644 --- a/src/py21cmfast/src/PerturbField.cu +++ b/src/py21cmfast/src/PerturbField.cu @@ -6,6 +6,11 @@ #include #include +// GPU +#include +#include +// #include + #include "cexcept.h" #include "exceptions.h" #include "logger.h" @@ -94,18 +99,28 @@ void compute_perturbed_velocities( // now set the velocities if ((n_x==0) && (n_y==0) && (n_z==0)) { // DC mode if(user_params->PERTURB_ON_HIGH_RES) { - HIRES_density_perturb[0] = 0; + // HIRES_density_perturb[0] = 0; + HIRES_density_perturb[0][0] = 0.; + HIRES_density_perturb[0][1] = 0.; } else { - LOWRES_density_perturb[0] = 0; + // LOWRES_density_perturb[0] = 0; + LOWRES_density_perturb[0][0] = 0.; + LOWRES_density_perturb[0][1] = 0.; } } else{ if(user_params->PERTURB_ON_HIGH_RES) { - HIRES_density_perturb[C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]*I/k_sq/(TOT_NUM_PIXELS+0.0); + // HIRES_density_perturb[C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]*I/k_sq/(TOT_NUM_PIXELS+0.0); + // HIRES_density_perturb[C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]/k_sq/(TOT_NUM_PIXELS+0.0); + // reinterpret_cast &>(HIRES_density_perturb[C_INDEX(n_x,n_y,n_z)]) *= std::complex(0., 
dDdt_over_D*kvec[axis]*I/k_sq/(TOT_NUM_PIXELS+0.0)); + reinterpret_cast &>(HIRES_density_perturb[C_INDEX(n_x,n_y,n_z)]) *= std::complex(0., dDdt_over_D*kvec[axis]/k_sq/(TOT_NUM_PIXELS+0.0)); } else { - LOWRES_density_perturb[HII_C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]*I/k_sq/(HII_TOT_NUM_PIXELS+0.0); + // LOWRES_density_perturb[HII_C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]*I/k_sq/(HII_TOT_NUM_PIXELS+0.0); + // LOWRES_density_perturb[HII_C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]/k_sq/(HII_TOT_NUM_PIXELS+0.0); + // reinterpret_cast &>(LOWRES_density_perturb[HII_C_INDEX(n_x,n_y,n_z)]) *= std::complex(0., dDdt_over_D*kvec[axis]*I/k_sq/(HII_TOT_NUM_PIXELS+0.0)); + reinterpret_cast &>(LOWRES_density_perturb[HII_C_INDEX(n_x,n_y,n_z)]) *= std::complex(0., dDdt_over_D*kvec[axis]/k_sq/(HII_TOT_NUM_PIXELS+0.0)); } } } @@ -122,7 +137,7 @@ void compute_perturbed_velocities( // smooth the high resolution field ready for resampling // ALICE: RES=0 (dimension=DIM, midpoint=MIDDLE), filter_type=0 (real space top-hat filtering) if (user_params->DIM != user_params->HII_DIM) - filter_box(HIRES_density_perturb, 0, 0, L_FACTOR*user_params->BOX_LEN/(user_params->HII_DIM+0.0)); + filter_box(HIRES_density_perturb, 0, 0, L_FACTOR*user_params->BOX_LEN/(user_params->HII_DIM+0.0), 0.); dft_c2r_cube(user_params->USE_FFTW_WISDOM, user_params->DIM, D_PARA, user_params->N_THREADS, HIRES_density_perturb); @@ -169,18 +184,19 @@ void compute_perturbed_velocities( // ---------------------------------------------------------------------------------------------------------------------------- // #define R_INDEX(x,y,z)((unsigned long long)((z)+D_PARA*((y)+D*(x)))) -__device__ inline unsigned long long compute_R_INDEX(int i, int j, int k, int DIM, int D_PARA) { - return k + D_PARA * (j + DIM * i) +__device__ inline unsigned long long compute_R_INDEX(int i, int j, int k, int dim, long long d_para) { + return k + d_para * (j + dim * i); } // #define HII_R_INDEX(x,y,z)((unsigned long 
long)((z)+HII_D_PARA*((y)+HII_D*(x)))) -__device__ inline unsigned long long compute_HII_R_INDEX(int i, int j, int k, int HII_D_PARA, int HII_D) { - return k + HII_D_PARA * (j + HII_D * i) +__device__ inline unsigned long long compute_HII_R_INDEX(int i, int j, int k, int hii_d, long long hii_d_para) { + return k + hii_d_para * (j + hii_d * i); } +// Is const needed as well as __restrict__? __global__ void perturb_density_field_kernel( double *resampled_box, - const float* __restrict__ hires_density, // Is const needed as well as __restrict__? + const float* __restrict__ hires_density, const float* __restrict__ hires_vx, const float* __restrict__ hires_vy, const float* __restrict__ hires_vz, @@ -193,27 +209,29 @@ __global__ void perturb_density_field_kernel( const float* __restrict__ lowres_vx_2LPT, const float* __restrict__ lowres_vy_2LPT, const float* __restrict__ lowres_vz_2LPT, - int dimension, int DIM, int D_PARA, int MID_PARA, - int NON_CUBIC_FACTOR, float f_pixel_factor, float init_growth_factor, - bool PERTURB_ON_HIGH_RES, bool USE_2LPT, + int dimension, int DIM, long long d_para, long long hii_d, long long hii_d_para, + int non_cubic_factor, float f_pixel_factor, float init_growth_factor, + bool perturb_on_high_res, bool use_2lpt ) { unsigned long long idx = blockIdx.x * blockDim.x + threadIdx.x; // Get index of density cell - int i = idx / (D_PARA * DIM); - int j = (idx / D_PARA) % DIM; - int k = idx % D_PARA; + int i = idx / (d_para * DIM); + int j = (idx / d_para) % DIM; + int k = idx % d_para; - unsigned long long r_index = compute_R_INDEX(i, j, k); + unsigned long long r_index = compute_R_INDEX(i, j, k, DIM, d_para); // Map index to location in units of box size double xf = (i + 0.5) / DIM; double yf = (j + 0.5) / DIM; - double zf = (k + 0.5) / D_PARA; + double zf = (k + 0.5) / d_para; // Update locations - if (PERTURB_ON_HIGH_RES) { + unsigned long long HII_index; + + if (perturb_on_high_res) { xf += __ldg(&hires_vx[r_index]); yf += 
__ldg(&hires_vy[r_index]); zf += __ldg(&hires_vz[r_index]); @@ -222,17 +240,17 @@ __global__ void perturb_density_field_kernel( unsigned long long HII_i = (unsigned long long)(i / f_pixel_factor); unsigned long long HII_j = (unsigned long long)(j / f_pixel_factor); unsigned long long HII_k = (unsigned long long)(k / f_pixel_factor); - unsigned long long HII_index = compute_HII_R_INDEX(HII_i, HII_j, HII_k, HII_D_PARA, HII_D); + HII_index = compute_HII_R_INDEX(HII_i, HII_j, HII_k, hii_d, hii_d_para); // This is accessing HII_D and HII_D_PARA macros! xf += __ldg(&lowres_vx[HII_index]); yf += __ldg(&lowres_vy[HII_index]); zf += __ldg(&lowres_vz[HII_index]); } // 2LPT (add second order corrections) - if (USE_2LPT) { - if (PERTURB_ON_HIGH_RES) { + if (use_2lpt) { + if (perturb_on_high_res) { xf -= __ldg(&hires_vx_2LPT[r_index]); - yf -= __ldg(&hires_vy_2LP[r_index]); + yf -= __ldg(&hires_vy_2LPT[r_index]); zf -= __ldg(&hires_vz_2LPT[r_index]); } else { @@ -245,8 +263,8 @@ __global__ void perturb_density_field_kernel( // TODO: shared between threads? // Convert once to reduce overhead of multiple casts double dimension_double = (double)(dimension); - double dimension_factored_double = dimension_double * (double)(NON_CUBIC_FACTOR); - int dimension_factored = dimension * NON_CUBIC_FACTOR; + double dimension_factored_double = dimension_double * (double)(non_cubic_factor); + int dimension_factored = dimension * non_cubic_factor; // Scale coordinates back to grid size xf *= dimension_double; @@ -295,7 +313,7 @@ __global__ void perturb_density_field_kernel( if(zf < (double)(zi + 0.5)) { d_z = 1. - d_z; zi -= 1; - zi += (zi + (unsigned long long)(NON_CUBIC_FACTOR * dimension)) % (unsigned long long)(NON_CUBIC_FACTOR * dimension); + zi += (zi + (unsigned long long)(non_cubic_factor * dimension)) % (unsigned long long)(non_cubic_factor * dimension); } // The fractions of mass which will remain with perturbed cell float t_x = 1. 
- d_x; @@ -307,35 +325,35 @@ __global__ void perturb_density_field_kernel( // Takes into account the offset based on cell centre determined above int xp1 = (xi + 1) % dimension; int yp1 = (yi + 1) % dimension; - int zp1 = (zi + 1) % (unsigned long long)(NON_CUBIC_FACTOR * dimension); + int zp1 = (zi + 1) % (unsigned long long)(non_cubic_factor * dimension); double scaled_density = 1 + init_growth_factor * __ldg(&hires_density[r_index]); - if (PERTURB_ON_HIGH_RES) { + if (perturb_on_high_res) { // Redistribute the mass over the 8 neighbouring cells according to cloud in cell // Cell mass = (1 + init_growth_factor * orig_density) * (proportion of mass to distribute) - atomicAdd(&resampled_box[compute_R_INDEX(xi, yi, zi)], scaled_density * t_x * t_y * t_z); - atomicAdd(&resampled_box[compute_R_INDEX(xp1, yi, zi)], scaled_density * d_x * t_y * t_z); - atomicAdd(&resampled_box[compute_R_INDEX(xi, yp1, zi)], scaled_density * t_x * d_y * t_z); - atomicAdd(&resampled_box[compute_R_INDEX(xp1, yp1, zi)], scaled_density * d_x * d_y * t_z); - atomicAdd(&resampled_box[compute_R_INDEX(xi, yi, zp1)], scaled_density * t_x * t_y * d_z); - atomicAdd(&resampled_box[compute_R_INDEX(xp1, yi, zp1)], scaled_density * d_x * t_y * d_z); - atomicAdd(&resampled_box[compute_R_INDEX(xi, yp1, zp1)], scaled_density * t_x * d_y * d_z); - atomicAdd(&resampled_box[compute_R_INDEX(xp1, yp1, zp1)], scaled_density * d_x * d_y * d_z); + atomicAdd(&resampled_box[compute_R_INDEX(xi, yi, zi, DIM, d_para)], scaled_density * t_x * t_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yi, zi, DIM, d_para)], scaled_density * d_x * t_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xi, yp1, zi, DIM, d_para)], scaled_density * t_x * d_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yp1, zi, DIM, d_para)], scaled_density * d_x * d_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xi, yi, zp1, DIM, d_para)], scaled_density * t_x * t_y * d_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, 
yi, zp1, DIM, d_para)], scaled_density * d_x * t_y * d_z); + atomicAdd(&resampled_box[compute_R_INDEX(xi, yp1, zp1, DIM, d_para)], scaled_density * t_x * d_y * d_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yp1, zp1, DIM, d_para)], scaled_density * d_x * d_y * d_z); } else { - atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yi, zi)], scaled_density * t_x * t_y * t_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yi, zi)], scaled_density * d_x * t_y * t_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yp1, zi)], scaled_density * t_x * d_y * t_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yp1, zi)], scaled_density * d_x * d_y * t_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yi, zp1)], scaled_density * t_x * t_y * d_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yi, zp1)], scaled_density * d_x * t_y * d_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yp1, zp1)], scaled_density * t_x * d_y * d_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yp1, zp1)], scaled_density * d_x * d_y * d_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yi, zi, hii_d, hii_d_para)], scaled_density * t_x * t_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yi, zi, hii_d, hii_d_para)], scaled_density * d_x * t_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yp1, zi, hii_d, hii_d_para)], scaled_density * t_x * d_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yp1, zi, hii_d, hii_d_para)], scaled_density * d_x * d_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yi, zp1, hii_d, hii_d_para)], scaled_density * t_x * t_y * d_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yi, zp1, hii_d, hii_d_para)], scaled_density * d_x * t_y * d_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yp1, zp1, hii_d, hii_d_para)], scaled_density * t_x * d_y * d_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yp1, zp1, hii_d, hii_d_para)], scaled_density * d_x * d_y * d_z); } } 
-// ------------------------------------------------------------------------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------------------------------ int ComputePerturbField_gpu( float redshift, UserParams *user_params, CosmoParams *cosmo_params, @@ -406,7 +424,7 @@ int ComputePerturbField_gpu( HIRES_density_perturb_saved = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*KSPACE_NUM_PIXELS); } - double *resampled_box; + // double *resampled_box; //TODO: debugSummarizeIC is bugged when not all the fields are in memory // debugSummarizeIC(boxes, user_params->HII_DIM, user_params->DIM, user_params->NON_CUBIC_FACTOR); @@ -525,82 +543,92 @@ int ComputePerturbField_gpu( // ---------------------------------------------------------------------------------------------------------------------------- // Box shapes from outputs.py and convenience macros + size_t size; + unsigned long long num_pixels; if(user_params->PERTURB_ON_HIGH_RES) { - int num_pixels = TOT_NUM_PIXELS; - size_t size = TOT_NUM_PIXELS * sizeof(double); + num_pixels = TOT_NUM_PIXELS; + size = TOT_NUM_PIXELS * sizeof(double); } else { - int num_pixels = HII_TOT_NUM_PIXELS; - size_t size = HII_TOT_NUM_PIXELS * sizeof(double); + num_pixels = HII_TOT_NUM_PIXELS; + size = HII_TOT_NUM_PIXELS * sizeof(double); } + // Allocat host memory for output box + double* resampled_box = (double*)malloc(size); + // Allocate device memory for output box double* d_box; cudaMalloc(&d_box, size); - cudaMemset(d_box, 0, sizeof(double) * size); + cudaMemset(d_box, 0, size); // Allocate device memory for density field float* hires_density; - cudaMalloc(&hires_density); - cudaMemcpy(hires_density, boxes->hires_density, size, cudaMemcpyHostToDevice); + cudaMalloc(&hires_density, (HII_TOT_NUM_PIXELS * sizeof(double))); + cudaMemcpy(hires_density, boxes->hires_density, (HII_TOT_NUM_PIXELS * 
sizeof(double)), cudaMemcpyHostToDevice); // Allocate device memory and copy arrays to device as per user_params + float* hires_vx; + float* hires_vy; + float* hires_vz; + float* lowres_vx; + float* lowres_vy; + float* lowres_vz; + float* hires_vx_2LPT; + float* hires_vy_2LPT; + float* hires_vz_2LPT; + float* lowres_vx_2LPT; + float* lowres_vy_2LPT; + float* lowres_vz_2LPT; + if (user_params->PERTURB_ON_HIGH_RES) { - float* hires_vx; - float* hires_vy; - float* hires_vz; - cudaMalloc(&hires_vx); - cudaMalloc(&hires_vy); - cudaMalloc(&hires_vz); + cudaMalloc(&hires_vx, size); // size isn't neccessarily correct, but it will be correct for whichever branch is run. + cudaMalloc(&hires_vy, size); + cudaMalloc(&hires_vz, size); cudaMemcpy(hires_vx, boxes->hires_vx, size, cudaMemcpyHostToDevice); cudaMemcpy(hires_vy, boxes->hires_vy, size, cudaMemcpyHostToDevice); cudaMemcpy(hires_vz, boxes->hires_vz, size, cudaMemcpyHostToDevice); } else { - float* lowres_vx; - float* lowres_vy; - float* lowres_vz; - cudaMalloc(&lowres_vx); - cudaMalloc(&lowres_vy); - cudaMalloc(&lowres_vz); + cudaMalloc(&lowres_vx, size); + cudaMalloc(&lowres_vy, size); + cudaMalloc(&lowres_vz, size); cudaMemcpy(lowres_vx, boxes->lowres_vx, size, cudaMemcpyHostToDevice); cudaMemcpy(lowres_vy, boxes->lowres_vy, size, cudaMemcpyHostToDevice); cudaMemcpy(lowres_vz, boxes->lowres_vz, size, cudaMemcpyHostToDevice); } if (user_params->USE_2LPT) { if (user_params->PERTURB_ON_HIGH_RES) { - float* hires_vx_2LPT; - float* hires_vy_2LPT; - float* hires_vz_2LPT; - cudaMalloc(&hires_vx_2LPT); - cudaMalloc(&hires_vy_2LPT); - cudaMalloc(&hires_vz_2LPT); + cudaMalloc(&hires_vx_2LPT, size); + cudaMalloc(&hires_vy_2LPT, size); + cudaMalloc(&hires_vz_2LPT, size); cudaMemcpy(hires_vx_2LPT, boxes->hires_vx_2LPT, size, cudaMemcpyHostToDevice); cudaMemcpy(hires_vy_2LPT, boxes->hires_vy_2LPT, size, cudaMemcpyHostToDevice); cudaMemcpy(hires_vz_2LPT, boxes->hires_vz_2LPT, size, cudaMemcpyHostToDevice); } else { - float* 
lowres_vx_2LPT; - float* lowres_vy_2LPT; - float* lowres_vz_2LPT; - cudaMalloc(&lowres_vx_2LPT); - cudaMalloc(&lowres_vy_2LPT); - cudaMalloc(&lowres_vz_2LPT); + cudaMalloc(&lowres_vx_2LPT, size); + cudaMalloc(&lowres_vy_2LPT, size); + cudaMalloc(&lowres_vz_2LPT, size); cudaMemcpy(lowres_vx_2LPT, boxes->lowres_vx_2LPT, size, cudaMemcpyHostToDevice); cudaMemcpy(lowres_vy_2LPT, boxes->lowres_vy_2LPT, size, cudaMemcpyHostToDevice); cudaMemcpy(lowres_vz_2LPT, boxes->lowres_vz_2LPT, size, cudaMemcpyHostToDevice); } } + // Can't seem to pass macro straight to kernel. + long long d_para = D_PARA; + long long hii_d = HII_D; + long long hii_d_para = HII_D_PARA; + // Invoke kernel int threadsPerBlock = 256; int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; perturb_density_field_kernel<<>>( d_box, hires_density, hires_vx, hires_vy, hires_vz, lowres_vx, lowres_vy, lowres_vz, - hires_vx_2LPT, hires_vy_2LPT, hires_vz_2LPT, lowres_vx_2LPT, lowres_vy_2LPT, lowres_vz_2LPT - dimension, user_params->DIM, D_PARA, MID_PARA, user_params->NON_CUBIC_FACTOR, - f_pixel_factor, init_growth_factor, user_params->PERTURB_ON_HIGH_RES, user_params->USE_2LPT, - ); + hires_vx_2LPT, hires_vy_2LPT, hires_vz_2LPT, lowres_vx_2LPT, lowres_vy_2LPT, lowres_vz_2LPT, + dimension, user_params->DIM, d_para, hii_d, hii_d_para, user_params->NON_CUBIC_FACTOR, + f_pixel_factor, init_growth_factor, user_params->PERTURB_ON_HIGH_RES, user_params->USE_2LPT); // Only use during development! cudaError_t err = cudaDeviceSynchronize(); @@ -612,8 +640,6 @@ int ComputePerturbField_gpu( } // Copy results from device to host - double *resampled_box; - // resampled_box = (double *)calloc(num_pixels, sizeof(double)); // is this needed? 
cudaMemcpy(resampled_box, d_box, size, cudaMemcpyDeviceToHost); // Deallocate device memory @@ -739,7 +765,7 @@ int ComputePerturbField_gpu( // Now filter the box // ALICE: RES=0 (dimension=DIM, midpoint=MIDDLE), filter_type=0 (real space top-hat filtering) if (user_params->DIM != user_params->HII_DIM) { - filter_box(HIRES_density_perturb, 0, 0, L_FACTOR*user_params->BOX_LEN/(user_params->HII_DIM+0.0)); + filter_box(HIRES_density_perturb, 0, 0, L_FACTOR*user_params->BOX_LEN/(user_params->HII_DIM+0.0), 0.); } // FFT back to real space @@ -798,7 +824,7 @@ int ComputePerturbField_gpu( // smooth the field // ALICE: RES=1 (dimension=HII_DIM, midpoint=HII_MIDDLE), filter_type=2 (Gaussian filtering) if (!global_params.EVOLVE_DENSITY_LINEARLY && global_params.SMOOTH_EVOLVED_DENSITY_FIELD){ - filter_box(LOWRES_density_perturb, 1, 2, global_params.R_smooth_density*user_params->BOX_LEN/(float)user_params->HII_DIM); + filter_box(LOWRES_density_perturb, 1, 2, global_params.R_smooth_density*user_params->BOX_LEN/(float)user_params->HII_DIM, 0.); } LOG_SUPER_DEBUG("LOWRES_density_perturb after smoothing: "); diff --git a/src/py21cmfast/src/PerturbField.h b/src/py21cmfast/src/PerturbField.h index 718906ecd..bde8020b2 100644 --- a/src/py21cmfast/src/PerturbField.h +++ b/src/py21cmfast/src/PerturbField.h @@ -1,6 +1,8 @@ #ifndef _PERTURBFIELD_H #define _PERTURBFIELD_H +#include + #include "InputParameters.h" #include "OutputStructs.h" From b939e36678d839da01b281e2e35ae785750b07e2 Mon Sep 17 00:00:00 2001 From: alserene Date: Fri, 25 Oct 2024 12:28:41 +1100 Subject: [PATCH 012/145] Add PerturbField.o to build file. 
--- build_cffi.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/build_cffi.py b/build_cffi.py index 4767a518f..61c953fe4 100755 --- a/build_cffi.py +++ b/build_cffi.py @@ -29,7 +29,11 @@ # compiled cuda code # extra_objects = [os.path.join(CLOC, "hello_world.o")] -extra_objects = [os.path.join(CLOC, "hello_world.o"), os.path.join(CLOC, "filtering.o")] +extra_objects = [ + os.path.join(CLOC, "hello_world.o"), + os.path.join(CLOC, "filtering.o"), + os.path.join(CLOC, "PerturbField.o"), + ] extra_link_args = ["-lcudart"] # Set the C-code logging level. From 3943d2d99c932d8ac99484963e166a453dd24d55 Mon Sep 17 00:00:00 2001 From: alserene Date: Fri, 25 Oct 2024 12:29:10 +1100 Subject: [PATCH 013/145] Misc changes during debugging. --- src/py21cmfast/src/PerturbField.c | 9 +++-- src/py21cmfast/src/PerturbField.cu | 61 ++++++++++++++++++++++++------ 2 files changed, 54 insertions(+), 16 deletions(-) diff --git a/src/py21cmfast/src/PerturbField.c b/src/py21cmfast/src/PerturbField.c index b4886b2da..625c5b7ac 100644 --- a/src/py21cmfast/src/PerturbField.c +++ b/src/py21cmfast/src/PerturbField.c @@ -347,7 +347,7 @@ int ComputePerturbField_cpu( // ************ END INITIALIZATION **************************** // - // Perturbing the density field required adding over multiple cells. Store intermediate result as a double to avoid rounding errors + // Perturbing the density field requires adding over multiple cells. 
Store intermediate result as a double to avoid rounding errors if(user_params->PERTURB_ON_HIGH_RES) { resampled_box = (double *)calloc(TOT_NUM_PIXELS,sizeof(double)); } @@ -775,9 +775,10 @@ int ComputePerturbField( float redshift, UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes, PerturbedField *perturbed_field ){ - if (1) { - ComputePerturbField_gpu(redshift, user_params, cosmo_params, boxes, perturbed_field) + // int result; + if (0) { + return ComputePerturbField_gpu(redshift, user_params, cosmo_params, boxes, perturbed_field); } else { - ComputePerturbField_cpu(redshift, user_params, cosmo_params, boxes, perturbed_field) + return ComputePerturbField_cpu(redshift, user_params, cosmo_params, boxes, perturbed_field); } } diff --git a/src/py21cmfast/src/PerturbField.cu b/src/py21cmfast/src/PerturbField.cu index 4476f3ae8..c2dc5388f 100644 --- a/src/py21cmfast/src/PerturbField.cu +++ b/src/py21cmfast/src/PerturbField.cu @@ -109,7 +109,7 @@ void compute_perturbed_velocities( LOWRES_density_perturb[0][1] = 0.; } } - else{ + else { if(user_params->PERTURB_ON_HIGH_RES) { // HIRES_density_perturb[C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]*I/k_sq/(TOT_NUM_PIXELS+0.0); // HIRES_density_perturb[C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]/k_sq/(TOT_NUM_PIXELS+0.0); @@ -420,8 +420,8 @@ int ComputePerturbField_gpu( LOWRES_density_perturb_saved = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*HII_KSPACE_NUM_PIXELS); if(user_params->PERTURB_ON_HIGH_RES) { - HIRES_density_perturb = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*KSPACE_NUM_PIXELS); - HIRES_density_perturb_saved = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*KSPACE_NUM_PIXELS); + HIRES_density_perturb = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*KSPACE_NUM_PIXELS); // D * D * NCF * D/2 + HIRES_density_perturb_saved = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*KSPACE_NUM_PIXELS); // sizeof(fftwf_complex) = 2 * sizeof(float)? 
} // double *resampled_box; @@ -439,10 +439,17 @@ int ComputePerturbField_gpu( #pragma omp parallel shared(growth_factor,boxes,LOWRES_density_perturb,HIRES_density_perturb,dimension) private(i,j,k) num_threads(user_params->N_THREADS) { #pragma omp for - for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ + for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ // NCF * D + // max (i, j, k) = (D, D, NCF * D) if(user_params->PERTURB_ON_HIGH_RES) { + // HIRES_density_perturb is of type fftwf_complex + // HIRES_density_perturb has size D * D * NCF * D/2 + + // hires_density is of type float + // hires_density has size D * D * NCF * D + *((float *)HIRES_density_perturb + R_FFT_INDEX(i,j,k)) = growth_factor*boxes->hires_density[R_INDEX(i,j,k)]; } else { @@ -557,15 +564,28 @@ int ComputePerturbField_gpu( // Allocat host memory for output box double* resampled_box = (double*)malloc(size); - // Allocate device memory for output box + // Allocate device memory for output box and set to 0. double* d_box; cudaMalloc(&d_box, size); - cudaMemset(d_box, 0, size); + cudaMemset(d_box, 0, size); // fills size bytes with byte=0 + + err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CudaError); + } // Allocate device memory for density field float* hires_density; - cudaMalloc(&hires_density, (HII_TOT_NUM_PIXELS * sizeof(double))); - cudaMemcpy(hires_density, boxes->hires_density, (HII_TOT_NUM_PIXELS * sizeof(double)), cudaMemcpyHostToDevice); + // cudaMalloc(&hires_density, (HII_TOT_NUM_PIXELS * sizeof(double))); // from outputs.py & indexing.h + cudaMalloc(&hires_density, (TOT_NUM_PIXELS * sizeof(double))); // from outputs.py & indexing.h + cudaMemcpy(hires_density, boxes->hires_density, (TOT_NUM_PIXELS * sizeof(double)), cudaMemcpyHostToDevice); + + err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CudaError); + } // Allocate device memory and copy arrays to device 
as per user_params float* hires_vx; @@ -616,7 +636,13 @@ int ComputePerturbField_gpu( } } - // Can't seem to pass macro straight to kernel. + err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CudaError); + } + + // Seemingly can't pass macro straight to kernel long long d_para = D_PARA; long long hii_d = HII_D; long long hii_d_para = HII_D_PARA; @@ -640,7 +666,12 @@ int ComputePerturbField_gpu( } // Copy results from device to host - cudaMemcpy(resampled_box, d_box, size, cudaMemcpyDeviceToHost); + // cudaMemcpy(resampled_box, d_box, size, cudaMemcpyDeviceToHost); + cudaError_t err = cudaMemcpy(resampled_box, d_box, size, cudaMemcpyDeviceToHost); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CudaError); + } // Deallocate device memory cudaFree(d_box); @@ -669,6 +700,12 @@ int ComputePerturbField_gpu( } } + err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CudaError); + } + // ---------------------------------------------------------------------------------------------------------------------------- LOG_SUPER_DEBUG("resampled_box: "); From 51eab94d28420b3cdd4e1f797e3967e21095349d Mon Sep 17 00:00:00 2001 From: alserene Date: Sat, 26 Oct 2024 10:59:51 +1100 Subject: [PATCH 014/145] Debugging attempts --- src/py21cmfast/src/PerturbField.c | 8 +- src/py21cmfast/src/PerturbField.cu | 404 ++++++++++++++++------------- 2 files changed, 230 insertions(+), 182 deletions(-) diff --git a/src/py21cmfast/src/PerturbField.c b/src/py21cmfast/src/PerturbField.c index 625c5b7ac..461a435f9 100644 --- a/src/py21cmfast/src/PerturbField.c +++ b/src/py21cmfast/src/PerturbField.c @@ -499,6 +499,12 @@ int ComputePerturbField_cpu( } } + // LOG_DEBUG("resampled_box[:50] = "); + // for (int element = 0; element < 50; element++) { + // LOG_DEBUG("%.4e ", resampled_box[element]); + // } + // LOG_DEBUG("\n"); + 
LOG_SUPER_DEBUG("resampled_box: "); debugSummarizeBoxDouble(resampled_box, dimension, user_params->NON_CUBIC_FACTOR, " "); @@ -776,7 +782,7 @@ int ComputePerturbField( InitialConditions *boxes, PerturbedField *perturbed_field ){ // int result; - if (0) { + if (1) { return ComputePerturbField_gpu(redshift, user_params, cosmo_params, boxes, perturbed_field); } else { return ComputePerturbField_cpu(redshift, user_params, cosmo_params, boxes, perturbed_field); diff --git a/src/py21cmfast/src/PerturbField.cu b/src/py21cmfast/src/PerturbField.cu index c2dc5388f..9ba491fde 100644 --- a/src/py21cmfast/src/PerturbField.cu +++ b/src/py21cmfast/src/PerturbField.cu @@ -196,160 +196,191 @@ __device__ inline unsigned long long compute_HII_R_INDEX(int i, int j, int k, in // Is const needed as well as __restrict__? __global__ void perturb_density_field_kernel( double *resampled_box, - const float* __restrict__ hires_density, - const float* __restrict__ hires_vx, - const float* __restrict__ hires_vy, - const float* __restrict__ hires_vz, - const float* __restrict__ lowres_vx, - const float* __restrict__ lowres_vy, - const float* __restrict__ lowres_vz, - const float* __restrict__ hires_vx_2LPT, - const float* __restrict__ hires_vy_2LPT, - const float* __restrict__ hires_vz_2LPT, - const float* __restrict__ lowres_vx_2LPT, - const float* __restrict__ lowres_vy_2LPT, - const float* __restrict__ lowres_vz_2LPT, - int dimension, int DIM, long long d_para, long long hii_d, long long hii_d_para, - int non_cubic_factor, float f_pixel_factor, float init_growth_factor, + // const float* __restrict__ hires_density, + // const float* __restrict__ hires_vx, + // const float* __restrict__ hires_vy, + // const float* __restrict__ hires_vz, + // const float* __restrict__ lowres_vx, + // const float* __restrict__ lowres_vy, + // const float* __restrict__ lowres_vz, + // const float* __restrict__ hires_vx_2LPT, + // const float* __restrict__ hires_vy_2LPT, + // const float* __restrict__ 
hires_vz_2LPT, + // const float* __restrict__ lowres_vx_2LPT, + // const float* __restrict__ lowres_vy_2LPT, + // const float* __restrict__ lowres_vz_2LPT, + float* hires_density, + float* hires_vx, + float* hires_vy, + float* hires_vz, + float* lowres_vx, + float* lowres_vy, + float* lowres_vz, + float* hires_vx_2LPT, + float* hires_vy_2LPT, + float* hires_vz_2LPT, + float* lowres_vx_2LPT, + float* lowres_vy_2LPT, + float* lowres_vz_2LPT, + int dimension, int DIM, + long long d_para, long long hii_d, long long hii_d_para, + int non_cubic_factor, + float f_pixel_factor, float init_growth_factor, bool perturb_on_high_res, bool use_2lpt ) { unsigned long long idx = blockIdx.x * blockDim.x + threadIdx.x; - // Get index of density cell - int i = idx / (d_para * DIM); - int j = (idx / d_para) % DIM; - int k = idx % d_para; - - unsigned long long r_index = compute_R_INDEX(i, j, k, DIM, d_para); - - // Map index to location in units of box size - double xf = (i + 0.5) / DIM; - double yf = (j + 0.5) / DIM; - double zf = (k + 0.5) / d_para; - - // Update locations - unsigned long long HII_index; - - if (perturb_on_high_res) { - xf += __ldg(&hires_vx[r_index]); - yf += __ldg(&hires_vy[r_index]); - zf += __ldg(&hires_vz[r_index]); - } - else { - unsigned long long HII_i = (unsigned long long)(i / f_pixel_factor); - unsigned long long HII_j = (unsigned long long)(j / f_pixel_factor); - unsigned long long HII_k = (unsigned long long)(k / f_pixel_factor); - HII_index = compute_HII_R_INDEX(HII_i, HII_j, HII_k, hii_d, hii_d_para); // This is accessing HII_D and HII_D_PARA macros! 
- xf += __ldg(&lowres_vx[HII_index]); - yf += __ldg(&lowres_vy[HII_index]); - zf += __ldg(&lowres_vz[HII_index]); - } + if (idx < DIM * DIM * d_para) { + + // Get index of density cell + int i = idx / (d_para * DIM); + int j = (idx / d_para) % DIM; + int k = idx % d_para; + + unsigned long long r_index = compute_R_INDEX(i, j, k, DIM, d_para); + + // Map index to location in units of box size + double xf = (i + 0.5) / DIM; + double yf = (j + 0.5) / DIM; + double zf = (k + 0.5) / d_para; + + // Update locations + unsigned long long HII_index; - // 2LPT (add second order corrections) - if (use_2lpt) { if (perturb_on_high_res) { - xf -= __ldg(&hires_vx_2LPT[r_index]); - yf -= __ldg(&hires_vy_2LPT[r_index]); - zf -= __ldg(&hires_vz_2LPT[r_index]); + // xf += __ldg(&hires_vx[r_index]); + // yf += __ldg(&hires_vy[r_index]); + // zf += __ldg(&hires_vz[r_index]); + xf += hires_vx[r_index]; + yf += hires_vy[r_index]; + zf += hires_vz[r_index]; } else { - xf -= __ldg(&lowres_vx_2LPT[HII_index]); - yf -= __ldg(&lowres_vy_2LPT[HII_index]); - zf -= __ldg(&lowres_vz_2LPT[HII_index]); + unsigned long long HII_i = (unsigned long long)(i / f_pixel_factor); + unsigned long long HII_j = (unsigned long long)(j / f_pixel_factor); + unsigned long long HII_k = (unsigned long long)(k / f_pixel_factor); + HII_index = compute_HII_R_INDEX(HII_i, HII_j, HII_k, hii_d, hii_d_para); // This is accessing HII_D and HII_D_PARA macros! + // xf += __ldg(&lowres_vx[HII_index]); + // yf += __ldg(&lowres_vy[HII_index]); + // zf += __ldg(&lowres_vz[HII_index]); + xf += lowres_vx[HII_index]; + yf += lowres_vy[HII_index]; + zf += lowres_vz[HII_index]; } - } - // TODO: shared between threads? 
- // Convert once to reduce overhead of multiple casts - double dimension_double = (double)(dimension); - double dimension_factored_double = dimension_double * (double)(non_cubic_factor); - int dimension_factored = dimension * non_cubic_factor; - - // Scale coordinates back to grid size - xf *= dimension_double; - yf *= dimension_double; - zf *= dimension_factored_double; - - // Wrap coordinates to keep them within valid boundaries - xf = fmod(fmod(xf, dimension_double) + dimension_double, dimension_double); - yf = fmod(fmod(yf, dimension_double) + dimension_double, dimension_double); - zf = fmod(fmod(zf, dimension_factored_double) + dimension_factored_double, dimension_factored_double); - - // FROM NVIDIA DOCS: - // __device__ doublenearbyint(double x) // Round the input argument to the nearest integer. - // There are SO many double-to-int conversion intrinsics. How to know if should use any? - - // Get integer values for indices from double precision values - int xi = xf; - int yi = yf; - int zi = zf; - - // Wrap index coordinates to ensure no out-of-bounds array access will be attempted - xi = (xi % dimension + dimension) % dimension; - yi = (yi % dimension + dimension) % dimension; - zi = (zi % dimension_factored + dimension_factored) % dimension_factored; - - // Determine the fraction of the perturbed cell which overlaps with the 8 nearest grid cells, - // based on the grid cell which contains the centre of the perturbed cell - float d_x = fabs(xf - (double)(xi + 0.5)); // Absolute distances from grid cell centre to perturbed cell centre - float d_y = fabs(yf - (double)(yi + 0.5)); // (also) The fractions of mass which will be moved to neighbouring cells - float d_z = fabs(zf - (double)(zi + 0.5)); - - // 8 neighbour cells-of-interest will be shifted left/down/behind if perturbed midpoint is in left/bottom/back corner of cell. 
- if (xf < (double)(xi + 0.5)) { - // If perturbed cell centre is less than the mid-point then update fraction - // of mass in the cell and determine the cell centre of neighbour to be the - // lowest grid point index - d_x = 1. - d_x; - xi -= 1; - xi += (xi + dimension) % dimension; // Only this critera is possible as iterate back by one (we cannot exceed DIM) - } - if(yf < (double)(yi + 0.5)) { - d_y = 1. - d_y; - yi -= 1; - yi += (yi + dimension) % dimension; - } - if(zf < (double)(zi + 0.5)) { - d_z = 1. - d_z; - zi -= 1; - zi += (zi + (unsigned long long)(non_cubic_factor * dimension)) % (unsigned long long)(non_cubic_factor * dimension); - } - // The fractions of mass which will remain with perturbed cell - float t_x = 1. - d_x; - float t_y = 1. - d_y; - float t_z = 1. - d_z; - - // Determine the grid coordinates of the 8 neighbouring cells. - // Neighbours will be in positive direction; front/right/above cells (-> 2x2 cube, with perturbed cell bottom/left/back) - // Takes into account the offset based on cell centre determined above - int xp1 = (xi + 1) % dimension; - int yp1 = (yi + 1) % dimension; - int zp1 = (zi + 1) % (unsigned long long)(non_cubic_factor * dimension); - - double scaled_density = 1 + init_growth_factor * __ldg(&hires_density[r_index]); - - if (perturb_on_high_res) { - // Redistribute the mass over the 8 neighbouring cells according to cloud in cell - // Cell mass = (1 + init_growth_factor * orig_density) * (proportion of mass to distribute) - atomicAdd(&resampled_box[compute_R_INDEX(xi, yi, zi, DIM, d_para)], scaled_density * t_x * t_y * t_z); - atomicAdd(&resampled_box[compute_R_INDEX(xp1, yi, zi, DIM, d_para)], scaled_density * d_x * t_y * t_z); - atomicAdd(&resampled_box[compute_R_INDEX(xi, yp1, zi, DIM, d_para)], scaled_density * t_x * d_y * t_z); - atomicAdd(&resampled_box[compute_R_INDEX(xp1, yp1, zi, DIM, d_para)], scaled_density * d_x * d_y * t_z); - atomicAdd(&resampled_box[compute_R_INDEX(xi, yi, zp1, DIM, d_para)], 
scaled_density * t_x * t_y * d_z); - atomicAdd(&resampled_box[compute_R_INDEX(xp1, yi, zp1, DIM, d_para)], scaled_density * d_x * t_y * d_z); - atomicAdd(&resampled_box[compute_R_INDEX(xi, yp1, zp1, DIM, d_para)], scaled_density * t_x * d_y * d_z); - atomicAdd(&resampled_box[compute_R_INDEX(xp1, yp1, zp1, DIM, d_para)], scaled_density * d_x * d_y * d_z); - } - else { - atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yi, zi, hii_d, hii_d_para)], scaled_density * t_x * t_y * t_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yi, zi, hii_d, hii_d_para)], scaled_density * d_x * t_y * t_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yp1, zi, hii_d, hii_d_para)], scaled_density * t_x * d_y * t_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yp1, zi, hii_d, hii_d_para)], scaled_density * d_x * d_y * t_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yi, zp1, hii_d, hii_d_para)], scaled_density * t_x * t_y * d_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yi, zp1, hii_d, hii_d_para)], scaled_density * d_x * t_y * d_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yp1, zp1, hii_d, hii_d_para)], scaled_density * t_x * d_y * d_z); - atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yp1, zp1, hii_d, hii_d_para)], scaled_density * d_x * d_y * d_z); + // 2LPT (add second order corrections) + if (use_2lpt) { + if (perturb_on_high_res) { + // xf -= __ldg(&hires_vx_2LPT[r_index]); + // yf -= __ldg(&hires_vy_2LPT[r_index]); + // zf -= __ldg(&hires_vz_2LPT[r_index]); + xf -= hires_vx_2LPT[r_index]; + yf -= hires_vy_2LPT[r_index]; + zf -= hires_vz_2LPT[r_index]; + } + else { + // xf -= __ldg(&lowres_vx_2LPT[HII_index]); + // yf -= __ldg(&lowres_vy_2LPT[HII_index]); + // zf -= __ldg(&lowres_vz_2LPT[HII_index]); + xf -= lowres_vx_2LPT[HII_index]; + yf -= lowres_vy_2LPT[HII_index]; + zf -= lowres_vz_2LPT[HII_index]; + } + } + + // TODO: shared between threads? 
+ // Convert once to reduce overhead of multiple casts + double dimension_double = (double)(dimension); + double dimension_factored_double = dimension_double * (double)(non_cubic_factor); + int dimension_factored = dimension * non_cubic_factor; + + // Scale coordinates back to grid size + xf *= dimension_double; + yf *= dimension_double; + zf *= dimension_factored_double; + + // Wrap coordinates to keep them within valid boundaries + xf = fmod(fmod(xf, dimension_double) + dimension_double, dimension_double); + yf = fmod(fmod(yf, dimension_double) + dimension_double, dimension_double); + zf = fmod(fmod(zf, dimension_factored_double) + dimension_factored_double, dimension_factored_double); + + // FROM NVIDIA DOCS: + // __device__ doublenearbyint(double x) // Round the input argument to the nearest integer. + // There are SO many double-to-int conversion intrinsics. How to know if should use any? + + // Get integer values for indices from double precision values + int xi = xf; + int yi = yf; + int zi = zf; + + // Wrap index coordinates to ensure no out-of-bounds array access will be attempted + xi = (xi % dimension + dimension) % dimension; + yi = (yi % dimension + dimension) % dimension; + zi = (zi % dimension_factored + dimension_factored) % dimension_factored; + + // Determine the fraction of the perturbed cell which overlaps with the 8 nearest grid cells, + // based on the grid cell which contains the centre of the perturbed cell + float d_x = fabs(xf - (double)(xi + 0.5)); // Absolute distances from grid cell centre to perturbed cell centre + float d_y = fabs(yf - (double)(yi + 0.5)); // (also) The fractions of mass which will be moved to neighbouring cells + float d_z = fabs(zf - (double)(zi + 0.5)); + + // 8 neighbour cells-of-interest will be shifted left/down/behind if perturbed midpoint is in left/bottom/back corner of cell. 
+ if (xf < (double)(xi + 0.5)) { + // If perturbed cell centre is less than the mid-point then update fraction + // of mass in the cell and determine the cell centre of neighbour to be the + // lowest grid point index + d_x = 1. - d_x; + xi -= 1; + xi += (xi + dimension) % dimension; // Only this critera is possible as iterate back by one (we cannot exceed DIM) + } + if(yf < (double)(yi + 0.5)) { + d_y = 1. - d_y; + yi -= 1; + yi += (yi + dimension) % dimension; + } + if(zf < (double)(zi + 0.5)) { + d_z = 1. - d_z; + zi -= 1; + zi += (zi + (unsigned long long)(non_cubic_factor * dimension)) % (unsigned long long)(non_cubic_factor * dimension); + } + // The fractions of mass which will remain with perturbed cell + float t_x = 1. - d_x; + float t_y = 1. - d_y; + float t_z = 1. - d_z; + + // Determine the grid coordinates of the 8 neighbouring cells. + // Neighbours will be in positive direction; front/right/above cells (-> 2x2 cube, with perturbed cell bottom/left/back) + // Takes into account the offset based on cell centre determined above + int xp1 = (xi + 1) % dimension; + int yp1 = (yi + 1) % dimension; + int zp1 = (zi + 1) % (unsigned long long)(non_cubic_factor * dimension); + + // double scaled_density = 1 + init_growth_factor * __ldg(&hires_density[r_index]); + double scaled_density = 1 + init_growth_factor * hires_density[r_index]; + + if (perturb_on_high_res) { + // Redistribute the mass over the 8 neighbouring cells according to cloud in cell + // Cell mass = (1 + init_growth_factor * orig_density) * (proportion of mass to distribute) + atomicAdd(&resampled_box[compute_R_INDEX(xi, yi, zi, DIM, d_para)], scaled_density * t_x * t_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yi, zi, DIM, d_para)], scaled_density * d_x * t_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xi, yp1, zi, DIM, d_para)], scaled_density * t_x * d_y * t_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yp1, zi, DIM, d_para)], scaled_density * d_x * d_y * t_z); + 
atomicAdd(&resampled_box[compute_R_INDEX(xi, yi, zp1, DIM, d_para)], scaled_density * t_x * t_y * d_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yi, zp1, DIM, d_para)], scaled_density * d_x * t_y * d_z); + atomicAdd(&resampled_box[compute_R_INDEX(xi, yp1, zp1, DIM, d_para)], scaled_density * t_x * d_y * d_z); + atomicAdd(&resampled_box[compute_R_INDEX(xp1, yp1, zp1, DIM, d_para)], scaled_density * d_x * d_y * d_z); + } + else { + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yi, zi, hii_d, hii_d_para)], scaled_density * t_x * t_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yi, zi, hii_d, hii_d_para)], scaled_density * d_x * t_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yp1, zi, hii_d, hii_d_para)], scaled_density * t_x * d_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yp1, zi, hii_d, hii_d_para)], scaled_density * d_x * d_y * t_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yi, zp1, hii_d, hii_d_para)], scaled_density * t_x * t_y * d_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yi, zp1, hii_d, hii_d_para)], scaled_density * d_x * t_y * d_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xi, yp1, zp1, hii_d, hii_d_para)], scaled_density * t_x * d_y * d_z); + atomicAdd(&resampled_box[compute_HII_R_INDEX(xp1, yp1, zp1, hii_d, hii_d_para)], scaled_density * d_x * d_y * d_z); + } } } @@ -550,26 +581,28 @@ int ComputePerturbField_gpu( // ---------------------------------------------------------------------------------------------------------------------------- // Box shapes from outputs.py and convenience macros - size_t size; + size_t size_double, size_float; unsigned long long num_pixels; if(user_params->PERTURB_ON_HIGH_RES) { num_pixels = TOT_NUM_PIXELS; - size = TOT_NUM_PIXELS * sizeof(double); + size_double = TOT_NUM_PIXELS * sizeof(double); + size_float = TOT_NUM_PIXELS * sizeof(float); } else { num_pixels = HII_TOT_NUM_PIXELS; - size = HII_TOT_NUM_PIXELS * sizeof(double); + size_double 
= HII_TOT_NUM_PIXELS * sizeof(double); + size_float = HII_TOT_NUM_PIXELS * sizeof(float); } // Allocat host memory for output box - double* resampled_box = (double*)malloc(size); + double* resampled_box = (double*)malloc(size_double); // Allocate device memory for output box and set to 0. double* d_box; - cudaMalloc(&d_box, size); - cudaMemset(d_box, 0, size); // fills size bytes with byte=0 + cudaMalloc(&d_box, size_double); + cudaMemset(d_box, 0, size_double); // fills size bytes with byte=0 - err = cudaGetLastError(); + cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CudaError); @@ -577,9 +610,10 @@ int ComputePerturbField_gpu( // Allocate device memory for density field float* hires_density; - // cudaMalloc(&hires_density, (HII_TOT_NUM_PIXELS * sizeof(double))); // from outputs.py & indexing.h - cudaMalloc(&hires_density, (TOT_NUM_PIXELS * sizeof(double))); // from outputs.py & indexing.h - cudaMemcpy(hires_density, boxes->hires_density, (TOT_NUM_PIXELS * sizeof(double)), cudaMemcpyHostToDevice); + // cudaMalloc(&hires_density, (HII_TOT_NUM_PIXELS * sizeof(double))); // from 21cmFAST.h, outputs.py & indexing.h + // cudaMemcpy(hires_density, boxes->hires_density, (HII_TOT_NUM_PIXELS * sizeof(double)), cudaMemcpyHostToDevice); + cudaMalloc(&hires_density, (TOT_NUM_PIXELS * sizeof(float))); // from 21cmFAST.h, outputs.py & indexing.h + cudaMemcpy(hires_density, boxes->hires_density, (TOT_NUM_PIXELS * sizeof(float)), cudaMemcpyHostToDevice); err = cudaGetLastError(); if (err != cudaSuccess) { @@ -588,6 +622,7 @@ int ComputePerturbField_gpu( } // Allocate device memory and copy arrays to device as per user_params + // floats as per 21cmFAST.h float* hires_vx; float* hires_vy; float* hires_vz; @@ -602,37 +637,37 @@ int ComputePerturbField_gpu( float* lowres_vz_2LPT; if (user_params->PERTURB_ON_HIGH_RES) { - cudaMalloc(&hires_vx, size); // size isn't neccessarily correct, but it will be 
correct for whichever branch is run. - cudaMalloc(&hires_vy, size); - cudaMalloc(&hires_vz, size); - cudaMemcpy(hires_vx, boxes->hires_vx, size, cudaMemcpyHostToDevice); - cudaMemcpy(hires_vy, boxes->hires_vy, size, cudaMemcpyHostToDevice); - cudaMemcpy(hires_vz, boxes->hires_vz, size, cudaMemcpyHostToDevice); + cudaMalloc(&hires_vx, size_float); + cudaMalloc(&hires_vy, size_float); + cudaMalloc(&hires_vz, size_float); + cudaMemcpy(hires_vx, boxes->hires_vx, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(hires_vy, boxes->hires_vy, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(hires_vz, boxes->hires_vz, size_float, cudaMemcpyHostToDevice); } else { - cudaMalloc(&lowres_vx, size); - cudaMalloc(&lowres_vy, size); - cudaMalloc(&lowres_vz, size); - cudaMemcpy(lowres_vx, boxes->lowres_vx, size, cudaMemcpyHostToDevice); - cudaMemcpy(lowres_vy, boxes->lowres_vy, size, cudaMemcpyHostToDevice); - cudaMemcpy(lowres_vz, boxes->lowres_vz, size, cudaMemcpyHostToDevice); + cudaMalloc(&lowres_vx, size_float); + cudaMalloc(&lowres_vy, size_float); + cudaMalloc(&lowres_vz, size_float); + cudaMemcpy(lowres_vx, boxes->lowres_vx, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(lowres_vy, boxes->lowres_vy, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(lowres_vz, boxes->lowres_vz, size_float, cudaMemcpyHostToDevice); } if (user_params->USE_2LPT) { if (user_params->PERTURB_ON_HIGH_RES) { - cudaMalloc(&hires_vx_2LPT, size); - cudaMalloc(&hires_vy_2LPT, size); - cudaMalloc(&hires_vz_2LPT, size); - cudaMemcpy(hires_vx_2LPT, boxes->hires_vx_2LPT, size, cudaMemcpyHostToDevice); - cudaMemcpy(hires_vy_2LPT, boxes->hires_vy_2LPT, size, cudaMemcpyHostToDevice); - cudaMemcpy(hires_vz_2LPT, boxes->hires_vz_2LPT, size, cudaMemcpyHostToDevice); + cudaMalloc(&hires_vx_2LPT, size_float); + cudaMalloc(&hires_vy_2LPT, size_float); + cudaMalloc(&hires_vz_2LPT, size_float); + cudaMemcpy(hires_vx_2LPT, boxes->hires_vx_2LPT, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(hires_vy_2LPT, 
boxes->hires_vy_2LPT, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(hires_vz_2LPT, boxes->hires_vz_2LPT, size_float, cudaMemcpyHostToDevice); } else { - cudaMalloc(&lowres_vx_2LPT, size); - cudaMalloc(&lowres_vy_2LPT, size); - cudaMalloc(&lowres_vz_2LPT, size); - cudaMemcpy(lowres_vx_2LPT, boxes->lowres_vx_2LPT, size, cudaMemcpyHostToDevice); - cudaMemcpy(lowres_vy_2LPT, boxes->lowres_vy_2LPT, size, cudaMemcpyHostToDevice); - cudaMemcpy(lowres_vz_2LPT, boxes->lowres_vz_2LPT, size, cudaMemcpyHostToDevice); + cudaMalloc(&lowres_vx_2LPT, size_float); + cudaMalloc(&lowres_vy_2LPT, size_float); + cudaMalloc(&lowres_vz_2LPT, size_float); + cudaMemcpy(lowres_vx_2LPT, boxes->lowres_vx_2LPT, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(lowres_vy_2LPT, boxes->lowres_vy_2LPT, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(lowres_vz_2LPT, boxes->lowres_vz_2LPT, size_float, cudaMemcpyHostToDevice); } } @@ -649,7 +684,8 @@ int ComputePerturbField_gpu( // Invoke kernel int threadsPerBlock = 256; - int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; + // int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; + int numBlocks = (TOT_NUM_PIXELS + threadsPerBlock - 1) / threadsPerBlock; perturb_density_field_kernel<<>>( d_box, hires_density, hires_vx, hires_vy, hires_vz, lowres_vx, lowres_vy, lowres_vz, hires_vx_2LPT, hires_vy_2LPT, hires_vz_2LPT, lowres_vx_2LPT, lowres_vy_2LPT, lowres_vz_2LPT, @@ -657,7 +693,7 @@ int ComputePerturbField_gpu( f_pixel_factor, init_growth_factor, user_params->PERTURB_ON_HIGH_RES, user_params->USE_2LPT); // Only use during development! 
- cudaError_t err = cudaDeviceSynchronize(); + err = cudaDeviceSynchronize(); CATCH_CUDA_ERROR(err); err = cudaGetLastError(); if (err != cudaSuccess) { @@ -666,8 +702,8 @@ int ComputePerturbField_gpu( } // Copy results from device to host - // cudaMemcpy(resampled_box, d_box, size, cudaMemcpyDeviceToHost); - cudaError_t err = cudaMemcpy(resampled_box, d_box, size, cudaMemcpyDeviceToHost); + // cudaMemcpy(resampled_box, d_box, size_double, cudaMemcpyDeviceToHost); + err = cudaMemcpy(resampled_box, d_box, size_double, cudaMemcpyDeviceToHost); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CudaError); @@ -706,6 +742,12 @@ int ComputePerturbField_gpu( Throw(CudaError); } + // LOG_DEBUG("resampled_box[:50] = "); + // for (int element = 0; element < 50; element++) { + // LOG_DEBUG("%.4e ", resampled_box[element]); + // } + // LOG_DEBUG("\n"); + // ---------------------------------------------------------------------------------------------------------------------------- LOG_SUPER_DEBUG("resampled_box: "); From af06767c088ff397b373b5d45b4c5fd06ab46557 Mon Sep 17 00:00:00 2001 From: alserene Date: Sat, 26 Oct 2024 16:01:17 +1100 Subject: [PATCH 015/145] Reorganise C and CUDA code so only CUDA related code in cu file. 
--- src/py21cmfast/src/PerturbField.c | 280 +++++---- src/py21cmfast/src/PerturbField.cu | 931 +++++------------------------ src/py21cmfast/src/PerturbField.h | 10 +- 3 files changed, 298 insertions(+), 923 deletions(-) diff --git a/src/py21cmfast/src/PerturbField.c b/src/py21cmfast/src/PerturbField.c index 461a435f9..e5fddf753 100644 --- a/src/py21cmfast/src/PerturbField.c +++ b/src/py21cmfast/src/PerturbField.c @@ -157,7 +157,7 @@ void compute_perturbed_velocities( } -int ComputePerturbField_cpu( +int ComputePerturbField( float redshift, UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes, PerturbedField *perturbed_field ){ @@ -347,6 +347,7 @@ int ComputePerturbField_cpu( // ************ END INITIALIZATION **************************** // + // Perturbing the density field requires adding over multiple cells. Store intermediate result as a double to avoid rounding errors if(user_params->PERTURB_ON_HIGH_RES) { resampled_box = (double *)calloc(TOT_NUM_PIXELS,sizeof(double)); @@ -354,145 +355,154 @@ int ComputePerturbField_cpu( else { resampled_box = (double *)calloc(HII_TOT_NUM_PIXELS,sizeof(double)); } + if (resampled_box == NULL) { + LOG_DEBUG("Could not allocate memory for resampled_box."); + } - // go through the high-res box, mapping the mass onto the low-res (updated) box - LOG_DEBUG("Perturb the density field"); - #pragma omp parallel \ - shared(init_growth_factor,boxes,f_pixel_factor,resampled_box,dimension) \ - private(i,j,k,xi,xf,yi,yf,zi,zf,HII_i,HII_j,HII_k,d_x,d_y,d_z,t_x,t_y,t_z,xp1,yp1,zp1) \ - num_threads(user_params->N_THREADS) - { - #pragma omp for - for (i=0; iDIM;i++){ - for (j=0; jDIM;j++){ - for (k=0; kDIM)+0.0); - yf = (j+0.5)/((user_params->DIM)+0.0); - zf = (k+0.5)/((D_PARA)+0.0); + // go through the high-res box, mapping the mass onto the low-res (updated) box + LOG_DEBUG("Perturb the density field"); + #pragma omp parallel \ + shared(init_growth_factor,boxes,f_pixel_factor,resampled_box,dimension) \ + 
private(i,j,k,xi,xf,yi,yf,zi,zf,HII_i,HII_j,HII_k,d_x,d_y,d_z,t_x,t_y,t_z,xp1,yp1,zp1) \ + num_threads(user_params->N_THREADS) + { + #pragma omp for + for (i=0; iDIM;i++){ + for (j=0; jDIM;j++){ + for (k=0; kPERTURB_ON_HIGH_RES) { - xf += (boxes->hires_vx)[R_INDEX(i, j, k)]; - yf += (boxes->hires_vy)[R_INDEX(i, j, k)]; - zf += (boxes->hires_vz)[R_INDEX(i, j, k)]; - } - else { - HII_i = (unsigned long long)(i/f_pixel_factor); - HII_j = (unsigned long long)(j/f_pixel_factor); - HII_k = (unsigned long long)(k/f_pixel_factor); - xf += (boxes->lowres_vx)[HII_R_INDEX(HII_i, HII_j, HII_k)]; - yf += (boxes->lowres_vy)[HII_R_INDEX(HII_i, HII_j, HII_k)]; - zf += (boxes->lowres_vz)[HII_R_INDEX(HII_i, HII_j, HII_k)]; - } + // map indeces to locations in units of box size + xf = (i+0.5)/((user_params->DIM)+0.0); + yf = (j+0.5)/((user_params->DIM)+0.0); + zf = (k+0.5)/((D_PARA)+0.0); - // 2LPT PART - // add second order corrections - if(user_params->USE_2LPT){ + // update locations if(user_params->PERTURB_ON_HIGH_RES) { - xf -= (boxes->hires_vx_2LPT)[R_INDEX(i,j,k)]; - yf -= (boxes->hires_vy_2LPT)[R_INDEX(i,j,k)]; - zf -= (boxes->hires_vz_2LPT)[R_INDEX(i,j,k)]; + xf += (boxes->hires_vx)[R_INDEX(i, j, k)]; + yf += (boxes->hires_vy)[R_INDEX(i, j, k)]; + zf += (boxes->hires_vz)[R_INDEX(i, j, k)]; } else { - xf -= (boxes->lowres_vx_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; - yf -= (boxes->lowres_vy_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; - zf -= (boxes->lowres_vz_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; + HII_i = (unsigned long long)(i/f_pixel_factor); + HII_j = (unsigned long long)(j/f_pixel_factor); + HII_k = (unsigned long long)(k/f_pixel_factor); + xf += (boxes->lowres_vx)[HII_R_INDEX(HII_i, HII_j, HII_k)]; + yf += (boxes->lowres_vy)[HII_R_INDEX(HII_i, HII_j, HII_k)]; + zf += (boxes->lowres_vz)[HII_R_INDEX(HII_i, HII_j, HII_k)]; } - } - xf *= (double)(dimension); - yf *= (double)(dimension); - zf *= (double)((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension)); - while (xf 
>= (double)(dimension)){ xf -= (dimension);} - while (xf < 0){ xf += (dimension);} - while (yf >= (double)(dimension)){ yf -= (dimension);} - while (yf < 0){ yf += (dimension);} - while (zf >= (double)(user_params->NON_CUBIC_FACTOR*dimension)){ zf -= (user_params->NON_CUBIC_FACTOR*dimension);} - while (zf < 0){ zf += (user_params->NON_CUBIC_FACTOR*dimension);} - xi = xf; - yi = yf; - zi = zf; - if (xi >= (dimension)){ xi -= (dimension);} - if (xi < 0) {xi += (dimension);} - if (yi >= (dimension)){ yi -= (dimension);} - if (yi < 0) {yi += (dimension);} - if (zi >= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension))){ zi -= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} - if (zi < 0) {zi += ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} - - // Determine the fraction of the perturbed cell which overlaps with the 8 nearest grid cells, - // based on the grid cell which contains the centre of the perturbed cell - d_x = fabs(xf - (double)(xi+0.5)); - d_y = fabs(yf - (double)(yi+0.5)); - d_z = fabs(zf - (double)(zi+0.5)); - if(xf < (double)(xi+0.5)) { - // If perturbed cell centre is less than the mid-point then update fraction - // of mass in the cell and determine the cell centre of neighbour to be the - // lowest grid point index - d_x = 1. - d_x; - xi -= 1; - if (xi < 0) {xi += (dimension);} // Only this critera is possible as iterate back by one (we cannot exceed DIM) - } - if(yf < (double)(yi+0.5)) { - d_y = 1. 
- d_y; - yi -= 1; + + // 2LPT PART + // add second order corrections + if(user_params->USE_2LPT){ + if(user_params->PERTURB_ON_HIGH_RES) { + xf -= (boxes->hires_vx_2LPT)[R_INDEX(i,j,k)]; + yf -= (boxes->hires_vy_2LPT)[R_INDEX(i,j,k)]; + zf -= (boxes->hires_vz_2LPT)[R_INDEX(i,j,k)]; + } + else { + xf -= (boxes->lowres_vx_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; + yf -= (boxes->lowres_vy_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; + zf -= (boxes->lowres_vz_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; + } + } + xf *= (double)(dimension); + yf *= (double)(dimension); + zf *= (double)((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension)); + while (xf >= (double)(dimension)){ xf -= (dimension);} + while (xf < 0){ xf += (dimension);} + while (yf >= (double)(dimension)){ yf -= (dimension);} + while (yf < 0){ yf += (dimension);} + while (zf >= (double)(user_params->NON_CUBIC_FACTOR*dimension)){ zf -= (user_params->NON_CUBIC_FACTOR*dimension);} + while (zf < 0){ zf += (user_params->NON_CUBIC_FACTOR*dimension);} + xi = xf; + yi = yf; + zi = zf; + if (xi >= (dimension)){ xi -= (dimension);} + if (xi < 0) {xi += (dimension);} + if (yi >= (dimension)){ yi -= (dimension);} if (yi < 0) {yi += (dimension);} - } - if(zf < (double)(zi+0.5)) { - d_z = 1. - d_z; - zi -= 1; + if (zi >= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension))){ zi -= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} if (zi < 0) {zi += ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} - } - t_x = 1. - d_x; - t_y = 1. - d_y; - t_z = 1. 
- d_z; - - // Determine the grid coordinates of the 8 neighbouring cells - // Takes into account the offset based on cell centre determined above - xp1 = xi + 1; - if(xp1 >= dimension) { xp1 -= (dimension);} - yp1 = yi + 1; - if(yp1 >= dimension) { yp1 -= (dimension);} - zp1 = zi + 1; - if(zp1 >= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension))) { zp1 -= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} - if(user_params->PERTURB_ON_HIGH_RES) { - // Redistribute the mass over the 8 neighbouring cells according to cloud in cell -#pragma omp atomic - resampled_box[R_INDEX(xi,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*t_z); -#pragma omp atomic - resampled_box[R_INDEX(xp1,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*t_z); -#pragma omp atomic - resampled_box[R_INDEX(xi,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*t_z); -#pragma omp atomic - resampled_box[R_INDEX(xp1,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*t_z); -#pragma omp atomic - resampled_box[R_INDEX(xi,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*d_z); -#pragma omp atomic - resampled_box[R_INDEX(xp1,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*d_z); -#pragma omp atomic - resampled_box[R_INDEX(xi,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*d_z); -#pragma omp atomic - resampled_box[R_INDEX(xp1,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*d_z); - } - else { - // Redistribute the mass over the 8 neighbouring cells according to cloud in cell -#pragma omp atomic - resampled_box[HII_R_INDEX(xi,yi,zi)] += (double)(1. 
+ init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*t_z); -#pragma omp atomic - resampled_box[HII_R_INDEX(xp1,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*t_z); -#pragma omp atomic - resampled_box[HII_R_INDEX(xi,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*t_z); -#pragma omp atomic - resampled_box[HII_R_INDEX(xp1,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*t_z); -#pragma omp atomic - resampled_box[HII_R_INDEX(xi,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*d_z); -#pragma omp atomic - resampled_box[HII_R_INDEX(xp1,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*d_z); -#pragma omp atomic - resampled_box[HII_R_INDEX(xi,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*d_z); -#pragma omp atomic - resampled_box[HII_R_INDEX(xp1,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*d_z); + // Determine the fraction of the perturbed cell which overlaps with the 8 nearest grid cells, + // based on the grid cell which contains the centre of the perturbed cell + d_x = fabs(xf - (double)(xi+0.5)); + d_y = fabs(yf - (double)(yi+0.5)); + d_z = fabs(zf - (double)(zi+0.5)); + if(xf < (double)(xi+0.5)) { + // If perturbed cell centre is less than the mid-point then update fraction + // of mass in the cell and determine the cell centre of neighbour to be the + // lowest grid point index + d_x = 1. - d_x; + xi -= 1; + if (xi < 0) {xi += (dimension);} // Only this critera is possible as iterate back by one (we cannot exceed DIM) + } + if(yf < (double)(yi+0.5)) { + d_y = 1. - d_y; + yi -= 1; + if (yi < 0) {yi += (dimension);} + } + if(zf < (double)(zi+0.5)) { + d_z = 1. 
- d_z; + zi -= 1; + if (zi < 0) {zi += ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} + } + t_x = 1. - d_x; + t_y = 1. - d_y; + t_z = 1. - d_z; + + // Determine the grid coordinates of the 8 neighbouring cells + // Takes into account the offset based on cell centre determined above + xp1 = xi + 1; + if(xp1 >= dimension) { xp1 -= (dimension);} + yp1 = yi + 1; + if(yp1 >= dimension) { yp1 -= (dimension);} + zp1 = zi + 1; + if(zp1 >= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension))) { zp1 -= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} + + if(user_params->PERTURB_ON_HIGH_RES) { + // Redistribute the mass over the 8 neighbouring cells according to cloud in cell + #pragma omp atomic + resampled_box[R_INDEX(xi,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*t_z); + #pragma omp atomic + resampled_box[R_INDEX(xp1,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*t_z); + #pragma omp atomic + resampled_box[R_INDEX(xi,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*t_z); + #pragma omp atomic + resampled_box[R_INDEX(xp1,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*t_z); + #pragma omp atomic + resampled_box[R_INDEX(xi,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*d_z); + #pragma omp atomic + resampled_box[R_INDEX(xp1,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*d_z); + #pragma omp atomic + resampled_box[R_INDEX(xi,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*d_z); + #pragma omp atomic + resampled_box[R_INDEX(xp1,yp1,zp1)] += (double)(1. 
+ init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*d_z); + } + else { + // Redistribute the mass over the 8 neighbouring cells according to cloud in cell + #pragma omp atomic + resampled_box[HII_R_INDEX(xi,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*t_z); + #pragma omp atomic + resampled_box[HII_R_INDEX(xp1,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*t_z); + #pragma omp atomic + resampled_box[HII_R_INDEX(xi,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*t_z); + #pragma omp atomic + resampled_box[HII_R_INDEX(xp1,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*t_z); + #pragma omp atomic + resampled_box[HII_R_INDEX(xi,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*d_z); + #pragma omp atomic + resampled_box[HII_R_INDEX(xp1,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*d_z); + #pragma omp atomic + resampled_box[HII_R_INDEX(xi,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*d_z); + #pragma omp atomic + resampled_box[HII_R_INDEX(xp1,yp1,zp1)] += (double)(1. 
+ init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*d_z); + } } } } @@ -776,15 +786,3 @@ int ComputePerturbField_cpu( return(0); } - -int ComputePerturbField( - float redshift, UserParams *user_params, CosmoParams *cosmo_params, - InitialConditions *boxes, PerturbedField *perturbed_field - ){ - // int result; - if (1) { - return ComputePerturbField_gpu(redshift, user_params, cosmo_params, boxes, perturbed_field); - } else { - return ComputePerturbField_cpu(redshift, user_params, cosmo_params, boxes, perturbed_field); - } - } diff --git a/src/py21cmfast/src/PerturbField.cu b/src/py21cmfast/src/PerturbField.cu index 9ba491fde..de1d024bb 100644 --- a/src/py21cmfast/src/PerturbField.cu +++ b/src/py21cmfast/src/PerturbField.cu @@ -25,164 +25,6 @@ #include "PerturbField.h" -void compute_perturbed_velocities( - unsigned short axis, - UserParams *user_params, - fftwf_complex *HIRES_density_perturb, - fftwf_complex *HIRES_density_perturb_saved, - fftwf_complex *LOWRES_density_perturb, - fftwf_complex *LOWRES_density_perturb_saved, - float dDdt_over_D, - int dimension, - int switch_mid, - float f_pixel_factor, - float *velocity -){ - - float k_x, k_y, k_z, k_sq; - unsigned long long int n_x, n_y, n_z; - unsigned long long int i,j,k; - - // ALICE: 3D vector for k-space coords - float kvec[3]; - - if(user_params->PERTURB_ON_HIGH_RES) { - // We are going to generate the velocity field on the high-resolution perturbed - // density grid - // ALICE: Copy the saved k-space density field to HIRES_density_perturb. - memcpy( - HIRES_density_perturb, - HIRES_density_perturb_saved, - sizeof(fftwf_complex)*KSPACE_NUM_PIXELS - ); - } - else { - // We are going to generate the velocity field on the low-resolution perturbed density grid - // ALICE: Copy the saved k-space density field to LOWRES_density_perturb. 
- memcpy( - LOWRES_density_perturb, - LOWRES_density_perturb_saved, - sizeof(fftwf_complex)*HII_KSPACE_NUM_PIXELS - ); - LOG_SUPER_DEBUG("dDdt_over_D=%.6e, dimension=%d, switch_mid=%d, f_pixel_factor=%f", dDdt_over_D, dimension, switch_mid, f_pixel_factor); - } - - // ALICE: Compute wave numbers (k_x, k_y, k_z) + compute velocity based on density perturbations. - // ALICE: Wave numbers == frequencies of spatial oscillations (higher wave number=faster oscillations) - #pragma omp parallel \ - shared(LOWRES_density_perturb,HIRES_density_perturb,dDdt_over_D,dimension,switch_mid) \ - private(n_x,n_y,n_z,k_x,k_y,k_z,k_sq, kvec) \ - num_threads(user_params->N_THREADS) - { - #pragma omp for - for (n_x=0; n_x switch_mid) - k_x = (n_x-dimension) * DELTA_K; // wrap around for FFT convention - else - k_x = n_x * DELTA_K; - - for (n_y=0; n_y switch_mid) - k_y = (n_y-dimension) * DELTA_K; - else - k_y = n_y * DELTA_K; - - for (n_z=0; n_z<=(unsigned long long)(user_params->NON_CUBIC_FACTOR*switch_mid); n_z++){ - k_z = n_z * DELTA_K_PARA; - - kvec[0] = k_x; - kvec[1] = k_y; - kvec[2] = k_z; - - k_sq = k_x*k_x + k_y*k_y + k_z*k_z; - - // now set the velocities - if ((n_x==0) && (n_y==0) && (n_z==0)) { // DC mode - if(user_params->PERTURB_ON_HIGH_RES) { - // HIRES_density_perturb[0] = 0; - HIRES_density_perturb[0][0] = 0.; - HIRES_density_perturb[0][1] = 0.; - } - else { - // LOWRES_density_perturb[0] = 0; - LOWRES_density_perturb[0][0] = 0.; - LOWRES_density_perturb[0][1] = 0.; - } - } - else { - if(user_params->PERTURB_ON_HIGH_RES) { - // HIRES_density_perturb[C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]*I/k_sq/(TOT_NUM_PIXELS+0.0); - // HIRES_density_perturb[C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]/k_sq/(TOT_NUM_PIXELS+0.0); - // reinterpret_cast &>(HIRES_density_perturb[C_INDEX(n_x,n_y,n_z)]) *= std::complex(0., dDdt_over_D*kvec[axis]*I/k_sq/(TOT_NUM_PIXELS+0.0)); - reinterpret_cast &>(HIRES_density_perturb[C_INDEX(n_x,n_y,n_z)]) *= std::complex(0., 
dDdt_over_D*kvec[axis]/k_sq/(TOT_NUM_PIXELS+0.0)); - } - else { - // LOWRES_density_perturb[HII_C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]*I/k_sq/(HII_TOT_NUM_PIXELS+0.0); - // LOWRES_density_perturb[HII_C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]/k_sq/(HII_TOT_NUM_PIXELS+0.0); - // reinterpret_cast &>(LOWRES_density_perturb[HII_C_INDEX(n_x,n_y,n_z)]) *= std::complex(0., dDdt_over_D*kvec[axis]*I/k_sq/(HII_TOT_NUM_PIXELS+0.0)); - reinterpret_cast &>(LOWRES_density_perturb[HII_C_INDEX(n_x,n_y,n_z)]) *= std::complex(0., dDdt_over_D*kvec[axis]/k_sq/(HII_TOT_NUM_PIXELS+0.0)); - } - } - } - } - } - } - - LOG_SUPER_DEBUG("density_perturb after modification by dDdt: "); - debugSummarizeBoxComplex(LOWRES_density_perturb, user_params->HII_DIM, user_params->NON_CUBIC_FACTOR, " "); - - // ALICE: density field was already in k-space when passed in, so now filter (top-hat), inverse fft and copy to velocity field. - if(user_params->PERTURB_ON_HIGH_RES) { - - // smooth the high resolution field ready for resampling - // ALICE: RES=0 (dimension=DIM, midpoint=MIDDLE), filter_type=0 (real space top-hat filtering) - if (user_params->DIM != user_params->HII_DIM) - filter_box(HIRES_density_perturb, 0, 0, L_FACTOR*user_params->BOX_LEN/(user_params->HII_DIM+0.0), 0.); - - dft_c2r_cube(user_params->USE_FFTW_WISDOM, user_params->DIM, D_PARA, user_params->N_THREADS, HIRES_density_perturb); - - // ALICE: Copy computed velocities to velocity field. - #pragma omp parallel \ - shared(velocity,HIRES_density_perturb,f_pixel_factor) \ - private(i,j,k) \ - num_threads(user_params->N_THREADS) - { - #pragma omp for - for (i=0; iHII_DIM; i++){ - for (j=0; jHII_DIM; j++){ - for (k=0; k no top hat filtering, just inverse fft and copy to velocity field. 
- else { - dft_c2r_cube(user_params->USE_FFTW_WISDOM, user_params->HII_DIM, HII_D_PARA, user_params->N_THREADS, LOWRES_density_perturb); - - #pragma omp parallel \ - shared(velocity,LOWRES_density_perturb) \ - private(i,j,k) \ - num_threads(user_params->N_THREADS) - { - #pragma omp for - for (i=0; iHII_DIM; i++){ - for (j=0; jHII_DIM; j++){ - for (k=0; kHII_DIM, user_params->NON_CUBIC_FACTOR, " "); - -} - -// ---------------------------------------------------------------------------------------------------------------------------- - // #define R_INDEX(x,y,z)((unsigned long long)((z)+D_PARA*((y)+D*(x)))) __device__ inline unsigned long long compute_R_INDEX(int i, int j, int k, int dim, long long d_para) { return k + d_para * (j + dim * i); @@ -241,9 +83,9 @@ __global__ void perturb_density_field_kernel( unsigned long long r_index = compute_R_INDEX(i, j, k, DIM, d_para); // Map index to location in units of box size - double xf = (i + 0.5) / DIM; - double yf = (j + 0.5) / DIM; - double zf = (k + 0.5) / d_para; + double xf = (i + 0.5) / (DIM + 0.0); + double yf = (j + 0.5) / (DIM + 0.0); + double zf = (k + 0.5) / (d_para + 0.0); // Update locations unsigned long long HII_index; @@ -315,9 +157,9 @@ __global__ void perturb_density_field_kernel( int zi = zf; // Wrap index coordinates to ensure no out-of-bounds array access will be attempted - xi = (xi % dimension + dimension) % dimension; - yi = (yi % dimension + dimension) % dimension; - zi = (zi % dimension_factored + dimension_factored) % dimension_factored; + xi = ((xi % dimension) + dimension) % dimension; + yi = ((yi % dimension) + dimension) % dimension; + zi = ((zi % dimension_factored) + dimension_factored) % dimension_factored; // Determine the fraction of the perturbed cell which overlaps with the 8 nearest grid cells, // based on the grid cell which contains the centre of the perturbed cell @@ -332,17 +174,20 @@ __global__ void perturb_density_field_kernel( // lowest grid point index d_x = 1. 
- d_x; xi -= 1; - xi += (xi + dimension) % dimension; // Only this critera is possible as iterate back by one (we cannot exceed DIM) + // xi += (xi + dimension) % dimension; // Only this critera is possible as iterate back by one (we cannot exceed DIM) + xi = (xi + dimension) % dimension; } if(yf < (double)(yi + 0.5)) { d_y = 1. - d_y; yi -= 1; - yi += (yi + dimension) % dimension; + // yi += (yi + dimension) % dimension; + yi = (yi + dimension) % dimension; } if(zf < (double)(zi + 0.5)) { d_z = 1. - d_z; zi -= 1; - zi += (zi + (unsigned long long)(non_cubic_factor * dimension)) % (unsigned long long)(non_cubic_factor * dimension); + // zi += (zi + (unsigned long long)(non_cubic_factor * dimension)) % (unsigned long long)(non_cubic_factor * dimension); + zi = (zi + (unsigned long long)(non_cubic_factor * dimension)) % (unsigned long long)(non_cubic_factor * dimension); } // The fractions of mass which will remain with perturbed cell float t_x = 1. - d_x; @@ -357,7 +202,7 @@ __global__ void perturb_density_field_kernel( int zp1 = (zi + 1) % (unsigned long long)(non_cubic_factor * dimension); // double scaled_density = 1 + init_growth_factor * __ldg(&hires_density[r_index]); - double scaled_density = 1 + init_growth_factor * hires_density[r_index]; + double scaled_density = 1.0 + init_growth_factor * hires_density[r_index]; if (perturb_on_high_res) { // Redistribute the mass over the 8 neighbouring cells according to cloud in cell @@ -384,646 +229,182 @@ __global__ void perturb_density_field_kernel( } } -// ------------------------------------------------------------------------------------------------------------------------ - -int ComputePerturbField_gpu( - float redshift, UserParams *user_params, CosmoParams *cosmo_params, - InitialConditions *boxes, PerturbedField *perturbed_field +double* MapMass_gpu( + UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes, double *resampled_box, + int dimension, float f_pixel_factor, float 
init_growth_factor ) { - /* - ComputePerturbField uses the first-order Langragian displacement field to move the - masses in the cells of the density field. The high-res density field is extrapolated - to some high-redshift (global_params.INITIAL_REDSHIFT), then uses the zeldovich - approximation to move the grid "particles" onto the lower-res grid we use for the - maps. Then we recalculate the velocity fields on the perturbed grid. - */ - - int status; - Try{ // This Try{} wraps the whole function, so we don't indent. - // Makes the parameter structs visible to a variety of functions/macros // Do each time to avoid Python garbage collection issues Broadcast_struct_global_noastro(user_params,cosmo_params); - omp_set_num_threads(user_params->N_THREADS); - - fftwf_complex *HIRES_density_perturb, *HIRES_density_perturb_saved; - fftwf_complex *LOWRES_density_perturb, *LOWRES_density_perturb_saved; - - float growth_factor, displacement_factor_2LPT, init_growth_factor, init_displacement_factor_2LPT; - float mass_factor, dDdt, f_pixel_factor, velocity_displacement_factor, velocity_displacement_factor_2LPT; - int i, j, k, dimension, switch_mid; - - // Function for deciding the dimensions of loops when we could - // use either the low or high resolution grids. 
- switch(user_params->PERTURB_ON_HIGH_RES) { - case 0: - dimension = user_params->HII_DIM; - switch_mid = HII_MIDDLE; - break; - case 1: - dimension = user_params->DIM; - switch_mid = MIDDLE; - break; + // Box shapes from outputs.py and convenience macros + size_t size_double, size_float; + // unsigned long long num_pixels; + if(user_params->PERTURB_ON_HIGH_RES) { + // num_pixels = TOT_NUM_PIXELS; + size_double = TOT_NUM_PIXELS * sizeof(double); + size_float = TOT_NUM_PIXELS * sizeof(float); } - - // *************** BEGIN INITIALIZATION ************************** // - - // perform a very rudimentary check to see if we are underresolved and not using the linear approx - if ((user_params->BOX_LEN > user_params->DIM) && !(global_params.EVOLVE_DENSITY_LINEARLY)){ - LOG_WARNING("Resolution is likely too low for accurate evolved density fields\n \ - It is recommended that you either increase the resolution (DIM/BOX_LEN) or set the EVOLVE_DENSITY_LINEARLY flag to 1\n"); + else { + // num_pixels = HII_TOT_NUM_PIXELS; + size_double = HII_TOT_NUM_PIXELS * sizeof(double); + size_float = HII_TOT_NUM_PIXELS * sizeof(float); } - growth_factor = dicke(redshift); - displacement_factor_2LPT = -(3.0/7.0) * growth_factor*growth_factor; // 2LPT eq. D8 - - dDdt = ddickedt(redshift); // time derivative of the growth factor (1/s) - init_growth_factor = dicke(global_params.INITIAL_REDSHIFT); - init_displacement_factor_2LPT = -(3.0/7.0) * init_growth_factor*init_growth_factor; // 2LPT eq. D8 + // Allocat host memory for output box + // double* resampled_box = (double*)malloc(size_double); + // double* resampled_box = (double *)calloc(num_pixels, sizeof(double)); // initialise to 0 - // find factor of HII pixel size / deltax pixel size - f_pixel_factor = user_params->DIM/(float)(user_params->HII_DIM); - mass_factor = pow(f_pixel_factor, 3); + // Allocate device memory for output box and set to 0. 
+ double* d_box; + cudaMalloc((void**)&d_box, size_double); + cudaMemset(d_box, 0, size_double); // fills size bytes with byte=0 - // allocate memory for the updated density, and initialize - LOWRES_density_perturb = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*HII_KSPACE_NUM_PIXELS); - LOWRES_density_perturb_saved = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*HII_KSPACE_NUM_PIXELS); - - if(user_params->PERTURB_ON_HIGH_RES) { - HIRES_density_perturb = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*KSPACE_NUM_PIXELS); // D * D * NCF * D/2 - HIRES_density_perturb_saved = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*KSPACE_NUM_PIXELS); // sizeof(fftwf_complex) = 2 * sizeof(float)? + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CudaError); } - // double *resampled_box; - - //TODO: debugSummarizeIC is bugged when not all the fields are in memory - // debugSummarizeIC(boxes, user_params->HII_DIM, user_params->DIM, user_params->NON_CUBIC_FACTOR); - LOG_SUPER_DEBUG("growth_factor=%f, displacemet_factor_2LPT=%f, dDdt=%f, init_growth_factor=%f, init_displacement_factor_2LPT=%f, mass_factor=%f", - growth_factor, displacement_factor_2LPT, dDdt, init_growth_factor, init_displacement_factor_2LPT, mass_factor); - - // check if the linear evolution flag was set - if (global_params.EVOLVE_DENSITY_LINEARLY){ - - LOG_DEBUG("Linearly evolve density field"); - -#pragma omp parallel shared(growth_factor,boxes,LOWRES_density_perturb,HIRES_density_perturb,dimension) private(i,j,k) num_threads(user_params->N_THREADS) - { -#pragma omp for - for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ // NCF * D - // max (i, j, k) = (D, D, NCF * D) - if(user_params->PERTURB_ON_HIGH_RES) { - // HIRES_density_perturb is of type fftwf_complex - // HIRES_density_perturb has size D * D * NCF * D/2 - - // hires_density is of type float - // hires_density has size D * D * NCF * D + // Allocate 
device memory for density field + float* hires_density; + // cudaMalloc(&hires_density, (HII_TOT_NUM_PIXELS * sizeof(double))); // from 21cmFAST.h, outputs.py & indexing.h + // cudaMemcpy(hires_density, boxes->hires_density, (HII_TOT_NUM_PIXELS * sizeof(double)), cudaMemcpyHostToDevice); + cudaMalloc(&hires_density, (TOT_NUM_PIXELS * sizeof(float))); // from 21cmFAST.h, outputs.py & indexing.h + cudaMemcpy(hires_density, boxes->hires_density, (TOT_NUM_PIXELS * sizeof(float)), cudaMemcpyHostToDevice); + + err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CudaError); + } - *((float *)HIRES_density_perturb + R_FFT_INDEX(i,j,k)) = growth_factor*boxes->hires_density[R_INDEX(i,j,k)]; - } - else { - *((float *)LOWRES_density_perturb + HII_R_FFT_INDEX(i,j,k)) = growth_factor*boxes->lowres_density[HII_R_INDEX(i,j,k)]; - } - } - } - } - } + // Allocate device memory and copy arrays to device as per user_params + // floats as per 21cmFAST.h + float* hires_vx; + float* hires_vy; + float* hires_vz; + float* lowres_vx; + float* lowres_vy; + float* lowres_vz; + float* hires_vx_2LPT; + float* hires_vy_2LPT; + float* hires_vz_2LPT; + float* lowres_vx_2LPT; + float* lowres_vy_2LPT; + float* lowres_vz_2LPT; + + if (user_params->PERTURB_ON_HIGH_RES) { + cudaMalloc(&hires_vx, size_float); + cudaMalloc(&hires_vy, size_float); + cudaMalloc(&hires_vz, size_float); + cudaMemcpy(hires_vx, boxes->hires_vx, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(hires_vy, boxes->hires_vy, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(hires_vz, boxes->hires_vz, size_float, cudaMemcpyHostToDevice); } else { - // Apply Zel'dovich/2LPT correction - LOG_DEBUG("Apply Zel'dovich"); - -#pragma omp parallel shared(LOWRES_density_perturb,HIRES_density_perturb,dimension) private(i,j,k) num_threads(user_params->N_THREADS) - { -#pragma omp for - for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ - if(user_params->PERTURB_ON_HIGH_RES) { - 
*((float *)HIRES_density_perturb + R_FFT_INDEX(i,j,k)) = 0.; - } - else { - *((float *)LOWRES_density_perturb + HII_R_FFT_INDEX(i,j,k)) = 0.; - } - - } - } - } - } - - velocity_displacement_factor = (growth_factor-init_growth_factor) / user_params->BOX_LEN; - - // now add the missing factor of D -#pragma omp parallel shared(boxes,velocity_displacement_factor,dimension) private(i,j,k) num_threads(user_params->N_THREADS) - { -#pragma omp for - for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ - if(user_params->PERTURB_ON_HIGH_RES) { - boxes->hires_vx[R_INDEX(i,j,k)] *= velocity_displacement_factor; // this is now comoving displacement in units of box size - boxes->hires_vy[R_INDEX(i,j,k)] *= velocity_displacement_factor; // this is now comoving displacement in units of box size - boxes->hires_vz[R_INDEX(i,j,k)] *= (velocity_displacement_factor/user_params->NON_CUBIC_FACTOR); // this is now comoving displacement in units of box size - } - else { - boxes->lowres_vx[HII_R_INDEX(i,j,k)] *= velocity_displacement_factor; // this is now comoving displacement in units of box size - boxes->lowres_vy[HII_R_INDEX(i,j,k)] *= velocity_displacement_factor; // this is now comoving displacement in units of box size - boxes->lowres_vz[HII_R_INDEX(i,j,k)] *= (velocity_displacement_factor/user_params->NON_CUBIC_FACTOR); // this is now comoving displacement in units of box size - } - } - } - } - } - - // * ************************************************************************* * // - // * BEGIN 2LPT PART * // - // * ************************************************************************* * // - // reference: reference: Scoccimarro R., 1998, MNRAS, 299, 1097-1118 Appendix D - if(user_params->USE_2LPT){ - LOG_DEBUG("Apply 2LPT"); - - // allocate memory for the velocity boxes and read them in - velocity_displacement_factor_2LPT = (displacement_factor_2LPT - init_displacement_factor_2LPT) / user_params->BOX_LEN; - - // now add the missing factor in eq. 
D9 -#pragma omp parallel shared(boxes,velocity_displacement_factor_2LPT,dimension) private(i,j,k) num_threads(user_params->N_THREADS) - { -#pragma omp for - for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ - if(user_params->PERTURB_ON_HIGH_RES) { - boxes->hires_vx_2LPT[R_INDEX(i,j,k)] *= velocity_displacement_factor_2LPT; // this is now comoving displacement in units of box size - boxes->hires_vy_2LPT[R_INDEX(i,j,k)] *= velocity_displacement_factor_2LPT; // this is now comoving displacement in units of box size - boxes->hires_vz_2LPT[R_INDEX(i,j,k)] *= (velocity_displacement_factor_2LPT/user_params->NON_CUBIC_FACTOR); // this is now comoving displacement in units of box size - } - else { - boxes->lowres_vx_2LPT[HII_R_INDEX(i,j,k)] *= velocity_displacement_factor_2LPT; // this is now comoving displacement in units of box size - boxes->lowres_vy_2LPT[HII_R_INDEX(i,j,k)] *= velocity_displacement_factor_2LPT; // this is now comoving displacement in units of box size - boxes->lowres_vz_2LPT[HII_R_INDEX(i,j,k)] *= (velocity_displacement_factor_2LPT/user_params->NON_CUBIC_FACTOR); // this is now comoving displacement in units of box size - } - } - } - } - } - } - - - // * ************************************************************************* * // - // * END 2LPT PART * // - // * ************************************************************************* * // - - // ************ END INITIALIZATION **************************** // - - - // ---------------------------------------------------------------------------------------------------------------------------- - - // Box shapes from outputs.py and convenience macros - size_t size_double, size_float; - unsigned long long num_pixels; - if(user_params->PERTURB_ON_HIGH_RES) { - num_pixels = TOT_NUM_PIXELS; - size_double = TOT_NUM_PIXELS * sizeof(double); - size_float = TOT_NUM_PIXELS * sizeof(float); - } - else { - num_pixels = HII_TOT_NUM_PIXELS; - size_double = HII_TOT_NUM_PIXELS * sizeof(double); - size_float = 
HII_TOT_NUM_PIXELS * sizeof(float); - } - - // Allocat host memory for output box - double* resampled_box = (double*)malloc(size_double); - - // Allocate device memory for output box and set to 0. - double* d_box; - cudaMalloc(&d_box, size_double); - cudaMemset(d_box, 0, size_double); // fills size bytes with byte=0 - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CudaError); - } - - // Allocate device memory for density field - float* hires_density; - // cudaMalloc(&hires_density, (HII_TOT_NUM_PIXELS * sizeof(double))); // from 21cmFAST.h, outputs.py & indexing.h - // cudaMemcpy(hires_density, boxes->hires_density, (HII_TOT_NUM_PIXELS * sizeof(double)), cudaMemcpyHostToDevice); - cudaMalloc(&hires_density, (TOT_NUM_PIXELS * sizeof(float))); // from 21cmFAST.h, outputs.py & indexing.h - cudaMemcpy(hires_density, boxes->hires_density, (TOT_NUM_PIXELS * sizeof(float)), cudaMemcpyHostToDevice); - - err = cudaGetLastError(); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CudaError); - } - - // Allocate device memory and copy arrays to device as per user_params - // floats as per 21cmFAST.h - float* hires_vx; - float* hires_vy; - float* hires_vz; - float* lowres_vx; - float* lowres_vy; - float* lowres_vz; - float* hires_vx_2LPT; - float* hires_vy_2LPT; - float* hires_vz_2LPT; - float* lowres_vx_2LPT; - float* lowres_vy_2LPT; - float* lowres_vz_2LPT; - - if (user_params->PERTURB_ON_HIGH_RES) { - cudaMalloc(&hires_vx, size_float); - cudaMalloc(&hires_vy, size_float); - cudaMalloc(&hires_vz, size_float); - cudaMemcpy(hires_vx, boxes->hires_vx, size_float, cudaMemcpyHostToDevice); - cudaMemcpy(hires_vy, boxes->hires_vy, size_float, cudaMemcpyHostToDevice); - cudaMemcpy(hires_vz, boxes->hires_vz, size_float, cudaMemcpyHostToDevice); - } - else { - cudaMalloc(&lowres_vx, size_float); - cudaMalloc(&lowres_vy, size_float); - cudaMalloc(&lowres_vz, 
size_float); - cudaMemcpy(lowres_vx, boxes->lowres_vx, size_float, cudaMemcpyHostToDevice); - cudaMemcpy(lowres_vy, boxes->lowres_vy, size_float, cudaMemcpyHostToDevice); - cudaMemcpy(lowres_vz, boxes->lowres_vz, size_float, cudaMemcpyHostToDevice); - } - if (user_params->USE_2LPT) { - if (user_params->PERTURB_ON_HIGH_RES) { - cudaMalloc(&hires_vx_2LPT, size_float); - cudaMalloc(&hires_vy_2LPT, size_float); - cudaMalloc(&hires_vz_2LPT, size_float); - cudaMemcpy(hires_vx_2LPT, boxes->hires_vx_2LPT, size_float, cudaMemcpyHostToDevice); - cudaMemcpy(hires_vy_2LPT, boxes->hires_vy_2LPT, size_float, cudaMemcpyHostToDevice); - cudaMemcpy(hires_vz_2LPT, boxes->hires_vz_2LPT, size_float, cudaMemcpyHostToDevice); - } - else { - cudaMalloc(&lowres_vx_2LPT, size_float); - cudaMalloc(&lowres_vy_2LPT, size_float); - cudaMalloc(&lowres_vz_2LPT, size_float); - cudaMemcpy(lowres_vx_2LPT, boxes->lowres_vx_2LPT, size_float, cudaMemcpyHostToDevice); - cudaMemcpy(lowres_vy_2LPT, boxes->lowres_vy_2LPT, size_float, cudaMemcpyHostToDevice); - cudaMemcpy(lowres_vz_2LPT, boxes->lowres_vz_2LPT, size_float, cudaMemcpyHostToDevice); - } - } - - err = cudaGetLastError(); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CudaError); - } - - // Seemingly can't pass macro straight to kernel - long long d_para = D_PARA; - long long hii_d = HII_D; - long long hii_d_para = HII_D_PARA; - - // Invoke kernel - int threadsPerBlock = 256; - // int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; - int numBlocks = (TOT_NUM_PIXELS + threadsPerBlock - 1) / threadsPerBlock; - perturb_density_field_kernel<<>>( - d_box, hires_density, hires_vx, hires_vy, hires_vz, lowres_vx, lowres_vy, lowres_vz, - hires_vx_2LPT, hires_vy_2LPT, hires_vz_2LPT, lowres_vx_2LPT, lowres_vy_2LPT, lowres_vz_2LPT, - dimension, user_params->DIM, d_para, hii_d, hii_d_para, user_params->NON_CUBIC_FACTOR, - f_pixel_factor, init_growth_factor, user_params->PERTURB_ON_HIGH_RES, 
user_params->USE_2LPT); - - // Only use during development! - err = cudaDeviceSynchronize(); - CATCH_CUDA_ERROR(err); - err = cudaGetLastError(); - if (err != cudaSuccess) { - LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); - Throw(CudaError); - } - - // Copy results from device to host - // cudaMemcpy(resampled_box, d_box, size_double, cudaMemcpyDeviceToHost); - err = cudaMemcpy(resampled_box, d_box, size_double, cudaMemcpyDeviceToHost); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CudaError); - } - - // Deallocate device memory - cudaFree(d_box); - cudaFree(hires_density); - + cudaMalloc(&lowres_vx, size_float); + cudaMalloc(&lowres_vy, size_float); + cudaMalloc(&lowres_vz, size_float); + cudaMemcpy(lowres_vx, boxes->lowres_vx, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(lowres_vy, boxes->lowres_vy, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(lowres_vz, boxes->lowres_vz, size_float, cudaMemcpyHostToDevice); + } + if (user_params->USE_2LPT) { if (user_params->PERTURB_ON_HIGH_RES) { - cudaFree(hires_vx); - cudaFree(hires_vy); - cudaFree(hires_vz); + cudaMalloc(&hires_vx_2LPT, size_float); + cudaMalloc(&hires_vy_2LPT, size_float); + cudaMalloc(&hires_vz_2LPT, size_float); + cudaMemcpy(hires_vx_2LPT, boxes->hires_vx_2LPT, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(hires_vy_2LPT, boxes->hires_vy_2LPT, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(hires_vz_2LPT, boxes->hires_vz_2LPT, size_float, cudaMemcpyHostToDevice); } else { - cudaFree(lowres_vx); - cudaFree(lowres_vy); - cudaFree(lowres_vz); + cudaMalloc(&lowres_vx_2LPT, size_float); + cudaMalloc(&lowres_vy_2LPT, size_float); + cudaMalloc(&lowres_vz_2LPT, size_float); + cudaMemcpy(lowres_vx_2LPT, boxes->lowres_vx_2LPT, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(lowres_vy_2LPT, boxes->lowres_vy_2LPT, size_float, cudaMemcpyHostToDevice); + cudaMemcpy(lowres_vz_2LPT, boxes->lowres_vz_2LPT, size_float, cudaMemcpyHostToDevice); 
} - if (user_params->USE_2LPT) { - if (user_params->PERTURB_ON_HIGH_RES) { - cudaFree(hires_vx_2LPT); - cudaFree(hires_vy_2LPT); - cudaFree(hires_vz_2LPT); - } - else { - cudaFree(lowres_vx_2LPT); - cudaFree(lowres_vy_2LPT); - cudaFree(lowres_vz_2LPT); - } - } - - err = cudaGetLastError(); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CudaError); - } - - // LOG_DEBUG("resampled_box[:50] = "); - // for (int element = 0; element < 50; element++) { - // LOG_DEBUG("%.4e ", resampled_box[element]); - // } - // LOG_DEBUG("\n"); - - // ---------------------------------------------------------------------------------------------------------------------------- - - LOG_SUPER_DEBUG("resampled_box: "); - debugSummarizeBoxDouble(resampled_box, dimension, user_params->NON_CUBIC_FACTOR, " "); - - // Resample back to a float for remaining algorithm - #pragma omp parallel \ - shared(LOWRES_density_perturb,HIRES_density_perturb,resampled_box,dimension) \ - private(i,j,k) \ - num_threads(user_params->N_THREADS) - { - #pragma omp for - for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ - if(user_params->PERTURB_ON_HIGH_RES) { - *( (float *)HIRES_density_perturb + R_FFT_INDEX(i,j,k) ) = (float)resampled_box[R_INDEX(i,j,k)]; - } - else { - *( (float *)LOWRES_density_perturb + HII_R_FFT_INDEX(i,j,k) ) = (float)resampled_box[HII_R_INDEX(i,j,k)]; - } - } - } - } - } - free(resampled_box); - LOG_DEBUG("Finished perturbing the density field"); - - LOG_SUPER_DEBUG("density_perturb: "); - if(user_params->PERTURB_ON_HIGH_RES){ - debugSummarizeBoxComplex(HIRES_density_perturb, dimension, user_params->NON_CUBIC_FACTOR, " "); - }else{ - debugSummarizeBoxComplex(LOWRES_density_perturb, dimension, user_params->NON_CUBIC_FACTOR, " "); - } - - // deallocate -#pragma omp parallel shared(boxes,velocity_displacement_factor,dimension) private(i,j,k) num_threads(user_params->N_THREADS) - { -#pragma omp for - for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ - 
if(user_params->PERTURB_ON_HIGH_RES) { - boxes->hires_vx[R_INDEX(i,j,k)] /= velocity_displacement_factor; // convert back to z = 0 quantity - boxes->hires_vy[R_INDEX(i,j,k)] /= velocity_displacement_factor; // convert back to z = 0 quantity - boxes->hires_vz[R_INDEX(i,j,k)] /= (velocity_displacement_factor/user_params->NON_CUBIC_FACTOR); // convert back to z = 0 quantity - } - else { - boxes->lowres_vx[HII_R_INDEX(i,j,k)] /= velocity_displacement_factor; // convert back to z = 0 quantity - boxes->lowres_vy[HII_R_INDEX(i,j,k)] /= velocity_displacement_factor; // convert back to z = 0 quantity - boxes->lowres_vz[HII_R_INDEX(i,j,k)] /= (velocity_displacement_factor/user_params->NON_CUBIC_FACTOR); // convert back to z = 0 quantity - } - } - } - } - } - - if(user_params->USE_2LPT){ -#pragma omp parallel shared(boxes,velocity_displacement_factor_2LPT,dimension) private(i,j,k) num_threads(user_params->N_THREADS) - { -#pragma omp for - for (i=0; iNON_CUBIC_FACTOR*dimension); k++){ - if(user_params->PERTURB_ON_HIGH_RES) { - boxes->hires_vx_2LPT[R_INDEX(i,j,k)] /= velocity_displacement_factor_2LPT; // convert back to z = 0 quantity - boxes->hires_vy_2LPT[R_INDEX(i,j,k)] /= velocity_displacement_factor_2LPT; // convert back to z = 0 quantity - boxes->hires_vz_2LPT[R_INDEX(i,j,k)] /= (velocity_displacement_factor_2LPT/user_params->NON_CUBIC_FACTOR); // convert back to z = 0 quantity - } - else { - boxes->lowres_vx_2LPT[HII_R_INDEX(i,j,k)] /= velocity_displacement_factor_2LPT; // convert back to z = 0 quantity - boxes->lowres_vy_2LPT[HII_R_INDEX(i,j,k)] /= velocity_displacement_factor_2LPT; // convert back to z = 0 quantity - boxes->lowres_vz_2LPT[HII_R_INDEX(i,j,k)] /= (velocity_displacement_factor_2LPT/user_params->NON_CUBIC_FACTOR); // convert back to z = 0 quantity - } - } - } - } - } - } - LOG_DEBUG("Cleanup velocities for perturb"); } - // Now, if I still have the high resolution density grid (HIRES_density_perturb) I need to downsample it to the low-resolution grid - 
if(user_params->PERTURB_ON_HIGH_RES) { - - LOG_DEBUG("Downsample the high-res perturbed density"); - - // Transform to Fourier space to sample (filter) the box - dft_r2c_cube(user_params->USE_FFTW_WISDOM, user_params->DIM, D_PARA, user_params->N_THREADS, HIRES_density_perturb); - - // Need to save a copy of the high-resolution unfiltered density field for the velocities - memcpy(HIRES_density_perturb_saved, HIRES_density_perturb, sizeof(fftwf_complex)*KSPACE_NUM_PIXELS); - - // Now filter the box - // ALICE: RES=0 (dimension=DIM, midpoint=MIDDLE), filter_type=0 (real space top-hat filtering) - if (user_params->DIM != user_params->HII_DIM) { - filter_box(HIRES_density_perturb, 0, 0, L_FACTOR*user_params->BOX_LEN/(user_params->HII_DIM+0.0), 0.); - } - - // FFT back to real space - dft_c2r_cube(user_params->USE_FFTW_WISDOM, user_params->DIM, D_PARA, user_params->N_THREADS, HIRES_density_perturb); - - // Renormalise the FFT'd box -#pragma omp parallel shared(HIRES_density_perturb,LOWRES_density_perturb,f_pixel_factor,mass_factor) private(i,j,k) num_threads(user_params->N_THREADS) - { -#pragma omp for - for (i=0; iHII_DIM; i++){ - for (j=0; jHII_DIM; j++){ - for (k=0; kN_THREADS) - { -#pragma omp for - for (i=0; iHII_DIM; i++){ - for (j=0; jHII_DIM; j++){ - for (k=0; k>>( + d_box, hires_density, hires_vx, hires_vy, hires_vz, lowres_vx, lowres_vy, lowres_vz, + hires_vx_2LPT, hires_vy_2LPT, hires_vz_2LPT, lowres_vx_2LPT, lowres_vy_2LPT, lowres_vz_2LPT, + dimension, user_params->DIM, d_para, hii_d, hii_d_para, user_params->NON_CUBIC_FACTOR, + f_pixel_factor, init_growth_factor, user_params->PERTURB_ON_HIGH_RES, user_params->USE_2LPT); + + // Only use during development! 
+ err = cudaDeviceSynchronize(); + CATCH_CUDA_ERROR(err); + err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); + Throw(CudaError); } - LOG_SUPER_DEBUG("LOWRES_density_perturb: "); - debugSummarizeBoxComplex(LOWRES_density_perturb, user_params->HII_DIM, user_params->NON_CUBIC_FACTOR, " "); - - // transform to k-space - dft_r2c_cube(user_params->USE_FFTW_WISDOM, user_params->HII_DIM, HII_D_PARA, user_params->N_THREADS, LOWRES_density_perturb); - - // smooth the field - // ALICE: RES=1 (dimension=HII_DIM, midpoint=HII_MIDDLE), filter_type=2 (Gaussian filtering) - if (!global_params.EVOLVE_DENSITY_LINEARLY && global_params.SMOOTH_EVOLVED_DENSITY_FIELD){ - filter_box(LOWRES_density_perturb, 1, 2, global_params.R_smooth_density*user_params->BOX_LEN/(float)user_params->HII_DIM, 0.); + // Copy results from device to host + // cudaMemcpy(resampled_box, d_box, size_double, cudaMemcpyDeviceToHost); + err = cudaMemcpy(resampled_box, d_box, size_double, cudaMemcpyDeviceToHost); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CudaError); } - LOG_SUPER_DEBUG("LOWRES_density_perturb after smoothing: "); - debugSummarizeBoxComplex(LOWRES_density_perturb, user_params->HII_DIM, user_params->NON_CUBIC_FACTOR, " "); - - // save a copy of the k-space density field - memcpy(LOWRES_density_perturb_saved, LOWRES_density_perturb, sizeof(fftwf_complex)*HII_KSPACE_NUM_PIXELS); - - dft_c2r_cube(user_params->USE_FFTW_WISDOM, user_params->HII_DIM, HII_D_PARA, user_params->N_THREADS, LOWRES_density_perturb); - - LOG_SUPER_DEBUG("LOWRES_density_perturb back in real space: "); - debugSummarizeBoxComplex(LOWRES_density_perturb, user_params->HII_DIM, user_params->NON_CUBIC_FACTOR, " "); - - // normalize after FFT - // ALICE: divide by total pixels; if result < -1 changed it to just above -1. 
- int bad_count=0; -#pragma omp parallel shared(LOWRES_density_perturb) private(i,j,k) num_threads(user_params->N_THREADS) reduction(+: bad_count) - { -#pragma omp for - for(i=0; iHII_DIM; i++){ - for(j=0; jHII_DIM; j++){ - for(k=0; kPERTURB_ON_HIGH_RES) { + cudaFree(hires_vx); + cudaFree(hires_vy); + cudaFree(hires_vz); } - if(bad_count>=5) LOG_WARNING("Total number of bad indices for LOW_density_perturb: %d", bad_count); - LOG_SUPER_DEBUG("LOWRES_density_perturb back in real space (normalized): "); - debugSummarizeBoxComplex(LOWRES_density_perturb, user_params->HII_DIM, user_params->NON_CUBIC_FACTOR, " "); - -// ALICE: copy LOWRES_density_perturb cell values to density cells -#pragma omp parallel shared(perturbed_field,LOWRES_density_perturb) private(i,j,k) num_threads(user_params->N_THREADS) - { -#pragma omp for - for (i=0; iHII_DIM; i++){ - for (j=0; jHII_DIM; j++){ - for (k=0; kdensity + HII_R_INDEX(i,j,k)) = *((float *)LOWRES_density_perturb + HII_R_FFT_INDEX(i,j,k)); - } - } - } + else { + cudaFree(lowres_vx); + cudaFree(lowres_vy); + cudaFree(lowres_vz); } - - // **** Convert to velocities ***** // - LOG_DEBUG("Generate velocity fields"); - - float dDdt_over_D; - - dDdt_over_D = dDdt/growth_factor; - - - if (user_params->KEEP_3D_VELOCITIES){ - compute_perturbed_velocities( - 0, - user_params, - HIRES_density_perturb, - HIRES_density_perturb_saved, - LOWRES_density_perturb, - LOWRES_density_perturb_saved, - dDdt_over_D, - dimension, - switch_mid, - f_pixel_factor, - perturbed_field->velocity_x - ); - compute_perturbed_velocities( - 1, - user_params, - HIRES_density_perturb, - HIRES_density_perturb_saved, - LOWRES_density_perturb, - LOWRES_density_perturb_saved, - dDdt_over_D, - dimension, - switch_mid, - f_pixel_factor, - perturbed_field->velocity_y - ); + if (user_params->USE_2LPT) { + if (user_params->PERTURB_ON_HIGH_RES) { + cudaFree(hires_vx_2LPT); + cudaFree(hires_vy_2LPT); + cudaFree(hires_vz_2LPT); + } + else { + cudaFree(lowres_vx_2LPT); + 
cudaFree(lowres_vy_2LPT); + cudaFree(lowres_vz_2LPT); + } } - compute_perturbed_velocities( - 2, - user_params, - HIRES_density_perturb, - HIRES_density_perturb_saved, - LOWRES_density_perturb, - LOWRES_density_perturb_saved, - dDdt_over_D, - dimension, - switch_mid, - f_pixel_factor, - perturbed_field->velocity_z - ); - - fftwf_cleanup_threads(); - fftwf_cleanup(); - fftwf_forget_wisdom(); - - // deallocate - fftwf_free(LOWRES_density_perturb); - fftwf_free(LOWRES_density_perturb_saved); - if(user_params->PERTURB_ON_HIGH_RES) { - fftwf_free(HIRES_density_perturb); - fftwf_free(HIRES_density_perturb_saved); + err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CudaError); } - fftwf_cleanup(); - } // End of Try{} - Catch(status){ - return(status); - } + // LOG_DEBUG("resampled_box[:50] = "); + // for (int element = 0; element < 50; element++) { + // LOG_DEBUG("%.4e ", resampled_box[element]); + // } + // LOG_DEBUG("\n"); - return(0); + return resampled_box; } diff --git a/src/py21cmfast/src/PerturbField.h b/src/py21cmfast/src/PerturbField.h index bde8020b2..79f713aa0 100644 --- a/src/py21cmfast/src/PerturbField.h +++ b/src/py21cmfast/src/PerturbField.h @@ -13,13 +13,9 @@ int ComputePerturbField( float redshift, UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes, PerturbedField *perturbed_field ); -int ComputePerturbField_cpu( - float redshift, UserParams *user_params, CosmoParams *cosmo_params, - InitialConditions *boxes, PerturbedField *perturbed_field -); -int ComputePerturbField_gpu( - float redshift, UserParams *user_params, CosmoParams *cosmo_params, - InitialConditions *boxes, PerturbedField *perturbed_field +double* MapMass_gpu( + UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes, double *resampled_box, + int dimension, float f_pixel_factor, float init_growth_factor ); #ifdef __cplusplus From 9162148f6de999dd16ac0a9d44ff5c2cd3d06a28 Mon Sep 
17 00:00:00 2001 From: alserene Date: Mon, 28 Oct 2024 09:59:30 +1100 Subject: [PATCH 016/145] Trim trailing whitespace. --- src/py21cmfast/src/PerturbField.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py21cmfast/src/PerturbField.cu b/src/py21cmfast/src/PerturbField.cu index de1d024bb..66ed1a631 100644 --- a/src/py21cmfast/src/PerturbField.cu +++ b/src/py21cmfast/src/PerturbField.cu @@ -81,7 +81,7 @@ __global__ void perturb_density_field_kernel( int k = idx % d_para; unsigned long long r_index = compute_R_INDEX(i, j, k, DIM, d_para); - + // Map index to location in units of box size double xf = (i + 0.5) / (DIM + 0.0); double yf = (j + 0.5) / (DIM + 0.0); @@ -235,7 +235,7 @@ double* MapMass_gpu( ) { // Makes the parameter structs visible to a variety of functions/macros // Do each time to avoid Python garbage collection issues - Broadcast_struct_global_noastro(user_params,cosmo_params); + Broadcast_struct_global_noastro(user_params, cosmo_params); // Box shapes from outputs.py and convenience macros size_t size_double, size_float; From c5f64507b638c3622b1bb83e9e4fdb20182ddb8d Mon Sep 17 00:00:00 2001 From: alserene Date: Mon, 28 Oct 2024 13:28:13 +1100 Subject: [PATCH 017/145] Add CUDAError to list of exitcodes. --- src/py21cmfast/_utils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/py21cmfast/_utils.py b/src/py21cmfast/_utils.py index f3002abb4..fa7687c34 100644 --- a/src/py21cmfast/_utils.py +++ b/src/py21cmfast/_utils.py @@ -175,11 +175,17 @@ class MassDepZetaError(ParameterError): class MemoryAllocError(FatalCError): - """An exception when unable to allocated memory.""" + """An exception when unable to allocate memory.""" default_message = """An error has occured while attempting to allocate memory! (check the LOG for more info)""" +class CUDAError(FatalCError): + """An exception when an error occurs with CUDA.""" + + default_message = """A CUDA error has occured! 
(check the LOG for more info)""" + + SUCCESS = 0 IOERROR = 1 GSLERROR = 2 @@ -190,6 +196,7 @@ class MemoryAllocError(FatalCError): INFINITYORNANERROR = 7 MASSDEPZETAERROR = 8 MEMORYALLOCERROR = 9 +CUDAERROR = 10 def _process_exitcode(exitcode, fnc, args): @@ -209,6 +216,7 @@ def _process_exitcode(exitcode, fnc, args): INFINITYORNANERROR: InfinityorNaNError, MASSDEPZETAERROR: MassDepZetaError, MEMORYALLOCERROR: MemoryAllocError, + CUDAERROR: CUDAError, }[exitcode] except KeyError: # pragma: no cover raise FatalCError( From b9d1fd39fa1e7d7455b0d31358b061588d07a928 Mon Sep 17 00:00:00 2001 From: alserene Date: Mon, 28 Oct 2024 13:29:07 +1100 Subject: [PATCH 018/145] Add CUDAError to list of error codes. --- src/py21cmfast/src/exceptions.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py21cmfast/src/exceptions.h b/src/py21cmfast/src/exceptions.h index 00ddd2bf7..8b1fea600 100644 --- a/src/py21cmfast/src/exceptions.h +++ b/src/py21cmfast/src/exceptions.h @@ -22,10 +22,10 @@ extern struct exception_context the_exception_context[1]; #define InfinityorNaNError 7 #define MassDepZetaError 8 #define MemoryAllocError 9 -#define CudaError 10 +#define CUDAError 10 #define CATCH_GSL_ERROR(status) if(status>0) {LOG_ERROR("GSL Error Encountered (Code = %d): %s", status, gsl_strerror(status)); Throw(GSLError);} -#define CATCH_CUDA_ERROR(err) if(err != cudaSuccess) {LOG_ERROR("CUDA Error Encountered: %s", cudaGetErrorString(err)); Throw(CudaError);} +#define CATCH_CUDA_ERROR(err) if(err != cudaSuccess) {LOG_ERROR("CUDA Error Encountered: %s", cudaGetErrorString(err)); Throw(CUDAError);} #ifdef __cplusplus From 6fdb4e839e30b7b2e190a937c2e31eb8cfc90380 Mon Sep 17 00:00:00 2001 From: alserene Date: Mon, 28 Oct 2024 13:29:34 +1100 Subject: [PATCH 019/145] Clean up kernel. 
--- src/py21cmfast/src/PerturbField.cu | 56 ++++++++++-------------------- src/py21cmfast/src/filtering.cu | 23 +++++------- 2 files changed, 27 insertions(+), 52 deletions(-) diff --git a/src/py21cmfast/src/PerturbField.cu b/src/py21cmfast/src/PerturbField.cu index 66ed1a631..b8f20d9ce 100644 --- a/src/py21cmfast/src/PerturbField.cu +++ b/src/py21cmfast/src/PerturbField.cu @@ -1,4 +1,5 @@ // Re-write of perturb_field.c for being accessible within the MCMC + #include #include #include @@ -9,7 +10,6 @@ // GPU #include #include -// #include #include "cexcept.h" #include "exceptions.h" @@ -102,7 +102,7 @@ __global__ void perturb_density_field_kernel( unsigned long long HII_i = (unsigned long long)(i / f_pixel_factor); unsigned long long HII_j = (unsigned long long)(j / f_pixel_factor); unsigned long long HII_k = (unsigned long long)(k / f_pixel_factor); - HII_index = compute_HII_R_INDEX(HII_i, HII_j, HII_k, hii_d, hii_d_para); // This is accessing HII_D and HII_D_PARA macros! + HII_index = compute_HII_R_INDEX(HII_i, HII_j, HII_k, hii_d, hii_d_para); // xf += __ldg(&lowres_vx[HII_index]); // yf += __ldg(&lowres_vy[HII_index]); // zf += __ldg(&lowres_vz[HII_index]); @@ -174,19 +174,16 @@ __global__ void perturb_density_field_kernel( // lowest grid point index d_x = 1. - d_x; xi -= 1; - // xi += (xi + dimension) % dimension; // Only this critera is possible as iterate back by one (we cannot exceed DIM) - xi = (xi + dimension) % dimension; + xi = (xi + dimension) % dimension; // Only this critera is possible as iterate back by one (we cannot exceed DIM) } if(yf < (double)(yi + 0.5)) { d_y = 1. - d_y; yi -= 1; - // yi += (yi + dimension) % dimension; yi = (yi + dimension) % dimension; } if(zf < (double)(zi + 0.5)) { d_z = 1. 
- d_z; zi -= 1; - // zi += (zi + (unsigned long long)(non_cubic_factor * dimension)) % (unsigned long long)(non_cubic_factor * dimension); zi = (zi + (unsigned long long)(non_cubic_factor * dimension)) % (unsigned long long)(non_cubic_factor * dimension); } // The fractions of mass which will remain with perturbed cell @@ -239,49 +236,39 @@ double* MapMass_gpu( // Box shapes from outputs.py and convenience macros size_t size_double, size_float; - // unsigned long long num_pixels; if(user_params->PERTURB_ON_HIGH_RES) { - // num_pixels = TOT_NUM_PIXELS; size_double = TOT_NUM_PIXELS * sizeof(double); size_float = TOT_NUM_PIXELS * sizeof(float); } else { - // num_pixels = HII_TOT_NUM_PIXELS; size_double = HII_TOT_NUM_PIXELS * sizeof(double); size_float = HII_TOT_NUM_PIXELS * sizeof(float); } - // Allocat host memory for output box - // double* resampled_box = (double*)malloc(size_double); - // double* resampled_box = (double *)calloc(num_pixels, sizeof(double)); // initialise to 0 - // Allocate device memory for output box and set to 0. 
- double* d_box; - cudaMalloc((void**)&d_box, size_double); - cudaMemset(d_box, 0, size_double); // fills size bytes with byte=0 + double* d_resampled_box; + cudaMalloc((void**)&d_resampled_box, size_double); + cudaMemset(d_resampled_box, 0, size_double); // fills size_double bytes with byte=0 cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CudaError); + Throw(CUDAError); } // Allocate device memory for density field float* hires_density; - // cudaMalloc(&hires_density, (HII_TOT_NUM_PIXELS * sizeof(double))); // from 21cmFAST.h, outputs.py & indexing.h - // cudaMemcpy(hires_density, boxes->hires_density, (HII_TOT_NUM_PIXELS * sizeof(double)), cudaMemcpyHostToDevice); cudaMalloc(&hires_density, (TOT_NUM_PIXELS * sizeof(float))); // from 21cmFAST.h, outputs.py & indexing.h cudaMemcpy(hires_density, boxes->hires_density, (TOT_NUM_PIXELS * sizeof(float)), cudaMemcpyHostToDevice); err = cudaGetLastError(); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CudaError); + Throw(CUDAError); } // Allocate device memory and copy arrays to device as per user_params - // floats as per 21cmFAST.h - float* hires_vx; + float* hires_vx; // floats as per 21cmFAST.h float* hires_vy; float* hires_vz; float* lowres_vx; @@ -332,20 +319,19 @@ double* MapMass_gpu( err = cudaGetLastError(); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CudaError); + Throw(CUDAError); } - // Seemingly can't pass macro straight to kernel + // Can't pass macro straight to kernel long long d_para = D_PARA; long long hii_d = HII_D; long long hii_d_para = HII_D_PARA; // Invoke kernel int threadsPerBlock = 256; - // int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; int numBlocks = (TOT_NUM_PIXELS + threadsPerBlock - 1) / threadsPerBlock; perturb_density_field_kernel<<>>( - d_box, hires_density, hires_vx, hires_vy, hires_vz, lowres_vx, 
lowres_vy, lowres_vz, + d_resampled_box, hires_density, hires_vx, hires_vy, hires_vz, lowres_vx, lowres_vy, lowres_vz, hires_vx_2LPT, hires_vy_2LPT, hires_vz_2LPT, lowres_vx_2LPT, lowres_vy_2LPT, lowres_vz_2LPT, dimension, user_params->DIM, d_para, hii_d, hii_d_para, user_params->NON_CUBIC_FACTOR, f_pixel_factor, init_growth_factor, user_params->PERTURB_ON_HIGH_RES, user_params->USE_2LPT); @@ -353,22 +339,22 @@ double* MapMass_gpu( // Only use during development! err = cudaDeviceSynchronize(); CATCH_CUDA_ERROR(err); + err = cudaGetLastError(); if (err != cudaSuccess) { LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); - Throw(CudaError); + Throw(CUDAError); } // Copy results from device to host - // cudaMemcpy(resampled_box, d_box, size_double, cudaMemcpyDeviceToHost); - err = cudaMemcpy(resampled_box, d_box, size_double, cudaMemcpyDeviceToHost); + err = cudaMemcpy(resampled_box, d_resampled_box, size_double, cudaMemcpyDeviceToHost); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CudaError); + Throw(CUDAError); } // Deallocate device memory - cudaFree(d_box); + cudaFree(d_resampled_box); cudaFree(hires_density); if (user_params->PERTURB_ON_HIGH_RES) { @@ -397,14 +383,8 @@ double* MapMass_gpu( err = cudaGetLastError(); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CudaError); + Throw(CUDAError); } - // LOG_DEBUG("resampled_box[:50] = "); - // for (int element = 0; element < 50; element++) { - // LOG_DEBUG("%.4e ", resampled_box[element]); - // } - // LOG_DEBUG("\n"); - return resampled_box; } diff --git a/src/py21cmfast/src/filtering.cu b/src/py21cmfast/src/filtering.cu index 90a99d0ac..492957449 100644 --- a/src/py21cmfast/src/filtering.cu +++ b/src/py21cmfast/src/filtering.cu @@ -73,7 +73,6 @@ __device__ inline double spherical_shell_filter(double k, double R_outer, double - sin(kR_inner) + cos(kR_inner) * kR_inner); } -// __global__ void filter_box_kernel(fftwf_complex 
*box, int dimension, int midpoint, int midpoint_para, double delta_k, float R, float R_param, double R_const, int filter_type) { __global__ void filter_box_kernel(cuFloatComplex *box, size_t size, int dimension, int midpoint, int midpoint_para, double delta_k, float R, float R_param, double R_const, int filter_type) { // Get index of box (flattened k-box) @@ -125,17 +124,16 @@ __global__ void filter_box_kernel(cuFloatComplex *box, size_t size, int dimensio // box[idx] *= spherical_shell_filter(sqrt(k_mag_sq), R, R_param); box[idx] = cuCmulf(box[idx], make_cuFloatComplex((float)spherical_shell_filter(sqrt(k_mag_sq), R, R_param), 0.f)); } - // This doesn't work from device - // else { - // if (idx == 0) { - // LOG_WARNING("Filter type %i is undefined. Box is unfiltered.", filter_type); - // } - // } - } void filter_box_gpu(fftwf_complex *box, int RES, int filter_type, float R, float R_param) { + // Check for valid filter type + if (filter_type < 0 || filter_type > 4) { + LOG_WARNING("Filter type %i is undefined. Box is unfiltered.", filter_type); + return; + } + // Get required values int dimension, midpoint, midpoint_para, num_pixels; switch(RES) { @@ -175,20 +173,17 @@ void filter_box_gpu(fftwf_complex *box, int RES, int filter_type, float R, float // Invoke kernel int threadsPerBlock = 256; int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; + // d_box must be cast to cuFloatComplex (from fftwf_complex) for CUDA filter_box_kernel<<>>(reinterpret_cast(d_box), size, dimension, midpoint, midpoint_para, delta_k, R, R_param, R_const, filter_type); - // filter_box_kernel<<>>((cuFloatComplex *)d_box, dimension, midpoint, midpoint_para, delta_k, R, R_param, R_const, filter_type); // Only use during development! 
cudaError_t err = cudaDeviceSynchronize(); CATCH_CUDA_ERROR(err); - // if (err != cudaSuccess) { - // LOG_ERROR("cudaDeviceSynchronize error: %s", cudaGetErrorString(err)); - // Throw(RuntimeError); - // } + err = cudaGetLastError(); if (err != cudaSuccess) { LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); - Throw(CudaError); + Throw(CUDAError); } // Copy results from device to host From 0b36ebc78af3bfece6ae8822400165d764647e77 Mon Sep 17 00:00:00 2001 From: alserene Date: Mon, 28 Oct 2024 14:10:21 +1100 Subject: [PATCH 020/145] Remove cudaDeviceSynchronize. --- src/py21cmfast/src/PerturbField.cu | 6 +++--- src/py21cmfast/src/filtering.cu | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/py21cmfast/src/PerturbField.cu b/src/py21cmfast/src/PerturbField.cu index b8f20d9ce..bcf8ad2b2 100644 --- a/src/py21cmfast/src/PerturbField.cu +++ b/src/py21cmfast/src/PerturbField.cu @@ -336,9 +336,9 @@ double* MapMass_gpu( dimension, user_params->DIM, d_para, hii_d, hii_d_para, user_params->NON_CUBIC_FACTOR, f_pixel_factor, init_growth_factor, user_params->PERTURB_ON_HIGH_RES, user_params->USE_2LPT); - // Only use during development! - err = cudaDeviceSynchronize(); - CATCH_CUDA_ERROR(err); + // // Only use during development! + // err = cudaDeviceSynchronize(); + // CATCH_CUDA_ERROR(err); err = cudaGetLastError(); if (err != cudaSuccess) { diff --git a/src/py21cmfast/src/filtering.cu b/src/py21cmfast/src/filtering.cu index 492957449..6383b5edf 100644 --- a/src/py21cmfast/src/filtering.cu +++ b/src/py21cmfast/src/filtering.cu @@ -176,9 +176,11 @@ void filter_box_gpu(fftwf_complex *box, int RES, int filter_type, float R, float // d_box must be cast to cuFloatComplex (from fftwf_complex) for CUDA filter_box_kernel<<>>(reinterpret_cast(d_box), size, dimension, midpoint, midpoint_para, delta_k, R, R_param, R_const, filter_type); - // Only use during development! 
- cudaError_t err = cudaDeviceSynchronize(); - CATCH_CUDA_ERROR(err); + cudaError_t err; + + // // Only use during development! + // err = cudaDeviceSynchronize(); + // CATCH_CUDA_ERROR(err); err = cudaGetLastError(); if (err != cudaSuccess) { From 477d076990be4262279d766f7debacae6a4c0446 Mon Sep 17 00:00:00 2001 From: alserene Date: Tue, 29 Oct 2024 11:55:53 +1100 Subject: [PATCH 021/145] Capital letter in comment --- build_cffi.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/build_cffi.py b/build_cffi.py index 61c953fe4..2a8be0578 100755 --- a/build_cffi.py +++ b/build_cffi.py @@ -27,13 +27,12 @@ if f.endswith(".c") ] -# compiled cuda code -# extra_objects = [os.path.join(CLOC, "hello_world.o")] +# Compiled cuda code extra_objects = [ os.path.join(CLOC, "hello_world.o"), os.path.join(CLOC, "filtering.o"), os.path.join(CLOC, "PerturbField.o"), - ] +] extra_link_args = ["-lcudart"] # Set the C-code logging level. From 1d9ff1c44dcec282d68f22da756ac1029495f6a9 Mon Sep 17 00:00:00 2001 From: --local <> Date: Thu, 31 Oct 2024 23:20:43 +1100 Subject: [PATCH 022/145] reset profiling libraries --- build_cffi.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build_cffi.py b/build_cffi.py index 75ee9b8e2..e2bfac87e 100755 --- a/build_cffi.py +++ b/build_cffi.py @@ -74,7 +74,8 @@ # stuff for gperftools if "PROFILE" in os.environ: - libraries += ["profiler", "tcmalloc"] + #libraries += ["profiler", "tcmalloc"] + libraries += ["profiler"] # we need this even if DEBUG is off extra_compile_args += ["-g"] From b32ebce483a7c326582c23271e0c43af569f53dd Mon Sep 17 00:00:00 2001 From: James Davies Date: Fri, 18 Oct 2024 17:31:29 +0200 Subject: [PATCH 023/145] fix purging of lowres density when needed --- src/py21cmfast/outputs.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/py21cmfast/outputs.py b/src/py21cmfast/outputs.py index 5be3be82f..5fca83678 100644 --- a/src/py21cmfast/outputs.py +++ 
b/src/py21cmfast/outputs.py @@ -141,6 +141,8 @@ def prepare_for_halos(self, flag_options: FlagOptions, force: bool = False): def prepare_for_spin_temp(self, flag_options: FlagOptions, force: bool = False): """Ensure ICs have all boxes required for spin_temp, and no more.""" keep = [] + if flag_options.HALO_STOCHASTICITY and self.user_params.AVG_BELOW_SAMPLER: + keep.append("lowres_density") # for the sub-resolution halos if self.user_params.USE_RELATIVE_VELOCITIES: keep.append("lowres_vcb") self.prepare(keep=keep, force=force) @@ -481,7 +483,7 @@ def get_required_input_arrays(self, input_box: _BaseOutputStruct) -> list[str]: if not self.flag_options.FIXED_HALO_GRIDS: required += ["halo_coords", "halo_masses", "star_rng", "sfr_rng"] elif isinstance(input_box, PerturbedField): - if self.flag_options.FIXED_HALO_GRIDS or self.user_params.AVG_BELOW_SAMPLER: + if self.flag_options.FIXED_HALO_GRIDS: required += ["density"] elif isinstance(input_box, TsBox): required += ["J_21_LW_box"] @@ -490,6 +492,11 @@ def get_required_input_arrays(self, input_box: _BaseOutputStruct) -> list[str]: elif isinstance(input_box, InitialConditions): if self.user_params.USE_RELATIVE_VELOCITIES: required += ["lowres_vcb"] + if ( + self.flag_options.HALO_STOCHASTICITY + and self.user_params.AVG_BELOW_SAMPLER + ): + required += ["lowres_density"] else: raise ValueError(f"{type(input_box)} is not an input required for HaloBox!") From a374af3bd3b8fa0c7961d0adf17b88e458d31ba7 Mon Sep 17 00:00:00 2001 From: alserene Date: Fri, 8 Nov 2024 17:12:16 +1100 Subject: [PATCH 024/145] First draft of SpinTemp kernel. 
--- src/py21cmfast/src/SpinTemperatureBox.c | 75 ++++--- src/py21cmfast/src/SpinTemperatureBox.cu | 249 +++++++++++++++++++++++ src/py21cmfast/src/SpinTemperatureBox.h | 8 + 3 files changed, 299 insertions(+), 33 deletions(-) create mode 100644 src/py21cmfast/src/SpinTemperatureBox.cu diff --git a/src/py21cmfast/src/SpinTemperatureBox.c b/src/py21cmfast/src/SpinTemperatureBox.c index 0affacb37..7ffd6fef9 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.c +++ b/src/py21cmfast/src/SpinTemperatureBox.c @@ -935,42 +935,51 @@ void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, } } - #pragma omp parallel num_threads(user_params_global->N_THREADS) - { - unsigned long long int box_ct; - double curr_dens; - double curr_mcrit = 0.; - double fcoll, dfcoll; - double fcoll_MINI=0; - - #pragma omp for reduction(+:ave_sfrd_buf,ave_sfrd_buf_mini) - for (box_ct=0; box_ctUSE_MINI_HALOS) - curr_mcrit = Mcrit_R_grid[box_ct]; - - if(flag_options_global->USE_MASS_DEPENDENT_ZETA){ - fcoll = EvaluateSFRD_Conditional(curr_dens,zpp_growth[R_ct],M_min_R[R_ct],M_max_R[R_ct],M_max_R[R_ct],sigma_max[R_ct], - Mcrit_atom_interp_table[R_ct],Mlim_Fstar_g); - sfrd_grid[box_ct] = (1.+curr_dens)*fcoll; - - if (flag_options_global->USE_MINI_HALOS){ - fcoll_MINI = EvaluateSFRD_Conditional_MINI(curr_dens,curr_mcrit,zpp_growth[R_ct],M_min_R[R_ct],M_max_R[R_ct],M_max_R[R_ct], - sigma_max[R_ct],Mcrit_atom_interp_table[R_ct],Mlim_Fstar_MINI_g); - sfrd_grid_mini[box_ct] = (1.+curr_dens)*fcoll_MINI; - } - } - else{ - fcoll = EvaluateFcoll_delta(curr_dens,zpp_growth[R_ct],sigma_min[R_ct],sigma_max[R_ct]); - dfcoll = EvaluatedFcolldz(curr_dens,zpp_for_evolve_list[R_ct],sigma_min[R_ct],sigma_max[R_ct]); - sfrd_grid[box_ct] = (1.+curr_dens)*dfcoll; + // -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + // If GPU is to be used and flags are ideal, call GPU 
version of reduction + if (true && flag_options_global->USE_MASS_DEPENDENT_ZETA && user_params_global->USE_INTERPOLATION_TABLES && !flag_options_global->USE_MINI_HALOS) { + calculate_sfrd_from_grid_gpu(&SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, ave_sfrd_buf, HII_TOT_NUM_PIXELS); + } else { + // Else, run CPU reduction + #pragma omp parallel num_threads(user_params_global->N_THREADS) + { + unsigned long long int box_ct; + double curr_dens; + double curr_mcrit = 0.; + double fcoll, dfcoll; + double fcoll_MINI=0; + + #pragma omp for reduction(+:ave_sfrd_buf,ave_sfrd_buf_mini) + for (box_ct=0; box_ctUSE_MINI_HALOS) + curr_mcrit = Mcrit_R_grid[box_ct]; + + if(flag_options_global->USE_MASS_DEPENDENT_ZETA){ + fcoll = EvaluateSFRD_Conditional(curr_dens,zpp_growth[R_ct],M_min_R[R_ct],M_max_R[R_ct],M_max_R[R_ct],sigma_max[R_ct], + Mcrit_atom_interp_table[R_ct],Mlim_Fstar_g); + sfrd_grid[box_ct] = (1.+curr_dens)*fcoll; + + if (flag_options_global->USE_MINI_HALOS){ + fcoll_MINI = EvaluateSFRD_Conditional_MINI(curr_dens,curr_mcrit,zpp_growth[R_ct],M_min_R[R_ct],M_max_R[R_ct],M_max_R[R_ct], + sigma_max[R_ct],Mcrit_atom_interp_table[R_ct],Mlim_Fstar_MINI_g); + sfrd_grid_mini[box_ct] = (1.+curr_dens)*fcoll_MINI; + } + } + else{ + fcoll = EvaluateFcoll_delta(curr_dens,zpp_growth[R_ct],sigma_min[R_ct],sigma_max[R_ct]); + dfcoll = EvaluatedFcolldz(curr_dens,zpp_for_evolve_list[R_ct],sigma_min[R_ct],sigma_max[R_ct]); + sfrd_grid[box_ct] = (1.+curr_dens)*dfcoll; + } + ave_sfrd_buf += fcoll; + ave_sfrd_buf_mini += fcoll_MINI; } - ave_sfrd_buf += fcoll; - ave_sfrd_buf_mini += fcoll_MINI; } } - *ave_sfrd = ave_sfrd_buf/HII_TOT_NUM_PIXELS; - *ave_sfrd_mini = ave_sfrd_buf_mini/HII_TOT_NUM_PIXELS; + // -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + *ave_sfrd = ave_sfrd_buf / HII_TOT_NUM_PIXELS; + *ave_sfrd_mini = ave_sfrd_buf_mini / 
HII_TOT_NUM_PIXELS; //These functions check for allocation free_conditional_tables(); diff --git a/src/py21cmfast/src/SpinTemperatureBox.cu b/src/py21cmfast/src/SpinTemperatureBox.cu new file mode 100644 index 000000000..ebcc2cd58 --- /dev/null +++ b/src/py21cmfast/src/SpinTemperatureBox.cu @@ -0,0 +1,249 @@ +// Most of the following includes likely can be removed. +#include +#include +#include +#include +#include +#include + +// we use thrust for reduction +#include +#include +#include // thrust::plus + +#include "cexcept.h" +#include "exceptions.h" +#include "logger.h" +#include "Constants.h" +#include "indexing.h" +#include "InputParameters.h" +#include "OutputStructs.h" +#include "heating_helper_progs.h" +#include "elec_interp.h" +#include "interp_tables.h" +#include "debugging.h" +#include "cosmology.h" +#include "hmf.h" +#include "dft.h" +#include "filtering.h" +#include "thermochem.h" + +#include "SpinTemperatureBox.h" + + +__device__ inline double EvaluateRGTable1D_f_gpu(double x, RGTable1D_f *table) { + + double x_min = table->x_min; + double x_width = table->x_width; + + int idx = (int)floor((x - x_min) / x_width); + + double table_val = x_min + x_width * (float)idx; + double interp_point = (x - table_val) / x_width; + + return table->y_arr[idx] * (1 - interp_point) + table->y_arr[idx + 1] * (interp_point); +} + +template +__device__ void warp_reduce(volatile double *sdata, unsigned int tid) { + // Reduce by half + // No syncing required with threads < 32 + if (threadsPerBlock >= 64) sdata[tid] += sdata[tid + 32]; + if (threadsPerBlock >= 32) sdata[tid] += sdata[tid + 16]; + if (threadsPerBlock >= 16) sdata[tid] += sdata[tid + 8]; + if (threadsPerBlock >= 8) sdata[tid] += sdata[tid + 4]; + if (threadsPerBlock >= 4) sdata[tid] += sdata[tid + 2]; + if (threadsPerBlock >= 2) sdata[tid] += sdata[tid + 1]; +} + +template +__global__ void compute_and_reduce( + RGTable1D_f *SFRD_conditional_table, // input data + float *dens_R_grid, // input data + double 
zpp_growth_R_ct, // input value + float *sfrd_grid, // star formation rate density grid to be updated + double *ave_sfrd_buf, // output buffer of length ceil(n / (threadsPerBlock * 2)) + unsigned int num_pixels // length of input data + ) { + + // An array to store intermediate summations + // Shared between all threads in block + extern __shared__ double sdata[]; + + unsigned int tid = threadIdx.x; // thread within current block + unsigned int i = blockIdx.x * (threadsPerBlock * 2) + tid; // index of input data + unsigned int gridSize = threadsPerBlock * 2 * gridDim.x; + + sdata[tid] = 0; + + // In bounds of gridSize, sum pairs of collapse fraction data together + // And update the star formation rate density grid. + double curr_dens_i; + double curr_dens_j; + double fcoll_i; + double fcoll_j; + + while (i < num_pixels) { + // Compute current density from density grid value * redshift-scaled growth factor + curr_dens_i = dens_R_grid[i] * zpp_growth_R_ct; + curr_dens_j = dens_R_grid[i + threadsPerBlock] * zpp_growth_R_ct; + + // Compute fraction of mass that has collapsed to form stars/other structures + fcoll_i = exp(EvaluateRGTable1D_f_gpu(curr_dens_i, &SFRD_conditional_table)); + fcoll_j = exp(EvaluateRGTable1D_f_gpu(curr_dens_j, &SFRD_conditional_table)); + + // Update the shared buffer with the collapse fractions + sdata[tid] += fcoll_i + fcoll_j; + + // Update the relevant cells in the star formation rate density grid + sfrd_grid[i] = (1. + curr_dens_i) * fcoll_i; + sfrd_grid[i + threadsPerBlock] = (1. 
+ curr_dens_j) * fcoll_j; + + i += gridSize; + } + __syncthreads(); + + // Reduce by half and sync (and repeat) + if (threadsPerBlock >= 512) { if (tid < 256) { sdata[tid] += sdata[tid + 256]; } __syncthreads(); } + if (threadsPerBlock >= 256) { if (tid < 128) { sdata[tid] += sdata[tid + 128]; } __syncthreads(); } + if (threadsPerBlock >= 128) { if (tid < 64) { sdata[tid] += sdata[tid + 64]; } __syncthreads(); } + + // Final reduction by separate kernel + if (tid < 32) warp_reduce(sdata, tid); + + // The first thread of each block updates the block totals + if (tid == 0) ave_sfrd_buf[blockIdx.x] = sdata[0]; +} + +void calculate_sfrd_from_grid_gpu( + RGTable1D_f *SFRD_conditional_table, // input data + float *dens_R_grid, // input data + double *zpp_growth, // input data + int R_ct, // input data + float *sfrd_grid, // star formation rate density grid to be updated + double *ave_sfrd_buf, // final output (to be divided by HII_TOT_NUM_PIXELS) + unsigned int num_pixels // length of input data +) { + // Input data + double zpp_growth_R_ct = zpp_growth[R_ct]; + + // The kernel only needs access to some fields of the SFRD_conditional_table struct + // so we allocate device memory and copy data only for required fields. + + // Create device pointers + double *x_min, *x_width, *y_arr; + // Allocate device memory + cudaMalloc(&x_min, sizeof(double)); + cudaMalloc(&x_width, sizeof(double)); + cudaMalloc(&y_arr, sizeof(double) * SFRD_conditional_table->n_bin); + // Copy data from host to device + cudaMemcpy(x_min, &SFRD_conditional_table->x_min, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in + cudaMemcpy(x_width, &SFRD_conditional_table->x_width, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in + cudaMemcpy(y_arr, SFRD_conditional_table->y_arr, sizeof(double) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice); + + // Allocate & populate device memory for other inputs. 
+ + // Create device pointers + float *d_dens_R_grid, *d_sfrd_grid; + // Allocate device memory + cudaMalloc(&d_dens_R_grid, sizeof(float) * num_pixels); + cudaMalloc(&d_sfrd_grid, sizeof(float) * num_pixels); + // Copy data from host to device + cudaMemcpy(d_dens_R_grid, &dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); + cudaMemcpy(d_sfrd_grid, &sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + + // Get max threads/block for device + int maxThreadsPerBlock; + cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0); + + // Set threads/block based on device max + int threadsPerBlock; + if (maxThreadsPerBlock >= 512) { + threadsPerBlock = 512; + } else if (maxThreadsPerBlock >= 256) { + threadsPerBlock = 256; + } else if (maxThreadsPerBlock >= 128) { + threadsPerBlock = 128; + } else if (maxThreadsPerBlock >= 64) { + threadsPerBlock = 64; + } else if (maxThreadsPerBlock >= 32) { + threadsPerBlock = 32; + } else { + threadsPerBlock = 16; + } + int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; // 91m & 256 -> 355959 + int smemSize = threadsPerBlock * sizeof(double); // shared memory + + // Allocate device memory for output buffer and set to 0 + double* d_ave_sfrd_buf; + unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); + cudaMalloc(&d_ave_sfrd_buf, sizeof(double) * buffer_length); // 91m & 256 -> 177979 + // cudaMalloc((void**)&d_ave_sfrd_buf, sizeof(double) * buffer_length); + cudaMemset(d_ave_sfrd_buf, 0, sizeof(double) * buffer_length); // fill with byte=0 + + // Invoke kernel + switch (threadsPerBlock) { + case 512: + compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels); + break; + case 256: + 
compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels); + break; + case 128: + compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels); + break; + case 64: + compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels); + break; + case 32: + compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels); + break; + default: + // LOG_WARNING("Thread size invalid; defaulting to 256.") + compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels); + } + + // Only use during development! + err = cudaDeviceSynchronize(); + CATCH_CUDA_ERROR(err); + + err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + + // Use thrust to reduce computed buffer values to one value. + + // Wrap device pointer in a thrust::device_ptr + thrust::device_ptr d_ave_sfrd_buf_ptr(d_ave_sfrd_buf); + // Reduce final buffer values to one value + ave_sfrd_buf = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + buffer_length, 0., thrust::plus()); + + // Copy results from device to host. + err = cudaMemcpy(sfrd_grid, d_sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyDeviceToHost); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + + // Deallocate device memory. 
+ cudaFree(x_min); + cudaFree(x_width); + cudaFree(y_arr); + cudaFree(d_dens_R_grid); + cudaFree(d_sfrd_grid); + cudaFree(d_ave_sfrd_buf); + + err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } +} diff --git a/src/py21cmfast/src/SpinTemperatureBox.h b/src/py21cmfast/src/SpinTemperatureBox.h index 2ace07e90..e90b87aca 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.h +++ b/src/py21cmfast/src/SpinTemperatureBox.h @@ -1,6 +1,8 @@ #ifndef _SPINTEMP_H #define _SPINTEMP_H +#include + #include "InputParameters.h" #include "OutputStructs.h" @@ -17,6 +19,12 @@ int UpdateXraySourceBox(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, HaloBox *halobox, double R_inner, double R_outer, int R_ct, XraySourceBox *source_box); +void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, float *sfrd_grid, + float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini); + +void calculate_sfrd_from_grid_gpu(RGTable1D_f *SFRD_conditional_table, float *dens_R_grid, double *zpp_growth, + int R_ct, float *sfrd_grid, double *ave_sfrd_buf, unsigned int num_pixels); + #ifdef __cplusplus } #endif From 8e62698f438b69c1588d87605949a97295f28a01 Mon Sep 17 00:00:00 2001 From: alserene Date: Fri, 8 Nov 2024 18:18:38 +1100 Subject: [PATCH 025/145] Make corrections to fix compilation errors. 
--- src/py21cmfast/src/SpinTemperatureBox.cu | 39 ++++++++++++------------ 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/py21cmfast/src/SpinTemperatureBox.cu b/src/py21cmfast/src/SpinTemperatureBox.cu index ebcc2cd58..725af6198 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.cu +++ b/src/py21cmfast/src/SpinTemperatureBox.cu @@ -27,21 +27,19 @@ #include "dft.h" #include "filtering.h" #include "thermochem.h" +#include "interpolation.h" #include "SpinTemperatureBox.h" -__device__ inline double EvaluateRGTable1D_f_gpu(double x, RGTable1D_f *table) { +__device__ inline double EvaluateRGTable1D_f_gpu(double x, double *x_min, double *x_width, double *y_arr) { - double x_min = table->x_min; - double x_width = table->x_width; + int idx = (int)floor((x - *x_min) / *x_width); - int idx = (int)floor((x - x_min) / x_width); + double table_val = *x_min + *x_width * (float)idx; + double interp_point = (x - table_val) / *x_width; - double table_val = x_min + x_width * (float)idx; - double interp_point = (x - table_val) / x_width; - - return table->y_arr[idx] * (1 - interp_point) + table->y_arr[idx + 1] * (interp_point); + return y_arr[idx] * (1 - interp_point) + y_arr[idx + 1] * (interp_point); } template @@ -58,7 +56,9 @@ __device__ void warp_reduce(volatile double *sdata, unsigned int tid) { template __global__ void compute_and_reduce( - RGTable1D_f *SFRD_conditional_table, // input data + double *x_min, // input data + double *x_width, // input data + double *y_arr, // input data float *dens_R_grid, // input data double zpp_growth_R_ct, // input value float *sfrd_grid, // star formation rate density grid to be updated @@ -89,8 +89,8 @@ __global__ void compute_and_reduce( curr_dens_j = dens_R_grid[i + threadsPerBlock] * zpp_growth_R_ct; // Compute fraction of mass that has collapsed to form stars/other structures - fcoll_i = exp(EvaluateRGTable1D_f_gpu(curr_dens_i, &SFRD_conditional_table)); - fcoll_j = exp(EvaluateRGTable1D_f_gpu(curr_dens_j, 
&SFRD_conditional_table)); + fcoll_i = exp(EvaluateRGTable1D_f_gpu(curr_dens_i, x_min, x_width, y_arr)); + fcoll_j = exp(EvaluateRGTable1D_f_gpu(curr_dens_j, x_min, x_width, y_arr)); // Update the shared buffer with the collapse fractions sdata[tid] += fcoll_i + fcoll_j; @@ -109,7 +109,7 @@ __global__ void compute_and_reduce( if (threadsPerBlock >= 128) { if (tid < 64) { sdata[tid] += sdata[tid + 64]; } __syncthreads(); } // Final reduction by separate kernel - if (tid < 32) warp_reduce(sdata, tid); + if (tid < 32) warp_reduce(sdata, tid); // The first thread of each block updates the block totals if (tid == 0) ave_sfrd_buf[blockIdx.x] = sdata[0]; @@ -190,23 +190,23 @@ void calculate_sfrd_from_grid_gpu( // Invoke kernel switch (threadsPerBlock) { case 512: - compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels); + compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 256: - compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels); + compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 128: - compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels); + compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 64: - compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, 
num_pixels); + compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 32: - compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels); + compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; default: // LOG_WARNING("Thread size invalid; defaulting to 256.") - compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels); + compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); } // Only use during development! @@ -224,7 +224,8 @@ void calculate_sfrd_from_grid_gpu( // Wrap device pointer in a thrust::device_ptr thrust::device_ptr d_ave_sfrd_buf_ptr(d_ave_sfrd_buf); // Reduce final buffer values to one value - ave_sfrd_buf = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + buffer_length, 0., thrust::plus()); + double reduced_value = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + buffer_length, 0., thrust::plus()); + *ave_sfrd_buf = reduced_value; // Copy results from device to host. err = cudaMemcpy(sfrd_grid, d_sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyDeviceToHost); From 7045288a2310093aca446dcf1d190f9a55f54d2d Mon Sep 17 00:00:00 2001 From: alserene Date: Fri, 8 Nov 2024 18:20:19 +1100 Subject: [PATCH 026/145] Add SpinTemp CUDA object to build file. 
--- build_cffi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/build_cffi.py b/build_cffi.py index 2a8be0578..03c5e80b3 100755 --- a/build_cffi.py +++ b/build_cffi.py @@ -32,6 +32,7 @@ os.path.join(CLOC, "hello_world.o"), os.path.join(CLOC, "filtering.o"), os.path.join(CLOC, "PerturbField.o"), + os.path.join(CLOC, "SpinTemperatureBox.o"), ] extra_link_args = ["-lcudart"] From f1d5c6c0d8a59ebaa67ff32be37e13aa026eba0a Mon Sep 17 00:00:00 2001 From: alserene Date: Sat, 9 Nov 2024 17:24:39 +1100 Subject: [PATCH 027/145] Add -lstdc++ flag for thrust. --- build_cffi.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/build_cffi.py b/build_cffi.py index 03c5e80b3..dff686551 100755 --- a/build_cffi.py +++ b/build_cffi.py @@ -34,7 +34,9 @@ os.path.join(CLOC, "PerturbField.o"), os.path.join(CLOC, "SpinTemperatureBox.o"), ] -extra_link_args = ["-lcudart"] +# extra_link_args = ["-lcudart"] +# extra_link_args = ["-lcudart", "-no-undefined"] +extra_link_args = ["-lcudart", "-lstdc++"] # Set the C-code logging level. # If DEBUG is set, we default to the highest level, but if not, @@ -87,7 +89,7 @@ # stuff for gperftools if "PROFILE" in os.environ: - # libraries += ["profiler", "tcmalloc"] + # libraries += ["profiler", "tcmalloc"] # tcmalloc causing errors libraries += ["profiler"] # we need this even if DEBUG is off extra_compile_args += ["-g"] From 3689bfa341e67cd64d34c745df303cff59b53cc6 Mon Sep 17 00:00:00 2001 From: alserene Date: Sat, 9 Nov 2024 17:26:06 +1100 Subject: [PATCH 028/145] Add workaround for table struct corruption. 
--- src/py21cmfast/src/SpinTemperatureBox.c | 12 +- src/py21cmfast/src/SpinTemperatureBox.cu | 170 +++++++++++++++++++---- src/py21cmfast/src/SpinTemperatureBox.h | 10 +- 3 files changed, 162 insertions(+), 30 deletions(-) diff --git a/src/py21cmfast/src/SpinTemperatureBox.c b/src/py21cmfast/src/SpinTemperatureBox.c index 7ffd6fef9..ad133152d 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.c +++ b/src/py21cmfast/src/SpinTemperatureBox.c @@ -938,7 +938,17 @@ void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, // -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- // If GPU is to be used and flags are ideal, call GPU version of reduction if (true && flag_options_global->USE_MASS_DEPENDENT_ZETA && user_params_global->USE_INTERPOLATION_TABLES && !flag_options_global->USE_MINI_HALOS) { - calculate_sfrd_from_grid_gpu(&SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, ave_sfrd_buf, HII_TOT_NUM_PIXELS); + RGTable1D_f* SFRD_conditional_table = get_SFRD_conditional_table(); + // ave_sfrd_buf = calculate_sfrd_from_grid_gpu(&SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS); + + // -------------- CODE FOR CORRUPTION BUG WORKAROUND + double x_min = SFRD_conditional_table->x_min; + double x_width = SFRD_conditional_table->x_width; + int n_bin = SFRD_conditional_table->n_bin; + float* y_arr = SFRD_conditional_table->y_arr; // pointer, not new array! 
+ ave_sfrd_buf = calculate_sfrd_from_grid_gpu(x_min, x_width, n_bin, y_arr, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS); + // -------------- CODE FOR CORRUPTION BUG WORKAROUND + } else { // Else, run CPU reduction #pragma omp parallel num_threads(user_params_global->N_THREADS) diff --git a/src/py21cmfast/src/SpinTemperatureBox.cu b/src/py21cmfast/src/SpinTemperatureBox.cu index 725af6198..394e0262c 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.cu +++ b/src/py21cmfast/src/SpinTemperatureBox.cu @@ -6,7 +6,10 @@ #include #include -// we use thrust for reduction +// GPU +#include +#include +// We use thrust for reduction #include #include #include // thrust::plus @@ -32,7 +35,7 @@ #include "SpinTemperatureBox.h" -__device__ inline double EvaluateRGTable1D_f_gpu(double x, double *x_min, double *x_width, double *y_arr) { +__device__ inline double EvaluateRGTable1D_f_gpu(double x, double *x_min, double *x_width, float *y_arr) { int idx = (int)floor((x - *x_min) / *x_width); @@ -58,7 +61,7 @@ template __global__ void compute_and_reduce( double *x_min, // input data double *x_width, // input data - double *y_arr, // input data + float *y_arr, // input data float *dens_R_grid, // input data double zpp_growth_R_ct, // input value float *sfrd_grid, // star formation rate density grid to be updated @@ -115,44 +118,113 @@ __global__ void compute_and_reduce( if (tid == 0) ave_sfrd_buf[blockIdx.x] = sdata[0]; } -void calculate_sfrd_from_grid_gpu( - RGTable1D_f *SFRD_conditional_table, // input data +double calculate_sfrd_from_grid_gpu( + // RGTable1D_f *SFRD_conditional_table, // input data + // -------------- CODE FOR CORRUPTION BUG WORKAROUND + double x_min, + double x_width, + int n_bin, + float* y_arr, + // -------------- CODE FOR CORRUPTION BUG WORKAROUND float *dens_R_grid, // input data double *zpp_growth, // input data int R_ct, // input data float *sfrd_grid, // star formation rate density grid to be updated - double *ave_sfrd_buf, // final output 
(to be divided by HII_TOT_NUM_PIXELS) unsigned int num_pixels // length of input data ) { + cudaError_t err = cudaGetLastError(); + // <-- ADD BREAKPOINT HERE // Input data double zpp_growth_R_ct = zpp_growth[R_ct]; // The kernel only needs access to some fields of the SFRD_conditional_table struct // so we allocate device memory and copy data only for required fields. + + // -------------- CODE FOR CORRUPTION BUG WORKAROUND // Create device pointers - double *x_min, *x_width, *y_arr; + double *d_x_min, *d_x_width; + float *d_y_arr; + // Allocate device memory - cudaMalloc(&x_min, sizeof(double)); - cudaMalloc(&x_width, sizeof(double)); - cudaMalloc(&y_arr, sizeof(double) * SFRD_conditional_table->n_bin); + cudaMalloc(&d_x_min, sizeof(double)); + cudaMalloc(&d_x_width, sizeof(double)); + cudaMalloc(&d_y_arr, sizeof(float) * n_bin); + LOG_INFO("SFRD_conditional_table fields allocated on device."); + + LOG_INFO("x_min: %f, x_width: %f, n_bin: %d", x_min, x_width, n_bin); + // <-- ADD BREAKPOINT HERE // Copy data from host to device - cudaMemcpy(x_min, &SFRD_conditional_table->x_min, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in - cudaMemcpy(x_width, &SFRD_conditional_table->x_width, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in - cudaMemcpy(y_arr, SFRD_conditional_table->y_arr, sizeof(double) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice); + err = cudaMemcpy(d_x_min, &x_min, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + // <-- ADD BREAKPOINT HERE + err = cudaMemcpy(d_x_width, &x_width, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + // <-- ADD BREAKPOINT HERE + err = cudaMemcpy(d_y_arr, y_arr, sizeof(float) * n_bin, cudaMemcpyHostToDevice); + if (err != cudaSuccess) { + 
LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + // <-- ADD BREAKPOINT HERE + LOG_INFO("SFRD_conditional_table fields copied to device."); + // -------------- CODE FOR CORRUPTION BUG WORKAROUND + + // // Create device pointers + // double *x_min, *x_width; + // float *y_arr; + + // // Allocate device memory + // cudaMalloc(&x_min, sizeof(double)); + // cudaMalloc(&x_width, sizeof(double)); + // cudaMalloc(&y_arr, sizeof(float) * SFRD_conditional_table->n_bin); + // LOG_INFO("SFRD_conditional_table fields allocated on device."); + + // LOG_INFO("x_min: %f, x_width: %f, n_bin: %d", SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, SFRD_conditional_table->n_bin); + // // <-- ADD BREAKPOINT HERE + // // Copy data from host to device + // err = cudaMemcpy(x_min, &SFRD_conditional_table->x_min, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in + // if (err != cudaSuccess) { + // LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + // Throw(CUDAError); + // } + // // <-- ADD BREAKPOINT HERE + // err = cudaMemcpy(x_width, &SFRD_conditional_table->x_width, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in + // if (err != cudaSuccess) { + // LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + // Throw(CUDAError); + // } + // // <-- ADD BREAKPOINT HERE + // err = cudaMemcpy(y_arr, SFRD_conditional_table->y_arr, sizeof(float) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice); + // if (err != cudaSuccess) { + // LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + // Throw(CUDAError); + // } + // // <-- ADD BREAKPOINT HERE + // LOG_INFO("SFRD_conditional_table fields copied to device."); // Allocate & populate device memory for other inputs. 
// Create device pointers float *d_dens_R_grid, *d_sfrd_grid; + // Allocate device memory cudaMalloc(&d_dens_R_grid, sizeof(float) * num_pixels); cudaMalloc(&d_sfrd_grid, sizeof(float) * num_pixels); + LOG_INFO("density and sfrd grids allocated on device."); + // Copy data from host to device - cudaMemcpy(d_dens_R_grid, &dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); - cudaMemcpy(d_sfrd_grid, &sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); + cudaMemcpy(d_dens_R_grid, dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); + cudaMemcpy(d_sfrd_grid, sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); + LOG_INFO("density and sfrd grids copied to device."); - cudaError_t err = cudaGetLastError(); + err = cudaGetLastError(); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); @@ -184,30 +256,63 @@ void calculate_sfrd_from_grid_gpu( double* d_ave_sfrd_buf; unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); cudaMalloc(&d_ave_sfrd_buf, sizeof(double) * buffer_length); // 91m & 256 -> 177979 + LOG_INFO("buffer allocated on device."); // cudaMalloc((void**)&d_ave_sfrd_buf, sizeof(double) * buffer_length); cudaMemset(d_ave_sfrd_buf, 0, sizeof(double) * buffer_length); // fill with byte=0 + LOG_INFO("buffer copied to device."); + + err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + // -------------- CODE FOR CORRUPTION BUG WORKAROUND // Invoke kernel switch (threadsPerBlock) { case 512: - compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(d_x_min, d_x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 256: - compute_and_reduce<256><<< 
numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(d_x_min, d_x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 128: - compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(d_x_min, d_x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 64: - compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(d_x_min, d_x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 32: - compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(d_x_min, d_x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; default: // LOG_WARNING("Thread size invalid; defaulting to 256.") - compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(d_x_min, d_x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); } + // -------------- CODE FOR CORRUPTION BUG WORKAROUND + + // // Invoke kernel + // switch (threadsPerBlock) { + // case 512: + // compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, 
x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + // break; + // case 256: + // compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + // break; + // case 128: + // compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + // break; + // case 64: + // compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + // break; + // case 32: + // compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + // break; + // default: + // // LOG_WARNING("Thread size invalid; defaulting to 256.") + // compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + // } + LOG_INFO("kernel called."); // Only use during development! err = cudaDeviceSynchronize(); @@ -224,27 +329,38 @@ void calculate_sfrd_from_grid_gpu( // Wrap device pointer in a thrust::device_ptr thrust::device_ptr d_ave_sfrd_buf_ptr(d_ave_sfrd_buf); // Reduce final buffer values to one value - double reduced_value = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + buffer_length, 0., thrust::plus()); - *ave_sfrd_buf = reduced_value; + double ave_sfrd_buf = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + buffer_length, 0., thrust::plus()); + LOG_INFO("thrust reduced buffer."); // Copy results from device to host. 
err = cudaMemcpy(sfrd_grid, d_sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyDeviceToHost); + LOG_INFO("buffer copied to host."); + if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); } // Deallocate device memory. - cudaFree(x_min); - cudaFree(x_width); - cudaFree(y_arr); + // -------------- CODE FOR CORRUPTION BUG WORKAROUND + cudaFree(d_x_min); + cudaFree(d_x_width); + cudaFree(d_y_arr); + // -------------- CODE FOR CORRUPTION BUG WORKAROUND + // cudaFree(x_min); + // cudaFree(x_width); + // cudaFree(y_arr); cudaFree(d_dens_R_grid); cudaFree(d_sfrd_grid); cudaFree(d_ave_sfrd_buf); + LOG_INFO("remaining memory freed."); + err = cudaGetLastError(); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); } + + return ave_sfrd_buf; } diff --git a/src/py21cmfast/src/SpinTemperatureBox.h b/src/py21cmfast/src/SpinTemperatureBox.h index e90b87aca..c2043aad0 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.h +++ b/src/py21cmfast/src/SpinTemperatureBox.h @@ -5,6 +5,7 @@ #include "InputParameters.h" #include "OutputStructs.h" +#include "interpolation.h" #ifdef __cplusplus extern "C" { @@ -22,8 +23,13 @@ int UpdateXraySourceBox(UserParams *user_params, CosmoParams *cosmo_params, void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, float *sfrd_grid, float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini); -void calculate_sfrd_from_grid_gpu(RGTable1D_f *SFRD_conditional_table, float *dens_R_grid, double *zpp_growth, - int R_ct, float *sfrd_grid, double *ave_sfrd_buf, unsigned int num_pixels); +// double calculate_sfrd_from_grid_gpu(RGTable1D_f *SFRD_conditional_table, float *dens_R_grid, double *zpp_growth, +// int R_ct, float *sfrd_grid, unsigned int num_pixels); + +// -------------- CODE FOR CORRUPTION BUG WORKAROUND +double calculate_sfrd_from_grid_gpu(double x_min, double x_width, int n_bin, float* y_arr, float *dens_R_grid, double *zpp_growth, + 
int R_ct, float *sfrd_grid, unsigned int num_pixels); +// -------------- CODE FOR CORRUPTION BUG WORKAROUND #ifdef __cplusplus } From 4413c3395ac3e9319f2427435315910fe3d16fb6 Mon Sep 17 00:00:00 2001 From: alserene Date: Sat, 9 Nov 2024 17:26:57 +1100 Subject: [PATCH 029/145] Add accessor function for SFRD_conditional_table. --- src/py21cmfast/src/interp_tables.c | 7 ++++++- src/py21cmfast/src/interp_tables.h | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/py21cmfast/src/interp_tables.c b/src/py21cmfast/src/interp_tables.c index aaebab267..85d844f08 100644 --- a/src/py21cmfast/src/interp_tables.c +++ b/src/py21cmfast/src/interp_tables.c @@ -18,7 +18,7 @@ #include "cosmology.h" #include "InputParameters.h" #include "hmf.h" -#include "interpolation.h" +// #include "interpolation.h" #include "interp_tables.h" @@ -1046,3 +1046,8 @@ double EvaluatedSigmasqdm(double lnM){ } return dsigmasqdm_z0(exp(lnM)); } + +// Accessor function for the GPU SpinTemp kernel to access table. +RGTable1D_f* get_SFRD_conditional_table(void) { + return &SFRD_conditional_table; +} diff --git a/src/py21cmfast/src/interp_tables.h b/src/py21cmfast/src/interp_tables.h index 3c62c09af..a2b692f62 100644 --- a/src/py21cmfast/src/interp_tables.h +++ b/src/py21cmfast/src/interp_tables.h @@ -2,6 +2,7 @@ #define _INTERP_TABLES_H #include "InputParameters.h" +#include "interpolation.h" //Functions within interp_tables.c need the parameter structures, but we don't want to pass them all down the chain, so we broadcast them //TODO: in future it would be better to use a context struct. See `HaloBox.c` @@ -65,6 +66,8 @@ void free_conditional_tables(); void free_global_tables(); void free_dNdM_tables(); +RGTable1D_f* get_SFRD_conditional_table(void); + #ifdef __cplusplus } #endif From b25cefe6942d767e6a84b5d3616755ba7283d054 Mon Sep 17 00:00:00 2001 From: alserene Date: Fri, 15 Nov 2024 18:42:29 +1100 Subject: [PATCH 030/145] Fix struct corruption bug. 
--- src/py21cmfast/src/SpinTemperatureBox.c | 11 +- src/py21cmfast/src/SpinTemperatureBox.cu | 131 ++++++----------------- src/py21cmfast/src/SpinTemperatureBox.h | 7 +- 3 files changed, 32 insertions(+), 117 deletions(-) diff --git a/src/py21cmfast/src/SpinTemperatureBox.c b/src/py21cmfast/src/SpinTemperatureBox.c index ad133152d..bd9f4ada4 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.c +++ b/src/py21cmfast/src/SpinTemperatureBox.c @@ -939,16 +939,7 @@ void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, // If GPU is to be used and flags are ideal, call GPU version of reduction if (true && flag_options_global->USE_MASS_DEPENDENT_ZETA && user_params_global->USE_INTERPOLATION_TABLES && !flag_options_global->USE_MINI_HALOS) { RGTable1D_f* SFRD_conditional_table = get_SFRD_conditional_table(); - // ave_sfrd_buf = calculate_sfrd_from_grid_gpu(&SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS); - - // -------------- CODE FOR CORRUPTION BUG WORKAROUND - double x_min = SFRD_conditional_table->x_min; - double x_width = SFRD_conditional_table->x_width; - int n_bin = SFRD_conditional_table->n_bin; - float* y_arr = SFRD_conditional_table->y_arr; // pointer, not new array! 
- ave_sfrd_buf = calculate_sfrd_from_grid_gpu(x_min, x_width, n_bin, y_arr, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS); - // -------------- CODE FOR CORRUPTION BUG WORKAROUND - + ave_sfrd_buf = calculate_sfrd_from_grid_gpu(SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS); } else { // Else, run CPU reduction #pragma omp parallel num_threads(user_params_global->N_THREADS) diff --git a/src/py21cmfast/src/SpinTemperatureBox.cu b/src/py21cmfast/src/SpinTemperatureBox.cu index 394e0262c..fb9438ea0 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.cu +++ b/src/py21cmfast/src/SpinTemperatureBox.cu @@ -59,11 +59,11 @@ __device__ void warp_reduce(volatile double *sdata, unsigned int tid) { template __global__ void compute_and_reduce( - double *x_min, // input data - double *x_width, // input data - float *y_arr, // input data - float *dens_R_grid, // input data - double zpp_growth_R_ct, // input value + double *x_min, // reference + double *x_width, // reference + float *y_arr, // reference + float *dens_R_grid, // reference + double zpp_growth_R_ct, // value float *sfrd_grid, // star formation rate density grid to be updated double *ave_sfrd_buf, // output buffer of length ceil(n / (threadsPerBlock * 2)) unsigned int num_pixels // length of input data @@ -119,13 +119,7 @@ __global__ void compute_and_reduce( } double calculate_sfrd_from_grid_gpu( - // RGTable1D_f *SFRD_conditional_table, // input data - // -------------- CODE FOR CORRUPTION BUG WORKAROUND - double x_min, - double x_width, - int n_bin, - float* y_arr, - // -------------- CODE FOR CORRUPTION BUG WORKAROUND + RGTable1D_f *SFRD_conditional_table, // input data float *dens_R_grid, // input data double *zpp_growth, // input data int R_ct, // input data @@ -133,81 +127,45 @@ double calculate_sfrd_from_grid_gpu( unsigned int num_pixels // length of input data ) { cudaError_t err = cudaGetLastError(); - // <-- ADD BREAKPOINT HERE + // Input data double 
zpp_growth_R_ct = zpp_growth[R_ct]; // The kernel only needs access to some fields of the SFRD_conditional_table struct // so we allocate device memory and copy data only for required fields. - - // -------------- CODE FOR CORRUPTION BUG WORKAROUND // Create device pointers - double *d_x_min, *d_x_width; - float *d_y_arr; + double *x_min, *x_width; + float *y_arr; // Allocate device memory - cudaMalloc(&d_x_min, sizeof(double)); - cudaMalloc(&d_x_width, sizeof(double)); - cudaMalloc(&d_y_arr, sizeof(float) * n_bin); + cudaMalloc(&x_min, sizeof(double)); // TODO: don't allocate, just pass in? + cudaMalloc(&x_width, sizeof(double)); + cudaMalloc(&y_arr, sizeof(float) * SFRD_conditional_table->n_bin); LOG_INFO("SFRD_conditional_table fields allocated on device."); - LOG_INFO("x_min: %f, x_width: %f, n_bin: %d", x_min, x_width, n_bin); - // <-- ADD BREAKPOINT HERE + LOG_INFO("x_min: %f, x_width: %f, n_bin: %d", SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, SFRD_conditional_table->n_bin); + // Copy data from host to device - err = cudaMemcpy(d_x_min, &x_min, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in + err = cudaMemcpy(x_min, &SFRD_conditional_table->x_min, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); } - // <-- ADD BREAKPOINT HERE - err = cudaMemcpy(d_x_width, &x_width, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in + + err = cudaMemcpy(x_width, &SFRD_conditional_table->x_width, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); } - // <-- ADD BREAKPOINT HERE - err = cudaMemcpy(d_y_arr, y_arr, sizeof(float) * n_bin, cudaMemcpyHostToDevice); + + err = cudaMemcpy(y_arr, SFRD_conditional_table->y_arr, sizeof(float) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice); if (err != cudaSuccess) { 
LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); } - // <-- ADD BREAKPOINT HERE + LOG_INFO("SFRD_conditional_table fields copied to device."); - // -------------- CODE FOR CORRUPTION BUG WORKAROUND - - // // Create device pointers - // double *x_min, *x_width; - // float *y_arr; - - // // Allocate device memory - // cudaMalloc(&x_min, sizeof(double)); - // cudaMalloc(&x_width, sizeof(double)); - // cudaMalloc(&y_arr, sizeof(float) * SFRD_conditional_table->n_bin); - // LOG_INFO("SFRD_conditional_table fields allocated on device."); - - // LOG_INFO("x_min: %f, x_width: %f, n_bin: %d", SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, SFRD_conditional_table->n_bin); - // // <-- ADD BREAKPOINT HERE - // // Copy data from host to device - // err = cudaMemcpy(x_min, &SFRD_conditional_table->x_min, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in - // if (err != cudaSuccess) { - // LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - // Throw(CUDAError); - // } - // // <-- ADD BREAKPOINT HERE - // err = cudaMemcpy(x_width, &SFRD_conditional_table->x_width, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in - // if (err != cudaSuccess) { - // LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - // Throw(CUDAError); - // } - // // <-- ADD BREAKPOINT HERE - // err = cudaMemcpy(y_arr, SFRD_conditional_table->y_arr, sizeof(float) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice); - // if (err != cudaSuccess) { - // LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - // Throw(CUDAError); - // } - // // <-- ADD BREAKPOINT HERE - // LOG_INFO("SFRD_conditional_table fields copied to device."); // Allocate & populate device memory for other inputs. 
@@ -257,7 +215,7 @@ double calculate_sfrd_from_grid_gpu( unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); cudaMalloc(&d_ave_sfrd_buf, sizeof(double) * buffer_length); // 91m & 256 -> 177979 LOG_INFO("buffer allocated on device."); - // cudaMalloc((void**)&d_ave_sfrd_buf, sizeof(double) * buffer_length); + // cudaMalloc((void**)&d_ave_sfrd_buf, sizeof(double) * buffer_length); // TODO: should I be using this instead? cudaMemset(d_ave_sfrd_buf, 0, sizeof(double) * buffer_length); // fill with byte=0 LOG_INFO("buffer copied to device."); @@ -267,51 +225,27 @@ double calculate_sfrd_from_grid_gpu( Throw(CUDAError); } - // -------------- CODE FOR CORRUPTION BUG WORKAROUND // Invoke kernel switch (threadsPerBlock) { case 512: - compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(d_x_min, d_x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 256: - compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(d_x_min, d_x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 128: - compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(d_x_min, d_x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 64: - compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(d_x_min, d_x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + 
compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 32: - compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(d_x_min, d_x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; default: // LOG_WARNING("Thread size invalid; defaulting to 256.") - compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(d_x_min, d_x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); } - // -------------- CODE FOR CORRUPTION BUG WORKAROUND - - // // Invoke kernel - // switch (threadsPerBlock) { - // case 512: - // compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); - // break; - // case 256: - // compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); - // break; - // case 128: - // compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); - // break; - // case 64: - // compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); - // break; - // case 32: - // compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, 
d_ave_sfrd_buf, num_pixels); - // break; - // default: - // // LOG_WARNING("Thread size invalid; defaulting to 256.") - // compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); - // } LOG_INFO("kernel called."); // Only use during development! @@ -342,14 +276,9 @@ double calculate_sfrd_from_grid_gpu( } // Deallocate device memory. - // -------------- CODE FOR CORRUPTION BUG WORKAROUND - cudaFree(d_x_min); - cudaFree(d_x_width); - cudaFree(d_y_arr); - // -------------- CODE FOR CORRUPTION BUG WORKAROUND - // cudaFree(x_min); - // cudaFree(x_width); - // cudaFree(y_arr); + cudaFree(x_min); + cudaFree(x_width); + cudaFree(y_arr); cudaFree(d_dens_R_grid); cudaFree(d_sfrd_grid); cudaFree(d_ave_sfrd_buf); diff --git a/src/py21cmfast/src/SpinTemperatureBox.h b/src/py21cmfast/src/SpinTemperatureBox.h index c2043aad0..9a3448d2d 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.h +++ b/src/py21cmfast/src/SpinTemperatureBox.h @@ -23,13 +23,8 @@ int UpdateXraySourceBox(UserParams *user_params, CosmoParams *cosmo_params, void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, float *sfrd_grid, float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini); -// double calculate_sfrd_from_grid_gpu(RGTable1D_f *SFRD_conditional_table, float *dens_R_grid, double *zpp_growth, -// int R_ct, float *sfrd_grid, unsigned int num_pixels); - -// -------------- CODE FOR CORRUPTION BUG WORKAROUND -double calculate_sfrd_from_grid_gpu(double x_min, double x_width, int n_bin, float* y_arr, float *dens_R_grid, double *zpp_growth, +double calculate_sfrd_from_grid_gpu(RGTable1D_f *SFRD_conditional_table, float *dens_R_grid, double *zpp_growth, int R_ct, float *sfrd_grid, unsigned int num_pixels); -// -------------- CODE FOR CORRUPTION BUG WORKAROUND #ifdef __cplusplus } From 456446dcbd90440ce51878af1900ac6bb4ed83ba Mon Sep 17 00:00:00 2001 From: alserene Date: 
Sun, 17 Nov 2024 00:24:55 +1100 Subject: [PATCH 031/145] Reuse memory for unchanged arrays. --- src/py21cmfast/src/SpinTemperatureBox.c | 19 +- src/py21cmfast/src/SpinTemperatureBox.cu | 215 +++++++++++++---------- src/py21cmfast/src/SpinTemperatureBox.h | 12 +- 3 files changed, 145 insertions(+), 101 deletions(-) diff --git a/src/py21cmfast/src/SpinTemperatureBox.c b/src/py21cmfast/src/SpinTemperatureBox.c index bd9f4ada4..be31027fe 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.c +++ b/src/py21cmfast/src/SpinTemperatureBox.c @@ -914,7 +914,8 @@ int global_reion_properties(double zp, double x_e_ave, double *log10_Mcrit_LW_av } void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, float *sfrd_grid, - float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini){ + float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini, + float *d_y_arr, float *d_dens_R_grid, float *d_sfrd_grid, double *d_ave_sfrd_buf){ double ave_sfrd_buf=0; double ave_sfrd_buf_mini=0; if(user_params_global->INTEGRATION_METHOD_ATOMIC == 1 || (flag_options_global->USE_MINI_HALOS && user_params_global->INTEGRATION_METHOD_MINI == 1)) @@ -938,8 +939,12 @@ void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, // -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- // If GPU is to be used and flags are ideal, call GPU version of reduction if (true && flag_options_global->USE_MASS_DEPENDENT_ZETA && user_params_global->USE_INTERPOLATION_TABLES && !flag_options_global->USE_MINI_HALOS) { + RGTable1D_f* SFRD_conditional_table = get_SFRD_conditional_table(); - ave_sfrd_buf = calculate_sfrd_from_grid_gpu(SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS); + // ave_sfrd_buf = calculate_sfrd_from_grid_gpu(SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS); 
+ ave_sfrd_buf = calculate_sfrd_from_grid_gpu(SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS, + d_y_arr, d_dens_R_grid, d_sfrd_grid, d_ave_sfrd_buf); + } else { // Else, run CPU reduction #pragma omp parallel num_threads(user_params_global->N_THREADS) @@ -1418,8 +1423,15 @@ void ts_main(float redshift, float prev_redshift, UserParams *user_params, Cosmo float *delta_box_input; float *Mcrit_box_input = NULL; //may be unused + // Device pointers that reference GPU memory and need to persist across loop iterations + float *d_y_arr = NULL; + float *d_dens_R_grid = NULL; + float *d_sfrd_grid = NULL; + double *d_ave_sfrd_buf = NULL; + //if we have stars, fill in the heating term boxes if(!NO_LIGHT) { + // R_ct starts at 39 and goes down to 0 for(R_ct=global_params.NUM_FILTER_STEPS_FOR_Ts; R_ct--;){ dzpp_for_evolve = dzpp_list[R_ct]; zpp = zpp_for_evolve_list[R_ct]; @@ -1458,7 +1470,8 @@ void ts_main(float redshift, float prev_redshift, UserParams *user_params, Cosmo if(flag_options->USE_MINI_HALOS){ Mcrit_box_input = log10_Mcrit_LW[R_index]; } - calculate_sfrd_from_grid(R_ct,delta_box_input,Mcrit_box_input,del_fcoll_Rct,del_fcoll_Rct_MINI,&ave_fcoll,&ave_fcoll_MINI); + calculate_sfrd_from_grid(R_ct, delta_box_input, Mcrit_box_input, del_fcoll_Rct, del_fcoll_Rct_MINI, &ave_fcoll, &ave_fcoll_MINI, d_y_arr, d_dens_R_grid, d_sfrd_grid, d_ave_sfrd_buf); + // calculate_sfrd_from_grid(R_ct,delta_box_input,Mcrit_box_input,del_fcoll_Rct,del_fcoll_Rct_MINI,&ave_fcoll,&ave_fcoll_MINI); avg_fix_term = mean_sfr_zpp[R_ct]/ave_fcoll; if(flag_options->USE_MINI_HALOS) avg_fix_term_MINI = mean_sfr_zpp_mini[R_ct]/ave_fcoll_MINI; diff --git a/src/py21cmfast/src/SpinTemperatureBox.cu b/src/py21cmfast/src/SpinTemperatureBox.cu index fb9438ea0..0f325e792 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.cu +++ b/src/py21cmfast/src/SpinTemperatureBox.cu @@ -35,12 +35,12 @@ #include "SpinTemperatureBox.h" -__device__ inline double 
EvaluateRGTable1D_f_gpu(double x, double *x_min, double *x_width, float *y_arr) { +__device__ inline double EvaluateRGTable1D_f_gpu(double x, double x_min, double x_width, float *y_arr) { - int idx = (int)floor((x - *x_min) / *x_width); + int idx = (int)floor((x - x_min) / x_width); - double table_val = *x_min + *x_width * (float)idx; - double interp_point = (x - table_val) / *x_width; + double table_val = x_min + x_width * (float)idx; + double interp_point = (x - table_val) / x_width; return y_arr[idx] * (1 - interp_point) + y_arr[idx + 1] * (interp_point); } @@ -59,8 +59,8 @@ __device__ void warp_reduce(volatile double *sdata, unsigned int tid) { template __global__ void compute_and_reduce( - double *x_min, // reference - double *x_width, // reference + double x_min, // reference + double x_width, // reference float *y_arr, // reference float *dens_R_grid, // reference double zpp_growth_R_ct, // value @@ -122,70 +122,81 @@ double calculate_sfrd_from_grid_gpu( RGTable1D_f *SFRD_conditional_table, // input data float *dens_R_grid, // input data double *zpp_growth, // input data - int R_ct, // input data + int R_ct, // filter step/loop iteration/spherical annuli (out of 40 by default) float *sfrd_grid, // star formation rate density grid to be updated - unsigned int num_pixels // length of input data + unsigned int num_pixels, // length of input data + float *d_y_arr, float *d_dens_R_grid, float *d_sfrd_grid, double *d_ave_sfrd_buf // device pointers ) { cudaError_t err = cudaGetLastError(); - // Input data - double zpp_growth_R_ct = zpp_growth[R_ct]; - - // The kernel only needs access to some fields of the SFRD_conditional_table struct - // so we allocate device memory and copy data only for required fields. - - // Create device pointers - double *x_min, *x_width; - float *y_arr; - - // Allocate device memory - cudaMalloc(&x_min, sizeof(double)); // TODO: don't allocate, just pass in? 
- cudaMalloc(&x_width, sizeof(double)); - cudaMalloc(&y_arr, sizeof(float) * SFRD_conditional_table->n_bin); - LOG_INFO("SFRD_conditional_table fields allocated on device."); + // Set bools for initial and final filtering steps to allow for memory reuse + bool initial_filter_step = false; + bool final_filter_step = false; + + // Default NUM_FILTER_STEPS_FOR_Ts = 40 + if (global_params.NUM_FILTER_STEPS_FOR_Ts - 1 == R_ct) { + initial_filter_step = true; + } else if (R_ct == 0) { + final_filter_step = true; + } else if (global_params.NUM_FILTER_STEPS_FOR_Ts == 1) { + // Would case of NUM_FILTER_STEPS_FOR_Ts = 1 ever occur? + initial_filter_step = true; + final_filter_step = true; + } - LOG_INFO("x_min: %f, x_width: %f, n_bin: %d", SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, SFRD_conditional_table->n_bin); + // Get growth factor for current filter step + double zpp_growth_R_ct = zpp_growth[R_ct]; - // Copy data from host to device - err = cudaMemcpy(x_min, &SFRD_conditional_table->x_min, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); + // ============================================================ <- these pointers need to persist across kernel calls, i.e. across loop iterations! 
+ // Device pointers are initialised before loop + // if (initial_filter_step) { + // float *d_y_arr, *d_dens_R_grid, *d_sfrd_grid; + // double* d_ave_sfrd_buf; + // } + // ============================================================ + + // Allocate device memory ------------------------------------------------------------------------------------------ + if (initial_filter_step) { + err = cudaMalloc((void**)&d_y_arr, sizeof(float) * SFRD_conditional_table->n_bin); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMalloc((void**)&d_dens_R_grid, sizeof(float) * num_pixels); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMalloc((void**)&d_sfrd_grid, sizeof(float) * num_pixels); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + LOG_INFO("SFRD_conditional_table.y_arr and density and sfrd grids allocated on device."); } - err = cudaMemcpy(x_width, &SFRD_conditional_table->x_width, sizeof(double), cudaMemcpyHostToDevice); // Can also pass in + // Copy data from host to device ----------------------------------------------------------------------------------- + err = cudaMemcpy(d_y_arr, SFRD_conditional_table->y_arr, sizeof(float) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); } - - err = cudaMemcpy(y_arr, SFRD_conditional_table->y_arr, sizeof(float) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice); + err = cudaMemcpy(d_dens_R_grid, dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); // TODO: Does this change between filter steps? 
if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); } - - LOG_INFO("SFRD_conditional_table fields copied to device."); - - // Allocate & populate device memory for other inputs. - - // Create device pointers - float *d_dens_R_grid, *d_sfrd_grid; - - // Allocate device memory - cudaMalloc(&d_dens_R_grid, sizeof(float) * num_pixels); - cudaMalloc(&d_sfrd_grid, sizeof(float) * num_pixels); - LOG_INFO("density and sfrd grids allocated on device."); - - // Copy data from host to device - cudaMemcpy(d_dens_R_grid, dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); - cudaMemcpy(d_sfrd_grid, sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); - LOG_INFO("density and sfrd grids copied to device."); - - err = cudaGetLastError(); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); + // In initial filter step, set array to 0; + // for future steps, previous array values will be written over + if (initial_filter_step) { + err = cudaMemset(d_sfrd_grid, 0, sizeof(float) * num_pixels); // fill with byte=0 + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + LOG_INFO("SFRD_conditional_table.y_arr and density and sfrd grids copied to device."); + } else { + LOG_INFO("SFRD_conditional_table.y_arr and density grid copied to device."); } // Get max threads/block for device @@ -208,47 +219,51 @@ double calculate_sfrd_from_grid_gpu( threadsPerBlock = 16; } int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; // 91m & 256 -> 355959 - int smemSize = threadsPerBlock * sizeof(double); // shared memory + int smemSize = threadsPerBlock * sizeof(double); // shared memory - // Allocate device memory for output buffer and set to 0 - double* d_ave_sfrd_buf; + // Allocate memory for SFRD sum buffer and initialise to 0 only for initial filter step; + // reuse memory for remaining filter steps. 
unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); - cudaMalloc(&d_ave_sfrd_buf, sizeof(double) * buffer_length); // 91m & 256 -> 177979 - LOG_INFO("buffer allocated on device."); - // cudaMalloc((void**)&d_ave_sfrd_buf, sizeof(double) * buffer_length); // TODO: should I be using this instead? - cudaMemset(d_ave_sfrd_buf, 0, sizeof(double) * buffer_length); // fill with byte=0 - LOG_INFO("buffer copied to device."); - - err = cudaGetLastError(); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); + if (initial_filter_step) { + err = cudaMalloc((void**)&d_ave_sfrd_buf, sizeof(double) * buffer_length); // 91m & 256 -> 177979 + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + LOG_INFO("SFRD sum reduction buffer allocated on device."); + + err = cudaMemset(d_ave_sfrd_buf, 0, sizeof(double) * buffer_length); // fill with byte=0 + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + LOG_INFO("SFRD sum reduction buffer initialised to 0."); } // Invoke kernel switch (threadsPerBlock) { case 512: - compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 256: - compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 128: - 
compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 64: - compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; case 32: - compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); break; default: - // LOG_WARNING("Thread size invalid; defaulting to 256.") - compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); + LOG_WARNING("Thread size invalid; defaulting to 256."); + compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); } - LOG_INFO("kernel called."); + LOG_INFO("SpinTemperatureBox compute-and-reduce kernel called."); - // Only use during development! + // Only use during development? 
err = cudaDeviceSynchronize(); CATCH_CUDA_ERROR(err); @@ -258,37 +273,45 @@ double calculate_sfrd_from_grid_gpu( Throw(CUDAError); } - // Use thrust to reduce computed buffer values to one value. + // Use thrust to reduce computed sums to one value // Wrap device pointer in a thrust::device_ptr thrust::device_ptr d_ave_sfrd_buf_ptr(d_ave_sfrd_buf); - // Reduce final buffer values to one value + // Reduce final buffer sums to one value double ave_sfrd_buf = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + buffer_length, 0., thrust::plus()); - LOG_INFO("thrust reduced buffer."); + LOG_INFO("SFRD sum reduced to single value by thrust::reduce operation."); // Copy results from device to host. err = cudaMemcpy(sfrd_grid, d_sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyDeviceToHost); - LOG_INFO("buffer copied to host."); - if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); } - - // Deallocate device memory. - cudaFree(x_min); - cudaFree(x_width); - cudaFree(y_arr); - cudaFree(d_dens_R_grid); - cudaFree(d_sfrd_grid); - cudaFree(d_ave_sfrd_buf); - - LOG_INFO("remaining memory freed."); - - err = cudaGetLastError(); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); + LOG_INFO("SFRD sum copied to host."); + + // Deallocate device memory on final filter step. 
+ if (final_filter_step) { + err = cudaFree(d_y_arr); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaFree(d_dens_R_grid); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaFree(d_sfrd_grid); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaFree(d_ave_sfrd_buf); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + LOG_INFO("Device memory freed."); } return ave_sfrd_buf; diff --git a/src/py21cmfast/src/SpinTemperatureBox.h b/src/py21cmfast/src/SpinTemperatureBox.h index 9a3448d2d..687e3cdeb 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.h +++ b/src/py21cmfast/src/SpinTemperatureBox.h @@ -20,11 +20,19 @@ int UpdateXraySourceBox(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, HaloBox *halobox, double R_inner, double R_outer, int R_ct, XraySourceBox *source_box); +// void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, float *sfrd_grid, +// float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini); + +// double calculate_sfrd_from_grid_gpu(RGTable1D_f *SFRD_conditional_table, float *dens_R_grid, double *zpp_growth, +// int R_ct, float *sfrd_grid, unsigned int num_pixels); + void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, float *sfrd_grid, - float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini); + float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini, + float *d_y_arr, float *d_dens_R_grid, float *d_sfrd_grid, double *d_ave_sfrd_buf); double calculate_sfrd_from_grid_gpu(RGTable1D_f *SFRD_conditional_table, float *dens_R_grid, double *zpp_growth, - int R_ct, float *sfrd_grid, unsigned int num_pixels); + int R_ct, float *sfrd_grid, unsigned int num_pixels, + 
float *d_y_arr, float *d_dens_R_grid, float *d_sfrd_grid, double *d_ave_sfrd_buf); #ifdef __cplusplus } From 4c3800c37b483233945f112c451e63f6ce5775ba Mon Sep 17 00:00:00 2001 From: alserene Date: Thu, 21 Nov 2024 07:37:16 +1100 Subject: [PATCH 032/145] Fix pointer passing issue. --- src/py21cmfast/src/SpinTemperatureBox.c | 63 ++- src/py21cmfast/src/SpinTemperatureBox.cu | 607 +++++++++++++++++++---- src/py21cmfast/src/SpinTemperatureBox.h | 39 +- 3 files changed, 583 insertions(+), 126 deletions(-) diff --git a/src/py21cmfast/src/SpinTemperatureBox.c b/src/py21cmfast/src/SpinTemperatureBox.c index be31027fe..52b15df09 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.c +++ b/src/py21cmfast/src/SpinTemperatureBox.c @@ -914,8 +914,9 @@ int global_reion_properties(double zp, double x_e_ave, double *log10_Mcrit_LW_av } void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, float *sfrd_grid, - float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini, - float *d_y_arr, float *d_dens_R_grid, float *d_sfrd_grid, double *d_ave_sfrd_buf){ + float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini, + unsigned int threadsPerBlock, // const sfrd_gpu_data *d_data){ + float *d_y_arr, float *d_dens_R_grid, float *d_sfrd_grid, double *d_ave_sfrd_buf){ double ave_sfrd_buf=0; double ave_sfrd_buf_mini=0; if(user_params_global->INTEGRATION_METHOD_ATOMIC == 1 || (flag_options_global->USE_MINI_HALOS && user_params_global->INTEGRATION_METHOD_MINI == 1)) @@ -942,9 +943,12 @@ void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, RGTable1D_f* SFRD_conditional_table = get_SFRD_conditional_table(); // ave_sfrd_buf = calculate_sfrd_from_grid_gpu(SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS); - ave_sfrd_buf = calculate_sfrd_from_grid_gpu(SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS, - d_y_arr, d_dens_R_grid, d_sfrd_grid, d_ave_sfrd_buf); - + // 
ave_sfrd_buf = calculate_sfrd_from_grid_gpu(SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS, + // d_y_arr, d_dens_R_grid, d_sfrd_grid, d_ave_sfrd_buf); + ave_sfrd_buf = calculate_sfrd_from_grid_gpu(SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS, threadsPerBlock, + // d_data + d_y_arr, d_dens_R_grid, d_sfrd_grid, d_ave_sfrd_buf + ); } else { // Else, run CPU reduction #pragma omp parallel num_threads(user_params_global->N_THREADS) @@ -1423,14 +1427,38 @@ void ts_main(float redshift, float prev_redshift, UserParams *user_params, Cosmo float *delta_box_input; float *Mcrit_box_input = NULL; //may be unused - // Device pointers that reference GPU memory and need to persist across loop iterations - float *d_y_arr = NULL; - float *d_dens_R_grid = NULL; - float *d_sfrd_grid = NULL; - double *d_ave_sfrd_buf = NULL; - //if we have stars, fill in the heating term boxes if(!NO_LIGHT) { + + // Device pointers that reference GPU memory and need to persist across loop iterations ------------------------------------------------------------------------- + float *d_y_arr = NULL; + float *d_dens_R_grid = NULL; + float *d_sfrd_grid = NULL; + double *d_ave_sfrd_buf = NULL; + + // initialise pointer to struct of pointers ---------------------------------------------------------------------------------------------------------------------- + // sfrd_gpu_data *device_data; + // sfrd_gpu_data *device_data = (sfrd_gpu_data *)malloc(sizeof(sfrd_gpu_data)); + unsigned int threadsPerBlock = 0; + unsigned int sfrd_nbins = get_nbins(); + + // GPU=True + // if (true) { + // // unsigned int init_sfrd_gpu_data(float *dens_R_grid, float *sfrd_grid, unsigned long long num_pixels, + // // unsigned int nbins, sfrd_gpu_data *d_data); + // threadsPerBlock = init_sfrd_gpu_data(delta_box_input, del_fcoll_Rct, HII_TOT_NUM_PIXELS, sfrd_nbins, &device_data); + // } + // struct 
--------------------------------------------------------------------------------------------------------------------------------------------------------- + // threadsPerBlock = init_sfrd_gpu_data(delta_box_input, del_fcoll_Rct, HII_TOT_NUM_PIXELS, sfrd_nbins, &device_data); + // pointers ------------------------------------------------------------------------------------------------------------------------------------------------------- + threadsPerBlock = init_sfrd_gpu_data(delta_box_input, del_fcoll_Rct, HII_TOT_NUM_PIXELS, sfrd_nbins, &d_y_arr, &d_dens_R_grid, &d_sfrd_grid, &d_ave_sfrd_buf); + // threadsPerBlock = init_sfrd_gpu_data(delta_box_input, del_fcoll_Rct, HII_TOT_NUM_PIXELS, sfrd_nbins, d_y_arr, d_dens_R_grid, d_sfrd_grid, d_ave_sfrd_buf); + if (threadsPerBlock == 0) { + LOG_DEBUG("Memory allocation failed inside init_sfrd_gpu_data."); + } else { + LOG_DEBUG("threadsPerBlock = %u", threadsPerBlock); + } // --------------------------------------------------------------------------------------------------------------------------------------------------------------- + // R_ct starts at 39 and goes down to 0 for(R_ct=global_params.NUM_FILTER_STEPS_FOR_Ts; R_ct--;){ dzpp_for_evolve = dzpp_list[R_ct]; @@ -1470,8 +1498,13 @@ void ts_main(float redshift, float prev_redshift, UserParams *user_params, Cosmo if(flag_options->USE_MINI_HALOS){ Mcrit_box_input = log10_Mcrit_LW[R_index]; } - calculate_sfrd_from_grid(R_ct, delta_box_input, Mcrit_box_input, del_fcoll_Rct, del_fcoll_Rct_MINI, &ave_fcoll, &ave_fcoll_MINI, d_y_arr, d_dens_R_grid, d_sfrd_grid, d_ave_sfrd_buf); + // struct ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + // calculate_sfrd_from_grid(R_ct, delta_box_input, Mcrit_box_input, del_fcoll_Rct, del_fcoll_Rct_MINI, &ave_fcoll, &ave_fcoll_MINI, threadsPerBlock, device_data); + // pointers 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + calculate_sfrd_from_grid(R_ct, delta_box_input, Mcrit_box_input, del_fcoll_Rct, del_fcoll_Rct_MINI, &ave_fcoll, &ave_fcoll_MINI, threadsPerBlock, d_y_arr, d_dens_R_grid, d_sfrd_grid, d_ave_sfrd_buf); + // calculate_sfrd_from_grid(R_ct, delta_box_input, Mcrit_box_input, del_fcoll_Rct, del_fcoll_Rct_MINI, &ave_fcoll, &ave_fcoll_MINI, d_y_arr, d_dens_R_grid, d_sfrd_grid, d_ave_sfrd_buf); // calculate_sfrd_from_grid(R_ct,delta_box_input,Mcrit_box_input,del_fcoll_Rct,del_fcoll_Rct_MINI,&ave_fcoll,&ave_fcoll_MINI); + // ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- avg_fix_term = mean_sfr_zpp[R_ct]/ave_fcoll; if(flag_options->USE_MINI_HALOS) avg_fix_term_MINI = mean_sfr_zpp_mini[R_ct]/ave_fcoll_MINI; @@ -1582,6 +1615,12 @@ void ts_main(float redshift, float prev_redshift, UserParams *user_params, Cosmo } } } + // struct ------------------------------------------------------------------------------------------------------------------------------------------------------------------ + // free_sfrd_gpu_data(device_data); + // free(device_data); + // pointers ---------------------------------------------------------------------------------------------------------------------------------------------------------------- + free_sfrd_gpu_data(&d_y_arr, &d_dens_R_grid, &d_sfrd_grid, &d_ave_sfrd_buf); + // ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- } //we definitely don't need these tables anymore diff --git a/src/py21cmfast/src/SpinTemperatureBox.cu b/src/py21cmfast/src/SpinTemperatureBox.cu index 0f325e792..8c88cd1f1 100644 --- 
a/src/py21cmfast/src/SpinTemperatureBox.cu +++ b/src/py21cmfast/src/SpinTemperatureBox.cu @@ -118,93 +118,51 @@ __global__ void compute_and_reduce( if (tid == 0) ave_sfrd_buf[blockIdx.x] = sdata[0]; } -double calculate_sfrd_from_grid_gpu( - RGTable1D_f *SFRD_conditional_table, // input data +unsigned int init_sfrd_gpu_data( float *dens_R_grid, // input data - double *zpp_growth, // input data - int R_ct, // filter step/loop iteration/spherical annuli (out of 40 by default) float *sfrd_grid, // star formation rate density grid to be updated - unsigned int num_pixels, // length of input data - float *d_y_arr, float *d_dens_R_grid, float *d_sfrd_grid, double *d_ave_sfrd_buf // device pointers + unsigned long long num_pixels, // length of input data + unsigned int nbins, // nbins for sfrd_grid->y + float **d_y_arr, // copies of pointers to pointers + float **d_dens_R_grid, + float **d_sfrd_grid, + double **d_ave_sfrd_buf ) { cudaError_t err = cudaGetLastError(); - // Set bools for initial and final filtering steps to allow for memory reuse - bool initial_filter_step = false; - bool final_filter_step = false; - - // Default NUM_FILTER_STEPS_FOR_Ts = 40 - if (global_params.NUM_FILTER_STEPS_FOR_Ts - 1 == R_ct) { - initial_filter_step = true; - } else if (R_ct == 0) { - final_filter_step = true; - } else if (global_params.NUM_FILTER_STEPS_FOR_Ts == 1) { - // Would case of NUM_FILTER_STEPS_FOR_Ts = 1 ever occur? - initial_filter_step = true; - final_filter_step = true; - } - - // Get growth factor for current filter step - double zpp_growth_R_ct = zpp_growth[R_ct]; - - // ============================================================ <- these pointers need to persist across kernel calls, i.e. across loop iterations! 
- // Device pointers are initialised before loop - // if (initial_filter_step) { - // float *d_y_arr, *d_dens_R_grid, *d_sfrd_grid; - // double* d_ave_sfrd_buf; - // } - // ============================================================ - // Allocate device memory ------------------------------------------------------------------------------------------ - if (initial_filter_step) { - err = cudaMalloc((void**)&d_y_arr, sizeof(float) * SFRD_conditional_table->n_bin); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMalloc((void**)&d_dens_R_grid, sizeof(float) * num_pixels); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMalloc((void**)&d_sfrd_grid, sizeof(float) * num_pixels); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - LOG_INFO("SFRD_conditional_table.y_arr and density and sfrd grids allocated on device."); + err = cudaMalloc((void**)d_y_arr, sizeof(float) * nbins); // already pointers to pointers (no &) + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); } - - // Copy data from host to device ----------------------------------------------------------------------------------- - err = cudaMemcpy(d_y_arr, SFRD_conditional_table->y_arr, sizeof(float) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice); + err = cudaMalloc((void**)d_dens_R_grid, sizeof(float) * num_pixels); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); } - err = cudaMemcpy(d_dens_R_grid, dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); // TODO: Does this change between filter steps? 
+ err = cudaMalloc((void**)d_sfrd_grid, sizeof(float) * num_pixels); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); } - // In initial filter step, set array to 0; - // for future steps, previous array values will be written over - if (initial_filter_step) { - err = cudaMemset(d_sfrd_grid, 0, sizeof(float) * num_pixels); // fill with byte=0 - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - LOG_INFO("SFRD_conditional_table.y_arr and density and sfrd grids copied to device."); - } else { - LOG_INFO("SFRD_conditional_table.y_arr and density grid copied to device."); + LOG_INFO("SFRD_conditional_table.y_arr and density and sfrd grids allocated on device."); + + // Initialise sfrd_grid to 0 ---------------------------------------------------------------------------------------- + // dereference the pointers to pointers (*) + err = cudaMemset(*d_sfrd_grid, 0, sizeof(float) * num_pixels); // fill with byte=0 + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s: %p", cudaGetErrorString(err), d_sfrd_grid); + Throw(CUDAError); } + LOG_INFO("density grid copied to device. sfrd grid initialised to 0."); // Get max threads/block for device int maxThreadsPerBlock; cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0); // Set threads/block based on device max - int threadsPerBlock; + unsigned int threadsPerBlock; if (maxThreadsPerBlock >= 512) { threadsPerBlock = 512; } else if (maxThreadsPerBlock >= 256) { @@ -218,27 +176,63 @@ double calculate_sfrd_from_grid_gpu( } else { threadsPerBlock = 16; } - int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; // 91m & 256 -> 355959 - int smemSize = threadsPerBlock * sizeof(double); // shared memory // Allocate memory for SFRD sum buffer and initialise to 0 only for initial filter step; // reuse memory for remaining filter steps. 
unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); - if (initial_filter_step) { - err = cudaMalloc((void**)&d_ave_sfrd_buf, sizeof(double) * buffer_length); // 91m & 256 -> 177979 - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - LOG_INFO("SFRD sum reduction buffer allocated on device."); - - err = cudaMemset(d_ave_sfrd_buf, 0, sizeof(double) * buffer_length); // fill with byte=0 - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - LOG_INFO("SFRD sum reduction buffer initialised to 0."); + // already pointers to pointers (no &) + err = cudaMalloc((void**)d_ave_sfrd_buf, sizeof(double) * buffer_length); // 91m & 256 -> 177979 + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + LOG_INFO("SFRD sum reduction buffer allocated on device."); + + // dereference the pointers to pointers (*) + err = cudaMemset(*d_ave_sfrd_buf, 0, sizeof(double) * buffer_length); // fill with byte=0 + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); } + LOG_INFO("SFRD sum reduction buffer initialised to 0."); + + return threadsPerBlock; +} + +double calculate_sfrd_from_grid_gpu( + RGTable1D_f *SFRD_conditional_table, // input data + float *dens_R_grid, // input data + double *zpp_growth, // input data + int R_ct, // filter step/loop iteration/spherical annuli (out of 40 by default) + float *sfrd_grid, // star formation rate density grid to be updated + unsigned long long num_pixels, // length of input data + unsigned int threadsPerBlock, // computed in init function + float *d_y_arr, + float *d_dens_R_grid, + float *d_sfrd_grid, + double *d_ave_sfrd_buf +) { + cudaError_t err = cudaGetLastError(); + + // Get growth factor for current filter step + double zpp_growth_R_ct = zpp_growth[R_ct]; + + // Copy data from host to device 
----------------------------------------------------------------------------------- + err = cudaMemcpy(d_y_arr, SFRD_conditional_table->y_arr, sizeof(float) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMemcpy(d_dens_R_grid, dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); // TODO: Does this change between filter steps? + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + LOG_INFO("SFRD_conditional_table.y_arr and density grid copied to device."); + + unsigned int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; // 91m & 256 -> 355959 + unsigned int smemSize = threadsPerBlock * sizeof(double); // shared memory + unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); // Invoke kernel switch (threadsPerBlock) { @@ -289,30 +283,433 @@ double calculate_sfrd_from_grid_gpu( } LOG_INFO("SFRD sum copied to host."); - // Deallocate device memory on final filter step. 
- if (final_filter_step) { - err = cudaFree(d_y_arr); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaFree(d_dens_R_grid); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaFree(d_sfrd_grid); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaFree(d_ave_sfrd_buf); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - LOG_INFO("Device memory freed."); + return ave_sfrd_buf; +} + +void free_sfrd_gpu_data( + float **d_y_arr, // copies of pointers to pointers + float **d_dens_R_grid, + float **d_sfrd_grid, + double **d_ave_sfrd_buf +) { + cudaError_t err = cudaGetLastError(); + + // Need to dereference the pointers to pointers + err = cudaFree(*d_y_arr); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaFree(*d_dens_R_grid); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaFree(*d_sfrd_grid); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaFree(*d_ave_sfrd_buf); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); } - return ave_sfrd_buf; + LOG_INFO("Device memory freed."); } + + +// ------------------------------------------------------------------------------------------------------------------------ +// unsigned int init_sfrd_gpu_data( +// // RGTable1D_f *SFRD_conditional_table, // input data +// float *dens_R_grid, // input data +// float *sfrd_grid, // star formation rate density grid to be updated +// unsigned long long num_pixels, // length of input data +// unsigned int nbins, // nbins for sfrd_grid->y +// sfrd_gpu_data *d_data // struct of 
device pointers +// ) { +// cudaError_t err = cudaGetLastError(); + +// // Allocate device memory ------------------------------------------------------------------------------------------ +// err = cudaMalloc((void**)&d_data->d_y_arr, sizeof(float) * nbins); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// err = cudaMalloc((void**)&d_data->d_dens_R_grid, sizeof(float) * num_pixels); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// err = cudaMalloc((void**)&d_data->d_sfrd_grid, sizeof(float) * num_pixels); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// LOG_INFO("SFRD_conditional_table.y_arr and density and sfrd grids allocated on device."); + +// // Copy data from host to device / initialise to 0 -------------------------------------------------------------------- +// // err = cudaMemcpy(d_data->d_dens_R_grid, dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); // TODO: Does this change between filter steps? +// // if (err != cudaSuccess) { +// // LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// // Throw(CUDAError); +// // } +// err = cudaMemset(d_data->d_sfrd_grid, 0, sizeof(float) * num_pixels); // fill with byte=0 +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// LOG_INFO("density grid copied to device. 
sfrd grid initialised to 0."); + +// // Get max threads/block for device +// int maxThreadsPerBlock; +// cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0); + +// // Set threads/block based on device max +// unsigned int threadsPerBlock; +// if (maxThreadsPerBlock >= 512) { +// threadsPerBlock = 512; +// } else if (maxThreadsPerBlock >= 256) { +// threadsPerBlock = 256; +// } else if (maxThreadsPerBlock >= 128) { +// threadsPerBlock = 128; +// } else if (maxThreadsPerBlock >= 64) { +// threadsPerBlock = 64; +// } else if (maxThreadsPerBlock >= 32) { +// threadsPerBlock = 32; +// } else { +// threadsPerBlock = 16; +// } + +// // Allocate memory for SFRD sum buffer and initialise to 0 only for initial filter step; +// // reuse memory for remaining filter steps. +// unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); +// err = cudaMalloc((void**)&d_data->d_ave_sfrd_buf, sizeof(double) * buffer_length); // 91m & 256 -> 177979 +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// LOG_INFO("SFRD sum reduction buffer allocated on device."); + +// err = cudaMemset(d_data->d_ave_sfrd_buf, 0, sizeof(double) * buffer_length); // fill with byte=0 +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// LOG_INFO("SFRD sum reduction buffer initialised to 0."); + +// return threadsPerBlock; +// } + +// double calculate_sfrd_from_grid_gpu( +// RGTable1D_f *SFRD_conditional_table, // input data +// float *dens_R_grid, // input data +// double *zpp_growth, // input data +// int R_ct, // filter step/loop iteration/spherical annuli (out of 40 by default) +// float *sfrd_grid, // star formation rate density grid to be updated +// unsigned long long num_pixels, // length of input data +// unsigned int threadsPerBlock, // computed in init function +// const sfrd_gpu_data *d_data // device pointers +// ) { +// 
cudaError_t err = cudaGetLastError(); + +// // Get growth factor for current filter step +// double zpp_growth_R_ct = zpp_growth[R_ct]; + +// // Copy data from host to device ----------------------------------------------------------------------------------- +// err = cudaMemcpy(d_data->d_y_arr, SFRD_conditional_table->y_arr, sizeof(float) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// err = cudaMemcpy(d_data->d_dens_R_grid, dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); // TODO: Does this change between filter steps? +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// LOG_INFO("SFRD_conditional_table.y_arr and density grid copied to device."); + +// unsigned int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; // 91m & 256 -> 355959 +// unsigned int smemSize = threadsPerBlock * sizeof(double); // shared memory +// unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); + +// // Invoke kernel +// switch (threadsPerBlock) { +// case 512: +// compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_data->d_y_arr, d_data->d_dens_R_grid, zpp_growth_R_ct, d_data->d_sfrd_grid, d_data->d_ave_sfrd_buf, num_pixels); +// break; +// case 256: +// compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_data->d_y_arr, d_data->d_dens_R_grid, zpp_growth_R_ct, d_data->d_sfrd_grid, d_data->d_ave_sfrd_buf, num_pixels); +// break; +// case 128: +// compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_data->d_y_arr, d_data->d_dens_R_grid, zpp_growth_R_ct, d_data->d_sfrd_grid, d_data->d_ave_sfrd_buf, 
num_pixels); +// break; +// case 64: +// compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_data->d_y_arr, d_data->d_dens_R_grid, zpp_growth_R_ct, d_data->d_sfrd_grid, d_data->d_ave_sfrd_buf, num_pixels); +// break; +// case 32: +// compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_data->d_y_arr, d_data->d_dens_R_grid, zpp_growth_R_ct, d_data->d_sfrd_grid, d_data->d_ave_sfrd_buf, num_pixels); +// break; +// default: +// LOG_WARNING("Thread size invalid; defaulting to 256."); +// compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_data->d_y_arr, d_data->d_dens_R_grid, zpp_growth_R_ct, d_data->d_sfrd_grid, d_data->d_ave_sfrd_buf, num_pixels); +// } +// LOG_INFO("SpinTemperatureBox compute-and-reduce kernel called."); + +// // Only use during development? +// err = cudaDeviceSynchronize(); +// CATCH_CUDA_ERROR(err); + +// err = cudaGetLastError(); +// if (err != cudaSuccess) { +// LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } + +// // Use thrust to reduce computed sums to one value + +// // Wrap device pointer in a thrust::device_ptr +// thrust::device_ptr d_ave_sfrd_buf_ptr(d_data->d_ave_sfrd_buf); +// // Reduce final buffer sums to one value +// double ave_sfrd_buf = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + buffer_length, 0., thrust::plus()); +// LOG_INFO("SFRD sum reduced to single value by thrust::reduce operation."); + +// // Copy results from device to host. 
+// err = cudaMemcpy(sfrd_grid, d_data->d_sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyDeviceToHost); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// LOG_INFO("SFRD sum copied to host."); + +// return ave_sfrd_buf; +// } + +// void free_sfrd_gpu_data(sfrd_gpu_data *d_data) { +// cudaError_t err = cudaGetLastError(); + +// err = cudaFree(d_data->d_y_arr); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// err = cudaFree(d_data->d_dens_R_grid); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// err = cudaFree(d_data->d_sfrd_grid); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// err = cudaFree(d_data->d_ave_sfrd_buf); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } + +// LOG_INFO("Device memory freed."); +// } + + +// ---------------------------------------------------------------------------------------------------------------------------- +// double calculate_sfrd_from_grid_gpu( +// RGTable1D_f *SFRD_conditional_table, // input data +// float *dens_R_grid, // input data +// double *zpp_growth, // input data +// int R_ct, // filter step/loop iteration/spherical annuli (out of 40 by default) +// float *sfrd_grid, // star formation rate density grid to be updated +// unsigned int num_pixels, // length of input data +// float *d_y_arr, float *d_dens_R_grid, float *d_sfrd_grid, double *d_ave_sfrd_buf // device pointers +// ) { +// cudaError_t err = cudaGetLastError(); + +// // Set bools for initial and final filtering steps to allow for memory reuse +// bool initial_filter_step = false; +// bool final_filter_step = false; + +// // Default NUM_FILTER_STEPS_FOR_Ts = 40 +// if (global_params.NUM_FILTER_STEPS_FOR_Ts - 1 == 
R_ct) { +// initial_filter_step = true; +// } else if (R_ct == 0) { +// final_filter_step = true; +// } else if (global_params.NUM_FILTER_STEPS_FOR_Ts == 1) { +// // Would case of NUM_FILTER_STEPS_FOR_Ts = 1 ever occur? +// initial_filter_step = true; +// final_filter_step = true; +// } + +// // Get growth factor for current filter step +// double zpp_growth_R_ct = zpp_growth[R_ct]; + +// // ============================================================ +// // These device pointers are initialised in ts_main outside the loop that calls this function +// // so that the pointers can persist across loop iterations (filter steps). +// // +// // float *d_y_arr, *d_dens_R_grid, *d_sfrd_grid; +// // double* d_ave_sfrd_buf; +// // ============================================================ + +// // Allocate device memory ------------------------------------------------------------------------------------------ +// if (initial_filter_step) { +// err = cudaMalloc((void**)&d_y_arr, sizeof(float) * SFRD_conditional_table->n_bin); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// err = cudaMalloc((void**)&d_dens_R_grid, sizeof(float) * num_pixels); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// err = cudaMalloc((void**)&d_sfrd_grid, sizeof(float) * num_pixels); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// LOG_INFO("SFRD_conditional_table.y_arr and density and sfrd grids allocated on device."); +// } + +// // Copy data from host to device ----------------------------------------------------------------------------------- +// err = cudaMemcpy(d_y_arr, SFRD_conditional_table->y_arr, sizeof(float) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } 
+// err = cudaMemcpy(d_dens_R_grid, dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); // TODO: Does this change between filter steps? +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// // In initial filter step, set array to 0; +// // for future steps, previous array values will be written over +// if (initial_filter_step) { +// err = cudaMemset(d_sfrd_grid, 0, sizeof(float) * num_pixels); // fill with byte=0 +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// LOG_INFO("SFRD_conditional_table.y_arr and density and sfrd grids copied to device."); +// } else { +// LOG_INFO("SFRD_conditional_table.y_arr and density grid copied to device."); +// } + +// // Get max threads/block for device +// int maxThreadsPerBlock; +// cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0); + +// // Set threads/block based on device max +// int threadsPerBlock; +// if (maxThreadsPerBlock >= 512) { +// threadsPerBlock = 512; +// } else if (maxThreadsPerBlock >= 256) { +// threadsPerBlock = 256; +// } else if (maxThreadsPerBlock >= 128) { +// threadsPerBlock = 128; +// } else if (maxThreadsPerBlock >= 64) { +// threadsPerBlock = 64; +// } else if (maxThreadsPerBlock >= 32) { +// threadsPerBlock = 32; +// } else { +// threadsPerBlock = 16; +// } +// int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; // 91m & 256 -> 355959 +// int smemSize = threadsPerBlock * sizeof(double); // shared memory + +// // Allocate memory for SFRD sum buffer and initialise to 0 only for initial filter step; +// // reuse memory for remaining filter steps. 
+// unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); +// if (initial_filter_step) { +// err = cudaMalloc((void**)&d_ave_sfrd_buf, sizeof(double) * buffer_length); // 91m & 256 -> 177979 +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// LOG_INFO("SFRD sum reduction buffer allocated on device."); + +// err = cudaMemset(d_ave_sfrd_buf, 0, sizeof(double) * buffer_length); // fill with byte=0 +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// LOG_INFO("SFRD sum reduction buffer initialised to 0."); +// } + +// // Invoke kernel +// switch (threadsPerBlock) { +// case 512: +// compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); +// break; +// case 256: +// compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); +// break; +// case 128: +// compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); +// break; +// case 64: +// compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); +// break; +// case 32: +// compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); +// break; +// default: +// LOG_WARNING("Thread size invalid; defaulting 
to 256."); +// compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); +// } +// LOG_INFO("SpinTemperatureBox compute-and-reduce kernel called."); + +// // Only use during development? +// err = cudaDeviceSynchronize(); +// CATCH_CUDA_ERROR(err); + +// err = cudaGetLastError(); +// if (err != cudaSuccess) { +// LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } + +// // Use thrust to reduce computed sums to one value + +// // Wrap device pointer in a thrust::device_ptr +// thrust::device_ptr d_ave_sfrd_buf_ptr(d_ave_sfrd_buf); +// // Reduce final buffer sums to one value +// double ave_sfrd_buf = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + buffer_length, 0., thrust::plus()); +// LOG_INFO("SFRD sum reduced to single value by thrust::reduce operation."); + +// // Copy results from device to host. +// err = cudaMemcpy(sfrd_grid, d_sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyDeviceToHost); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// LOG_INFO("SFRD sum copied to host."); + +// // Deallocate device memory on final filter step. 
+// if (final_filter_step) { +// err = cudaFree(d_y_arr); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// err = cudaFree(d_dens_R_grid); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// err = cudaFree(d_sfrd_grid); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// err = cudaFree(d_ave_sfrd_buf); +// if (err != cudaSuccess) { +// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); +// Throw(CUDAError); +// } +// LOG_INFO("Device memory freed."); +// } + +// return ave_sfrd_buf; +// } diff --git a/src/py21cmfast/src/SpinTemperatureBox.h b/src/py21cmfast/src/SpinTemperatureBox.h index 687e3cdeb..110bfc519 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.h +++ b/src/py21cmfast/src/SpinTemperatureBox.h @@ -10,6 +10,13 @@ #ifdef __cplusplus extern "C" { #endif +// typedef struct sfrd_gpu_data { +// float *d_y_arr; +// float *d_dens_R_grid; +// float *d_sfrd_grid; +// double *d_ave_sfrd_buf; +// } sfrd_gpu_data; + int ComputeTsBox(float redshift, float prev_redshift, UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, float perturbed_field_redshift, short cleanup, @@ -20,20 +27,34 @@ int UpdateXraySourceBox(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, HaloBox *halobox, double R_inner, double R_outer, int R_ct, XraySourceBox *source_box); -// void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, float *sfrd_grid, -// float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini); - -// double calculate_sfrd_from_grid_gpu(RGTable1D_f *SFRD_conditional_table, float *dens_R_grid, double *zpp_growth, -// int R_ct, float *sfrd_grid, unsigned int num_pixels); - +// pointers 
-------------------------------------------------------------------------------------------------------- void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, float *sfrd_grid, - float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini, + float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini, unsigned int threadsPerBlock, float *d_y_arr, float *d_dens_R_grid, float *d_sfrd_grid, double *d_ave_sfrd_buf); -double calculate_sfrd_from_grid_gpu(RGTable1D_f *SFRD_conditional_table, float *dens_R_grid, double *zpp_growth, - int R_ct, float *sfrd_grid, unsigned int num_pixels, +unsigned int init_sfrd_gpu_data(float *dens_R_grid, float *sfrd_grid, unsigned long long num_pixels, unsigned int nbins, + float **d_y_arr, float **d_dens_R_grid, float **d_sfrd_grid, double **d_ave_sfrd_buf); + +double calculate_sfrd_from_grid_gpu(RGTable1D_f *SFRD_conditional_table, float *dens_R_grid, + double *zpp_growth, int R_ct, float *sfrd_grid, unsigned long long num_pixels, unsigned int threadsPerBlock, float *d_y_arr, float *d_dens_R_grid, float *d_sfrd_grid, double *d_ave_sfrd_buf); +void free_sfrd_gpu_data(float **d_y_arr, float **d_dens_R_grid, float **d_sfrd_grid, double **d_ave_sfrd_buf); + +// wrap pointers in struct ------------------------------------------------------------------------------------------ +// void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, float *sfrd_grid, +// float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini, +// unsigned int threadsPerBlock, const sfrd_gpu_data *d_data); + +// unsigned int init_sfrd_gpu_data(float *dens_R_grid, float *sfrd_grid, unsigned long long num_pixels, +// unsigned int nbins, sfrd_gpu_data *d_data); + +// double calculate_sfrd_from_grid_gpu(RGTable1D_f *SFRD_conditional_table, float *dens_R_grid, +// double *zpp_growth, int R_ct, float *sfrd_grid, unsigned long long num_pixels, +// unsigned int threadsPerBlock, const sfrd_gpu_data *d_data); + +// void 
free_sfrd_gpu_data(sfrd_gpu_data *d_data); + #ifdef __cplusplus } #endif From 1bd2f8835a0ef8e78bea882417d77c8761954f2e Mon Sep 17 00:00:00 2001 From: alserene Date: Thu, 21 Nov 2024 07:37:49 +1100 Subject: [PATCH 033/145] Add accessor function for nbins. --- src/py21cmfast/src/interp_tables.c | 5 +++++ src/py21cmfast/src/interp_tables.h | 1 + 2 files changed, 6 insertions(+) diff --git a/src/py21cmfast/src/interp_tables.c b/src/py21cmfast/src/interp_tables.c index 85d844f08..59df01b3f 100644 --- a/src/py21cmfast/src/interp_tables.c +++ b/src/py21cmfast/src/interp_tables.c @@ -1051,3 +1051,8 @@ double EvaluatedSigmasqdm(double lnM){ RGTable1D_f* get_SFRD_conditional_table(void) { return &SFRD_conditional_table; } + +// Accessor function for the GPU SpinTemp memory allocation function to access nbins. +int get_nbins(void) { + return NDELTA; +} diff --git a/src/py21cmfast/src/interp_tables.h b/src/py21cmfast/src/interp_tables.h index a2b692f62..86482f082 100644 --- a/src/py21cmfast/src/interp_tables.h +++ b/src/py21cmfast/src/interp_tables.h @@ -67,6 +67,7 @@ void free_global_tables(); void free_dNdM_tables(); RGTable1D_f* get_SFRD_conditional_table(void); +int get_nbins(void); #ifdef __cplusplus } From 80a3e6668c6edd261b858623fa2d2c94f48549a5 Mon Sep 17 00:00:00 2001 From: alserene Date: Thu, 21 Nov 2024 23:02:58 +1100 Subject: [PATCH 034/145] Update kernel param to correct type.
--- src/py21cmfast/src/SpinTemperatureBox.cu | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/py21cmfast/src/SpinTemperatureBox.cu b/src/py21cmfast/src/SpinTemperatureBox.cu index 8c88cd1f1..8b366f34f 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.cu +++ b/src/py21cmfast/src/SpinTemperatureBox.cu @@ -66,7 +66,7 @@ __global__ void compute_and_reduce( double zpp_growth_R_ct, // value float *sfrd_grid, // star formation rate density grid to be updated double *ave_sfrd_buf, // output buffer of length ceil(n / (threadsPerBlock * 2)) - unsigned int num_pixels // length of input data + unsigned long long num_pixels // length of input data ) { // An array to store intermediate summations @@ -131,7 +131,7 @@ unsigned int init_sfrd_gpu_data( cudaError_t err = cudaGetLastError(); // Allocate device memory ------------------------------------------------------------------------------------------ - err = cudaMalloc((void**)d_y_arr, sizeof(float) * nbins); // already pointers to pointers (no &) + err = cudaMalloc((void**)d_y_arr, sizeof(float) * nbins); // already pointers to pointers (no & needed) if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); @@ -148,14 +148,13 @@ unsigned int init_sfrd_gpu_data( } LOG_INFO("SFRD_conditional_table.y_arr and density and sfrd grids allocated on device."); - // Initialise sfrd_grid to 0 ---------------------------------------------------------------------------------------- - // dereference the pointers to pointers (*) - err = cudaMemset(*d_sfrd_grid, 0, sizeof(float) * num_pixels); // fill with byte=0 + // Initialise sfrd_grid to 0 (fill with byte=0) ---------------------------------------------------------------------- + err = cudaMemset(*d_sfrd_grid, 0, sizeof(float) * num_pixels); // dereference the pointer to a pointer (*) if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s: %p", cudaGetErrorString(err), d_sfrd_grid); Throw(CUDAError); } - 
LOG_INFO("density grid copied to device. sfrd grid initialised to 0."); + LOG_INFO("sfrd grid initialised to 0."); // Get max threads/block for device int maxThreadsPerBlock; @@ -180,16 +179,15 @@ unsigned int init_sfrd_gpu_data( // Allocate memory for SFRD sum buffer and initialise to 0 only for initial filter step; // reuse memory for remaining filter steps. unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); - // already pointers to pointers (no &) - err = cudaMalloc((void**)d_ave_sfrd_buf, sizeof(double) * buffer_length); // 91m & 256 -> 177979 + err = cudaMalloc((void**)d_ave_sfrd_buf, sizeof(double) * buffer_length); // already pointer to a pointer (no & needed) ...91m & 256 -> 177979 if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); } LOG_INFO("SFRD sum reduction buffer allocated on device."); - // dereference the pointers to pointers (*) - err = cudaMemset(*d_ave_sfrd_buf, 0, sizeof(double) * buffer_length); // fill with byte=0 + // Initialise buffer to 0 (fill with byte=0) + err = cudaMemset(*d_ave_sfrd_buf, 0, sizeof(double) * buffer_length); // dereference the pointer to a pointer (*) if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); @@ -223,7 +221,7 @@ double calculate_sfrd_from_grid_gpu( LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); } - err = cudaMemcpy(d_dens_R_grid, dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); // TODO: Does this change between filter steps? 
+ err = cudaMemcpy(d_dens_R_grid, dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); @@ -275,7 +273,7 @@ double calculate_sfrd_from_grid_gpu( double ave_sfrd_buf = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + buffer_length, 0., thrust::plus()); LOG_INFO("SFRD sum reduced to single value by thrust::reduce operation."); - // Copy results from device to host. + // Copy results from device to host err = cudaMemcpy(sfrd_grid, d_sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyDeviceToHost); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); @@ -294,7 +292,7 @@ void free_sfrd_gpu_data( ) { cudaError_t err = cudaGetLastError(); - // Need to dereference the pointers to pointers + // Need to dereference the pointers to pointers (*) err = cudaFree(*d_y_arr); if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); From 6c95c8b44fa80cbc892a420e6e8281722c320624 Mon Sep 17 00:00:00 2001 From: alserene Date: Mon, 25 Nov 2024 19:40:57 +1100 Subject: [PATCH 035/145] Remove commented old code. 
--- src/py21cmfast/src/SpinTemperatureBox.c | 3 - src/py21cmfast/src/SpinTemperatureBox.cu | 395 ----------------------- 2 files changed, 398 deletions(-) diff --git a/src/py21cmfast/src/SpinTemperatureBox.c b/src/py21cmfast/src/SpinTemperatureBox.c index 52b15df09..720ad58d3 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.c +++ b/src/py21cmfast/src/SpinTemperatureBox.c @@ -942,9 +942,6 @@ void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, if (true && flag_options_global->USE_MASS_DEPENDENT_ZETA && user_params_global->USE_INTERPOLATION_TABLES && !flag_options_global->USE_MINI_HALOS) { RGTable1D_f* SFRD_conditional_table = get_SFRD_conditional_table(); - // ave_sfrd_buf = calculate_sfrd_from_grid_gpu(SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS); - // ave_sfrd_buf = calculate_sfrd_from_grid_gpu(SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS, - // d_y_arr, d_dens_R_grid, d_sfrd_grid, d_ave_sfrd_buf); ave_sfrd_buf = calculate_sfrd_from_grid_gpu(SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS, threadsPerBlock, // d_data d_y_arr, d_dens_R_grid, d_sfrd_grid, d_ave_sfrd_buf diff --git a/src/py21cmfast/src/SpinTemperatureBox.cu b/src/py21cmfast/src/SpinTemperatureBox.cu index 8b366f34f..201becdc4 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.cu +++ b/src/py21cmfast/src/SpinTemperatureBox.cu @@ -316,398 +316,3 @@ void free_sfrd_gpu_data( LOG_INFO("Device memory freed."); } - - -// ------------------------------------------------------------------------------------------------------------------------ -// unsigned int init_sfrd_gpu_data( -// // RGTable1D_f *SFRD_conditional_table, // input data -// float *dens_R_grid, // input data -// float *sfrd_grid, // star formation rate density grid to be updated -// unsigned long long num_pixels, // length of input data -// unsigned int nbins, // nbins for sfrd_grid->y -// 
sfrd_gpu_data *d_data // struct of device pointers -// ) { -// cudaError_t err = cudaGetLastError(); - -// // Allocate device memory ------------------------------------------------------------------------------------------ -// err = cudaMalloc((void**)&d_data->d_y_arr, sizeof(float) * nbins); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// err = cudaMalloc((void**)&d_data->d_dens_R_grid, sizeof(float) * num_pixels); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// err = cudaMalloc((void**)&d_data->d_sfrd_grid, sizeof(float) * num_pixels); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// LOG_INFO("SFRD_conditional_table.y_arr and density and sfrd grids allocated on device."); - -// // Copy data from host to device / initialise to 0 -------------------------------------------------------------------- -// // err = cudaMemcpy(d_data->d_dens_R_grid, dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); // TODO: Does this change between filter steps? -// // if (err != cudaSuccess) { -// // LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// // Throw(CUDAError); -// // } -// err = cudaMemset(d_data->d_sfrd_grid, 0, sizeof(float) * num_pixels); // fill with byte=0 -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// LOG_INFO("density grid copied to device. 
sfrd grid initialised to 0."); - -// // Get max threads/block for device -// int maxThreadsPerBlock; -// cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0); - -// // Set threads/block based on device max -// unsigned int threadsPerBlock; -// if (maxThreadsPerBlock >= 512) { -// threadsPerBlock = 512; -// } else if (maxThreadsPerBlock >= 256) { -// threadsPerBlock = 256; -// } else if (maxThreadsPerBlock >= 128) { -// threadsPerBlock = 128; -// } else if (maxThreadsPerBlock >= 64) { -// threadsPerBlock = 64; -// } else if (maxThreadsPerBlock >= 32) { -// threadsPerBlock = 32; -// } else { -// threadsPerBlock = 16; -// } - -// // Allocate memory for SFRD sum buffer and initialise to 0 only for initial filter step; -// // reuse memory for remaining filter steps. -// unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); -// err = cudaMalloc((void**)&d_data->d_ave_sfrd_buf, sizeof(double) * buffer_length); // 91m & 256 -> 177979 -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// LOG_INFO("SFRD sum reduction buffer allocated on device."); - -// err = cudaMemset(d_data->d_ave_sfrd_buf, 0, sizeof(double) * buffer_length); // fill with byte=0 -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// LOG_INFO("SFRD sum reduction buffer initialised to 0."); - -// return threadsPerBlock; -// } - -// double calculate_sfrd_from_grid_gpu( -// RGTable1D_f *SFRD_conditional_table, // input data -// float *dens_R_grid, // input data -// double *zpp_growth, // input data -// int R_ct, // filter step/loop iteration/spherical annuli (out of 40 by default) -// float *sfrd_grid, // star formation rate density grid to be updated -// unsigned long long num_pixels, // length of input data -// unsigned int threadsPerBlock, // computed in init function -// const sfrd_gpu_data *d_data // device pointers -// ) { -// 
cudaError_t err = cudaGetLastError(); - -// // Get growth factor for current filter step -// double zpp_growth_R_ct = zpp_growth[R_ct]; - -// // Copy data from host to device ----------------------------------------------------------------------------------- -// err = cudaMemcpy(d_data->d_y_arr, SFRD_conditional_table->y_arr, sizeof(float) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// err = cudaMemcpy(d_data->d_dens_R_grid, dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); // TODO: Does this change between filter steps? -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// LOG_INFO("SFRD_conditional_table.y_arr and density grid copied to device."); - -// unsigned int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; // 91m & 256 -> 355959 -// unsigned int smemSize = threadsPerBlock * sizeof(double); // shared memory -// unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); - -// // Invoke kernel -// switch (threadsPerBlock) { -// case 512: -// compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_data->d_y_arr, d_data->d_dens_R_grid, zpp_growth_R_ct, d_data->d_sfrd_grid, d_data->d_ave_sfrd_buf, num_pixels); -// break; -// case 256: -// compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_data->d_y_arr, d_data->d_dens_R_grid, zpp_growth_R_ct, d_data->d_sfrd_grid, d_data->d_ave_sfrd_buf, num_pixels); -// break; -// case 128: -// compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_data->d_y_arr, d_data->d_dens_R_grid, zpp_growth_R_ct, d_data->d_sfrd_grid, d_data->d_ave_sfrd_buf, 
num_pixels); -// break; -// case 64: -// compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_data->d_y_arr, d_data->d_dens_R_grid, zpp_growth_R_ct, d_data->d_sfrd_grid, d_data->d_ave_sfrd_buf, num_pixels); -// break; -// case 32: -// compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_data->d_y_arr, d_data->d_dens_R_grid, zpp_growth_R_ct, d_data->d_sfrd_grid, d_data->d_ave_sfrd_buf, num_pixels); -// break; -// default: -// LOG_WARNING("Thread size invalid; defaulting to 256."); -// compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_data->d_y_arr, d_data->d_dens_R_grid, zpp_growth_R_ct, d_data->d_sfrd_grid, d_data->d_ave_sfrd_buf, num_pixels); -// } -// LOG_INFO("SpinTemperatureBox compute-and-reduce kernel called."); - -// // Only use during development? -// err = cudaDeviceSynchronize(); -// CATCH_CUDA_ERROR(err); - -// err = cudaGetLastError(); -// if (err != cudaSuccess) { -// LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } - -// // Use thrust to reduce computed sums to one value - -// // Wrap device pointer in a thrust::device_ptr -// thrust::device_ptr d_ave_sfrd_buf_ptr(d_data->d_ave_sfrd_buf); -// // Reduce final buffer sums to one value -// double ave_sfrd_buf = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + buffer_length, 0., thrust::plus()); -// LOG_INFO("SFRD sum reduced to single value by thrust::reduce operation."); - -// // Copy results from device to host. 
-// err = cudaMemcpy(sfrd_grid, d_data->d_sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyDeviceToHost); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// LOG_INFO("SFRD sum copied to host."); - -// return ave_sfrd_buf; -// } - -// void free_sfrd_gpu_data(sfrd_gpu_data *d_data) { -// cudaError_t err = cudaGetLastError(); - -// err = cudaFree(d_data->d_y_arr); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// err = cudaFree(d_data->d_dens_R_grid); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// err = cudaFree(d_data->d_sfrd_grid); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// err = cudaFree(d_data->d_ave_sfrd_buf); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } - -// LOG_INFO("Device memory freed."); -// } - - -// ---------------------------------------------------------------------------------------------------------------------------- -// double calculate_sfrd_from_grid_gpu( -// RGTable1D_f *SFRD_conditional_table, // input data -// float *dens_R_grid, // input data -// double *zpp_growth, // input data -// int R_ct, // filter step/loop iteration/spherical annuli (out of 40 by default) -// float *sfrd_grid, // star formation rate density grid to be updated -// unsigned int num_pixels, // length of input data -// float *d_y_arr, float *d_dens_R_grid, float *d_sfrd_grid, double *d_ave_sfrd_buf // device pointers -// ) { -// cudaError_t err = cudaGetLastError(); - -// // Set bools for initial and final filtering steps to allow for memory reuse -// bool initial_filter_step = false; -// bool final_filter_step = false; - -// // Default NUM_FILTER_STEPS_FOR_Ts = 40 -// if (global_params.NUM_FILTER_STEPS_FOR_Ts - 1 == 
R_ct) { -// initial_filter_step = true; -// } else if (R_ct == 0) { -// final_filter_step = true; -// } else if (global_params.NUM_FILTER_STEPS_FOR_Ts == 1) { -// // Would case of NUM_FILTER_STEPS_FOR_Ts = 1 ever occur? -// initial_filter_step = true; -// final_filter_step = true; -// } - -// // Get growth factor for current filter step -// double zpp_growth_R_ct = zpp_growth[R_ct]; - -// // ============================================================ -// // These device pointers are initialised in ts_main outside the loop that calls this function -// // so that the pointers can persist across loop iterations (filter steps). -// // -// // float *d_y_arr, *d_dens_R_grid, *d_sfrd_grid; -// // double* d_ave_sfrd_buf; -// // ============================================================ - -// // Allocate device memory ------------------------------------------------------------------------------------------ -// if (initial_filter_step) { -// err = cudaMalloc((void**)&d_y_arr, sizeof(float) * SFRD_conditional_table->n_bin); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// err = cudaMalloc((void**)&d_dens_R_grid, sizeof(float) * num_pixels); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// err = cudaMalloc((void**)&d_sfrd_grid, sizeof(float) * num_pixels); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// LOG_INFO("SFRD_conditional_table.y_arr and density and sfrd grids allocated on device."); -// } - -// // Copy data from host to device ----------------------------------------------------------------------------------- -// err = cudaMemcpy(d_y_arr, SFRD_conditional_table->y_arr, sizeof(float) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } 
-// err = cudaMemcpy(d_dens_R_grid, dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); // TODO: Does this change between filter steps? -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// // In initial filter step, set array to 0; -// // for future steps, previous array values will be written over -// if (initial_filter_step) { -// err = cudaMemset(d_sfrd_grid, 0, sizeof(float) * num_pixels); // fill with byte=0 -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// LOG_INFO("SFRD_conditional_table.y_arr and density and sfrd grids copied to device."); -// } else { -// LOG_INFO("SFRD_conditional_table.y_arr and density grid copied to device."); -// } - -// // Get max threads/block for device -// int maxThreadsPerBlock; -// cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0); - -// // Set threads/block based on device max -// int threadsPerBlock; -// if (maxThreadsPerBlock >= 512) { -// threadsPerBlock = 512; -// } else if (maxThreadsPerBlock >= 256) { -// threadsPerBlock = 256; -// } else if (maxThreadsPerBlock >= 128) { -// threadsPerBlock = 128; -// } else if (maxThreadsPerBlock >= 64) { -// threadsPerBlock = 64; -// } else if (maxThreadsPerBlock >= 32) { -// threadsPerBlock = 32; -// } else { -// threadsPerBlock = 16; -// } -// int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; // 91m & 256 -> 355959 -// int smemSize = threadsPerBlock * sizeof(double); // shared memory - -// // Allocate memory for SFRD sum buffer and initialise to 0 only for initial filter step; -// // reuse memory for remaining filter steps. 
-// unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); -// if (initial_filter_step) { -// err = cudaMalloc((void**)&d_ave_sfrd_buf, sizeof(double) * buffer_length); // 91m & 256 -> 177979 -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// LOG_INFO("SFRD sum reduction buffer allocated on device."); - -// err = cudaMemset(d_ave_sfrd_buf, 0, sizeof(double) * buffer_length); // fill with byte=0 -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// LOG_INFO("SFRD sum reduction buffer initialised to 0."); -// } - -// // Invoke kernel -// switch (threadsPerBlock) { -// case 512: -// compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); -// break; -// case 256: -// compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); -// break; -// case 128: -// compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); -// break; -// case 64: -// compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); -// break; -// case 32: -// compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); -// break; -// default: -// LOG_WARNING("Thread size invalid; defaulting 
to 256."); -// compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); -// } -// LOG_INFO("SpinTemperatureBox compute-and-reduce kernel called."); - -// // Only use during development? -// err = cudaDeviceSynchronize(); -// CATCH_CUDA_ERROR(err); - -// err = cudaGetLastError(); -// if (err != cudaSuccess) { -// LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } - -// // Use thrust to reduce computed sums to one value - -// // Wrap device pointer in a thrust::device_ptr -// thrust::device_ptr d_ave_sfrd_buf_ptr(d_ave_sfrd_buf); -// // Reduce final buffer sums to one value -// double ave_sfrd_buf = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + buffer_length, 0., thrust::plus()); -// LOG_INFO("SFRD sum reduced to single value by thrust::reduce operation."); - -// // Copy results from device to host. -// err = cudaMemcpy(sfrd_grid, d_sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyDeviceToHost); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// LOG_INFO("SFRD sum copied to host."); - -// // Deallocate device memory on final filter step. 
-// if (final_filter_step) { -// err = cudaFree(d_y_arr); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// err = cudaFree(d_dens_R_grid); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// err = cudaFree(d_sfrd_grid); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// err = cudaFree(d_ave_sfrd_buf); -// if (err != cudaSuccess) { -// LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); -// Throw(CUDAError); -// } -// LOG_INFO("Device memory freed."); -// } - -// return ave_sfrd_buf; -// } From 38b9e535313703f7f462b48c7494833afc540568 Mon Sep 17 00:00:00 2001 From: JHu Date: Thu, 28 Nov 2024 14:13:28 +1100 Subject: [PATCH 036/145] store RGTable2D in one block of contiguous memory --- src/py21cmfast/src/interpolation.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/py21cmfast/src/interpolation.c b/src/py21cmfast/src/interpolation.c index 3f2156613..8b43533fe 100644 --- a/src/py21cmfast/src/interpolation.c +++ b/src/py21cmfast/src/interpolation.c @@ -39,10 +39,16 @@ void allocate_RGTable2D(int n_x, int n_y, RGTable2D * ptr){ ptr->nx_bin = n_x; ptr->ny_bin = n_y; - ptr->z_arr = calloc(n_x,sizeof(double*)); + ptr->flatten_data = (double *)calloc(n_x * n_y, sizeof(double)); + ptr->z_arr = calloc(n_x, sizeof(double *)); for(i=0;i<n_x;i++){ - ptr->z_arr[i] = calloc(n_y,sizeof(double)); + ptr->z_arr[i] = &ptr->flatten_data[i * n_y]; } + + // ptr->z_arr = calloc(n_x,sizeof(double*)); + // for(i=0;i<n_x;i++){ + // ptr->z_arr[i] = calloc(n_y,sizeof(double)); + // } ptr->allocated = true; } @@ -71,8 +77,9 @@ void free_RGTable2D_f(RGTable2D_f * ptr){ void free_RGTable2D(RGTable2D * ptr){ int i; if(ptr->allocated){ - for(i=0;i<ptr->nx_bin;i++) - free(ptr->z_arr[i]); + free(ptr->flatten_data); + // for(i=0;i<ptr->nx_bin;i++) + // free(ptr->z_arr[i]); free(ptr->z_arr); ptr->allocated =
false; } From 8e330a85e8f2c3dff5b0f944be26e8779511e1d6 Mon Sep 17 00:00:00 2001 From: JHu Date: Thu, 28 Nov 2024 16:14:08 +1100 Subject: [PATCH 037/145] store type def in a separate header file; add flatten data as a member of RGTable2D --- src/py21cmfast/src/interpolation_types.h | 49 ++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 src/py21cmfast/src/interpolation_types.h diff --git a/src/py21cmfast/src/interpolation_types.h b/src/py21cmfast/src/interpolation_types.h new file mode 100644 index 000000000..3d1f3c649 --- /dev/null +++ b/src/py21cmfast/src/interpolation_types.h @@ -0,0 +1,49 @@ +#ifndef _INTERPOLATION_TYPES_H +#define _INTERPOLATION_TYPES_H + +typedef struct RGTable1D +{ + int n_bin; + double x_min; + double x_width; + + double *y_arr; + bool allocated; +} RGTable1D; + +typedef struct RGTable2D +{ + int nx_bin, ny_bin; + double x_min, y_min; + double x_width, y_width; + + double **z_arr; + double *flatten_data; + + double saved_ll, saved_ul; // for future acceleration + bool allocated; +} RGTable2D; + +typedef struct RGTable1D_f +{ + int n_bin; + double x_min; + double x_width; + + float *y_arr; + bool allocated; +} RGTable1D_f; + +typedef struct RGTable2D_f +{ + int nx_bin, ny_bin; + double x_min, y_min; + double x_width, y_width; + + float **z_arr; + + double saved_ll, saved_ul; // for future acceleration + bool allocated; +} RGTable2D_f; + +#endif \ No newline at end of file From 7aedc544b3c527254f6fbfd1c1b698d51e8dab74 Mon Sep 17 00:00:00 2001 From: JHu Date: Fri, 29 Nov 2024 12:42:53 +1100 Subject: [PATCH 038/145] copy global params to device constant memory --- src/py21cmfast/src/DeviceConstants.cuh | 15 +++++++++++++++ src/py21cmfast/src/HaloField.cu | 23 +++++++++++++++++++++++ src/py21cmfast/src/HaloField.cuh | 15 +++++++++++++++ 3 files changed, 53 insertions(+) create mode 100644 src/py21cmfast/src/DeviceConstants.cuh create mode 100644 src/py21cmfast/src/HaloField.cu create mode 100644 
src/py21cmfast/src/HaloField.cuh diff --git a/src/py21cmfast/src/DeviceConstants.cuh b/src/py21cmfast/src/DeviceConstants.cuh new file mode 100644 index 000000000..f7b51486a --- /dev/null +++ b/src/py21cmfast/src/DeviceConstants.cuh @@ -0,0 +1,15 @@ +#ifndef _DEVICECONSTANTS_CUH +#define _DEVICECONSTANTS_CUH + +#include "InputParameters.h" + +#ifndef _HALOFIELD_CU // double check whether this is necessary + +extern __constant__ UserParams d_user_params; +extern __constant__ CosmoParams d_cosmo_params; +extern __constant__ AstroParams d_astro_params; +extern __constant__ double d_test_params; + +#endif + +#endif \ No newline at end of file diff --git a/src/py21cmfast/src/HaloField.cu b/src/py21cmfast/src/HaloField.cu new file mode 100644 index 000000000..80cdd28d8 --- /dev/null +++ b/src/py21cmfast/src/HaloField.cu @@ -0,0 +1,23 @@ +#ifndef _HALOFIELD_CU +#define _HALOFIELD_CU + +#include + +#include "DeviceConstants.cuh" +#include "HaloField.cuh" + +// define relevant variables stored in constant memory +__constant__ UserParams d_user_params; +__constant__ CosmoParams d_cosmo_params; +__constant__ AstroParams d_astro_params; +__constant__ double d_test_params; + +void updateGlobalParams(UserParams *h_user_params, CosmoParams *h_cosmo_params, AstroParams *h_astro_params){ + cudaMemcpyToSymbol(d_user_params, h_user_params, sizeof(UserParams), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(d_cosmo_params, h_cosmo_params, sizeof(CosmoParams), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(d_astro_params, h_astro_params, sizeof(AstroParams), 0, cudaMemcpyHostToDevice); + double test_data = 5.5; + cudaMemcpyToSymbol(d_test_params, &test_data, sizeof(double), 0, cudaMemcpyHostToDevice); +} + +#endif diff --git a/src/py21cmfast/src/HaloField.cuh b/src/py21cmfast/src/HaloField.cuh new file mode 100644 index 000000000..484c24620 --- /dev/null +++ b/src/py21cmfast/src/HaloField.cuh @@ -0,0 +1,15 @@ +#ifndef _HALOFIELD_CUH +#define _HALOFIELD_CUH +#include 
"InputParameters.h" +#include "interpolation_types.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + void updateGlobalParams(UserParams *h_user_params, CosmoParams *h_cosmo_params, AstroParams *h_astro_params); +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file From 7d237e08e49b37c6651a6f20d1bf446c8d7fff13 Mon Sep 17 00:00:00 2001 From: JHu Date: Fri, 29 Nov 2024 12:45:05 +1100 Subject: [PATCH 039/145] copy nhalo, mcol, and nhalo_inv tables to device constant memory (verified); add place holders for interp table functions --- src/py21cmfast/src/interp_tables.cu | 167 +++++++++++++++++++++++++++ src/py21cmfast/src/interp_tables.cuh | 23 ++++ 2 files changed, 190 insertions(+) create mode 100644 src/py21cmfast/src/interp_tables.cu create mode 100644 src/py21cmfast/src/interp_tables.cuh diff --git a/src/py21cmfast/src/interp_tables.cu b/src/py21cmfast/src/interp_tables.cu new file mode 100644 index 000000000..d1cc46176 --- /dev/null +++ b/src/py21cmfast/src/interp_tables.cu @@ -0,0 +1,167 @@ +#include + +#include +// #include + +// #include "InputParameters.h" +#include "interpolation_types.h" + +#include "interp_tables.cuh" + +// define relevant variables stored in constant memory +__constant__ RGTable1D d_Nhalo_table; +__constant__ RGTable1D d_Mcoll_table; +__constant__ RGTable2D d_Nhalo_inv_table; + +// specify a max size of yarr +const int device_n_max = 200; +__constant__ double d_Nhalo_yarr[device_n_max]; +__constant__ double d_Mcoll_yarr[device_n_max]; + + +// copy tables to gpu +void copyTablesToDevice(RGTable1D h_Nhalo_table, RGTable1D h_Mcoll_table, RGTable2D h_Nhalo_inv_table) +{ + // copy Nhalo table and its member y_arr + size_t size_Nhalo_yarr = sizeof(double) * h_Nhalo_table.n_bin; + // get a copy of the Nhalo table + RGTable1D h_Nhalo_table_to_device = h_Nhalo_table; + if (h_Nhalo_table.n_bin > device_n_max){ + // double *d_Nhalo_yarr; + // todo: declare device yarr (not using constant) + return; + } + else{ + 
cudaMemcpyToSymbol(d_Nhalo_yarr, h_Nhalo_table.y_arr, size_Nhalo_yarr, 0, cudaMemcpyHostToDevice); + + h_Nhalo_table_to_device.y_arr = d_Nhalo_yarr; + } + cudaMemcpyToSymbol(d_Nhalo_table, &h_Nhalo_table_to_device, sizeof(RGTable1D), 0, cudaMemcpyHostToDevice); + + // copy Mcoll table and its member y_arr + size_t size_Mcoll_yarr = sizeof(double) * h_Mcoll_table.n_bin; + // get a copy of Mcoll table + RGTable1D h_Mcoll_table_to_device = h_Mcoll_table; + if (h_Mcoll_table.n_bin > device_n_max){ + return; + } + else{ + cudaMemcpyToSymbol(d_Mcoll_yarr, h_Mcoll_table.y_arr, size_Mcoll_yarr, 0, cudaMemcpyHostToDevice); + h_Mcoll_table_to_device.y_arr = d_Mcoll_yarr; + } + cudaMemcpyToSymbol(d_Mcoll_table, &h_Mcoll_table_to_device, sizeof(RGTable1D), 0, cudaMemcpyHostToDevice); + + // copy Nhalo_inv table and its member flatten_data + size_t size_Nhalo_inv_flatten_data = sizeof(double) * h_Nhalo_inv_table.nx_bin * h_Nhalo_inv_table.ny_bin; + // get a copy of Nhalo_inv_table + RGTable2D h_Nhalo_inv_table_to_device = h_Nhalo_inv_table; + + double *d_Nhalo_flatten_data; + cudaMalloc(&d_Nhalo_flatten_data, size_Nhalo_inv_flatten_data); + cudaMemcpy(d_Nhalo_flatten_data, h_Nhalo_inv_table.flatten_data, size_Nhalo_inv_flatten_data, cudaMemcpyHostToDevice); + + double **d_z_arr, **z_arr_to_device; + size_t size_z_arr = sizeof(double *) * h_Nhalo_inv_table.nx_bin; + cudaHostAlloc((void **)&z_arr_to_device, size_z_arr, cudaHostAllocDefault); + // get the address of flatten data on the device + int i; + for (i=0;i= n_bin - 1) + { + return 0.0; // Out-of-bounds handling + } + + double table_val = x_min + x_width * (float)idx; + double interp_point = (x - table_val) / x_width; + + return y_arr[idx] * (1 - interp_point) + y_arr[idx + 1] * (interp_point); +} + +// __device__ double EvaluateNhaloInv(double condition, double prob, double x_min, double x_width, double y_width, double **z_arr, double MIN_LOGPROB) +// { +// if (prob == 0.) 
+// return 1.; // q == 1 -> condition mass +// double lnp = log(prob); +// if (lnp < user_params_global->MIN_LOGPROB) +// return extrapolate_dNdM_inverse(condition, lnp, x_min, x_width, y_width, z_arr, MIN_LOGPROB); +// return EvaluateRGTable2D(condition, lnp, &Nhalo_inv_table); +// } + +// __device__ double extrapolate_dNdM_inverse(double condition, double lnp) +// { +// double x_min = d_Nhalo_inv_table.x_min; +// double x_width = d_Nhalo_inv_table.x_width; +// int x_idx = (int)floor((condition - x_min) / x_width); +// double x_table = x_min + x_idx * x_width; +// double interp_point_x = (condition - x_table) / x_width; + +// double extrap_point_y = (lnp - user_params_global->MIN_LOGPROB) / d_Nhalo_inv_table.y_width; + +// // find the log-mass at the edge of the table for this condition +// double xlimit = d_Nhalo_inv_table.z_arr[x_idx][0] * (interp_point_x) + d_Nhalo_inv_table.z_arr[x_idx + 1][0] * (1 - interp_point_x); +// double xlimit_m1 = d_Nhalo_inv_table.z_arr[x_idx][1] * (interp_point_x) + d_Nhalo_inv_table.z_arr[x_idx + 1][1] * (1 - interp_point_x); + +// double result = xlimit + (xlimit_m1 - xlimit) * (extrap_point_y); + +// return result; +// } + +// double EvaluateNhaloInv(double condition, double prob) +// { +// if (prob == 0.) 
+// return 1.; // q == 1 -> condition mass +// double lnp = log(prob); +// if (lnp < user_params_global->MIN_LOGPROB) +// return extrapolate_dNdM_inverse(condition, lnp); +// return EvaluateRGTable2D(condition, lnp, &Nhalo_inv_table); +// } + +// __device__ double extrapolate_dNdM_inverse(double condition, double lnp, double x_min, double x_width, double y_width, double **z_arr, double MIN_LOGPROB) +// { +// int x_idx = (int)floor((condition - x_min) / x_width); +// double x_table = x_min + x_idx * x_width; +// double interp_point_x = (condition - x_table) / x_width; + +// double extrap_point_y = (lnp - MIN_LOGPROB) / y_width; + +// // find the log-mass at the edge of the table for this condition +// double xlimit = z_arr[x_idx][0] * (interp_point_x) + z_arr[x_idx + 1][0] * (1 - interp_point_x); +// double xlimit_m1 = z_arr[x_idx][1] * (interp_point_x) + z_arr[x_idx + 1][1] * (1 - interp_point_x); + +// double result = xlimit + (xlimit_m1 - xlimit) * (extrap_point_y); + +// return result; +// } + +// __device__ double EvaluateMcoll() +// { +// // placeholder +// return 0.0; +// } + +// __device__ double EvaluateNhalo() +// { +// // placeholder +// return 0.0; +// } \ No newline at end of file diff --git a/src/py21cmfast/src/interp_tables.cuh b/src/py21cmfast/src/interp_tables.cuh new file mode 100644 index 000000000..c8823e73e --- /dev/null +++ b/src/py21cmfast/src/interp_tables.cuh @@ -0,0 +1,23 @@ +#ifndef _INTERP_TABLES_CUH +#define _INTERP_TABLES_CUH + +#include "interpolation_types.h" + +#ifdef __CUDA_ARCH__ +__device__ double EvaluateSigma(float x, double x_min, double x_width, float *y_arr, int n_bin); +__device__ double EvaluateNhaloInv(); +__device__ double extrapolate_dNdM_inverse(); +__device__ double EvaluateMcoll(); +__device__ double EvaluateNhalo(); +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif + void copyTablesToDevice(RGTable1D h_Nhalo_table, RGTable1D h_Mcoll_table, RGTable2D h_Nhalo_inv_table); +#ifdef __cplusplus +} +#endif + +#endif From 
c906eee55e98e18f8f44bfdc84995d601507502c Mon Sep 17 00:00:00 2001 From: JHu Date: Sat, 30 Nov 2024 09:26:54 +1100 Subject: [PATCH 040/145] convert relevant functions in interp_tables to device functions; fix constant memory address errors on device --- src/py21cmfast/src/interp_tables.cu | 129 ++++++++++++--------------- src/py21cmfast/src/interp_tables.cuh | 8 +- 2 files changed, 63 insertions(+), 74 deletions(-) diff --git a/src/py21cmfast/src/interp_tables.cu b/src/py21cmfast/src/interp_tables.cu index d1cc46176..3f9033cbd 100644 --- a/src/py21cmfast/src/interp_tables.cu +++ b/src/py21cmfast/src/interp_tables.cu @@ -7,6 +7,9 @@ #include "interpolation_types.h" #include "interp_tables.cuh" +#include "DeviceConstants.cuh" + +#include "interpolation.cu" // define relevant variables stored in constant memory __constant__ RGTable1D d_Nhalo_table; @@ -33,8 +36,11 @@ void copyTablesToDevice(RGTable1D h_Nhalo_table, RGTable1D h_Mcoll_table, RGTabl } else{ cudaMemcpyToSymbol(d_Nhalo_yarr, h_Nhalo_table.y_arr, size_Nhalo_yarr, 0, cudaMemcpyHostToDevice); - - h_Nhalo_table_to_device.y_arr = d_Nhalo_yarr; + // get memory address on the device + double *d_Nhalo_yarr_device; + cudaGetSymbolAddress((void **)&d_Nhalo_yarr_device, d_Nhalo_yarr); + + h_Nhalo_table_to_device.y_arr = d_Nhalo_yarr_device; } cudaMemcpyToSymbol(d_Nhalo_table, &h_Nhalo_table_to_device, sizeof(RGTable1D), 0, cudaMemcpyHostToDevice); @@ -47,7 +53,11 @@ void copyTablesToDevice(RGTable1D h_Nhalo_table, RGTable1D h_Mcoll_table, RGTabl } else{ cudaMemcpyToSymbol(d_Mcoll_yarr, h_Mcoll_table.y_arr, size_Mcoll_yarr, 0, cudaMemcpyHostToDevice); - h_Mcoll_table_to_device.y_arr = d_Mcoll_yarr; + // get memory address on the device + double *d_Mcoll_yarr_device; + cudaGetSymbolAddress((void **)&d_Mcoll_yarr_device, d_Mcoll_yarr); + + h_Mcoll_table_to_device.y_arr = d_Mcoll_yarr_device; } cudaMemcpyToSymbol(d_Mcoll_table, &h_Mcoll_table_to_device, sizeof(RGTable1D), 0, cudaMemcpyHostToDevice); @@ -98,70 +108,49 @@ 
__device__ double EvaluateSigma(float x, double x_min, double x_width, float *y_ return y_arr[idx] * (1 - interp_point) + y_arr[idx + 1] * (interp_point); } -// __device__ double EvaluateNhaloInv(double condition, double prob, double x_min, double x_width, double y_width, double **z_arr, double MIN_LOGPROB) -// { -// if (prob == 0.) -// return 1.; // q == 1 -> condition mass -// double lnp = log(prob); -// if (lnp < user_params_global->MIN_LOGPROB) -// return extrapolate_dNdM_inverse(condition, lnp, x_min, x_width, y_width, z_arr, MIN_LOGPROB); -// return EvaluateRGTable2D(condition, lnp, &Nhalo_inv_table); -// } - -// __device__ double extrapolate_dNdM_inverse(double condition, double lnp) -// { -// double x_min = d_Nhalo_inv_table.x_min; -// double x_width = d_Nhalo_inv_table.x_width; -// int x_idx = (int)floor((condition - x_min) / x_width); -// double x_table = x_min + x_idx * x_width; -// double interp_point_x = (condition - x_table) / x_width; - -// double extrap_point_y = (lnp - user_params_global->MIN_LOGPROB) / d_Nhalo_inv_table.y_width; - -// // find the log-mass at the edge of the table for this condition -// double xlimit = d_Nhalo_inv_table.z_arr[x_idx][0] * (interp_point_x) + d_Nhalo_inv_table.z_arr[x_idx + 1][0] * (1 - interp_point_x); -// double xlimit_m1 = d_Nhalo_inv_table.z_arr[x_idx][1] * (interp_point_x) + d_Nhalo_inv_table.z_arr[x_idx + 1][1] * (1 - interp_point_x); - -// double result = xlimit + (xlimit_m1 - xlimit) * (extrap_point_y); - -// return result; -// } - -// double EvaluateNhaloInv(double condition, double prob) -// { -// if (prob == 0.) 
-// return 1.; // q == 1 -> condition mass -// double lnp = log(prob); -// if (lnp < user_params_global->MIN_LOGPROB) -// return extrapolate_dNdM_inverse(condition, lnp); -// return EvaluateRGTable2D(condition, lnp, &Nhalo_inv_table); -// } - -// __device__ double extrapolate_dNdM_inverse(double condition, double lnp, double x_min, double x_width, double y_width, double **z_arr, double MIN_LOGPROB) -// { -// int x_idx = (int)floor((condition - x_min) / x_width); -// double x_table = x_min + x_idx * x_width; -// double interp_point_x = (condition - x_table) / x_width; - -// double extrap_point_y = (lnp - MIN_LOGPROB) / y_width; - -// // find the log-mass at the edge of the table for this condition -// double xlimit = z_arr[x_idx][0] * (interp_point_x) + z_arr[x_idx + 1][0] * (1 - interp_point_x); -// double xlimit_m1 = z_arr[x_idx][1] * (interp_point_x) + z_arr[x_idx + 1][1] * (1 - interp_point_x); - -// double result = xlimit + (xlimit_m1 - xlimit) * (extrap_point_y); - -// return result; -// } - -// __device__ double EvaluateMcoll() -// { -// // placeholder -// return 0.0; -// } - -// __device__ double EvaluateNhalo() -// { -// // placeholder -// return 0.0; -// } \ No newline at end of file +__device__ double extrapolate_dNdM_inverse(double condition, double lnp) +{ + double x_min = d_Nhalo_inv_table.x_min; + double x_width = d_Nhalo_inv_table.x_width; + // printf("condition: %f; lnp: %f \n", condition, lnp); //tmp + int x_idx = (int)floor((condition - x_min) / x_width); + double x_table = x_min + x_idx * x_width; + double interp_point_x = (condition - x_table) / x_width; + + double extrap_point_y = (lnp - d_user_params.MIN_LOGPROB) / d_Nhalo_inv_table.y_width; + + // find the log-mass at the edge of the table for this condition + double xlimit = d_Nhalo_inv_table.z_arr[x_idx][0] * (interp_point_x) + d_Nhalo_inv_table.z_arr[x_idx + 1][0] * (1 - interp_point_x); + double xlimit_m1 = d_Nhalo_inv_table.z_arr[x_idx][1] * (interp_point_x) + 
d_Nhalo_inv_table.z_arr[x_idx + 1][1] * (1 - interp_point_x); + + double result = xlimit + (xlimit_m1 - xlimit) * (extrap_point_y); + + return result; +} + +__device__ double EvaluateNhaloInv(double condition, double prob) +{ + if (prob == 0.) + return 1.; // q == 1 -> condition mass + double lnp = log(prob); + if (lnp < d_user_params.MIN_LOGPROB) + return extrapolate_dNdM_inverse(condition, lnp); + return EvaluateRGTable2D(condition, lnp, &d_Nhalo_inv_table); +} + +__device__ double EvaluateMcoll(double condition, double growthf, double lnMmin, double lnMmax, double M_cond, double sigma, double delta) +{ + if (d_user_params.USE_INTERPOLATION_TABLES) + return EvaluateRGTable1D(condition, &d_Mcoll_table); + // todo: implement Mcoll_Conditional + return 0; +} + +__device__ double EvaluateNhalo(double condition, double growthf, double lnMmin, double lnMmax, double M_cond, double sigma, double delta) +{ + if (d_user_params.USE_INTERPOLATION_TABLES) + return EvaluateRGTable1D(condition, &d_Nhalo_table); + // todo: implement Nhalo_Conditional + return 0; +} + diff --git a/src/py21cmfast/src/interp_tables.cuh b/src/py21cmfast/src/interp_tables.cuh index c8823e73e..8616f691e 100644 --- a/src/py21cmfast/src/interp_tables.cuh +++ b/src/py21cmfast/src/interp_tables.cuh @@ -5,10 +5,10 @@ #ifdef __CUDA_ARCH__ __device__ double EvaluateSigma(float x, double x_min, double x_width, float *y_arr, int n_bin); -__device__ double EvaluateNhaloInv(); -__device__ double extrapolate_dNdM_inverse(); -__device__ double EvaluateMcoll(); -__device__ double EvaluateNhalo(); +__device__ double extrapolate_dNdM_inverse(double condition, double lnp); +__device__ double EvaluateNhaloInv(double condition, double prob); +__device__ double EvaluateMcoll(double condition, double growthf, double lnMmin, double lnMmax, double M_cond, double sigma, double delta); +__device__ double EvaluateNhalo(double condition, double growthf, double lnMmin, double lnMmax, double M_cond, double sigma, double delta); 
#endif #ifdef __cplusplus From a4ad5f7371fb858dd18268fffe31034a7b660be7 Mon Sep 17 00:00:00 2001 From: JHu Date: Sun, 1 Dec 2024 19:00:07 +1100 Subject: [PATCH 041/145] add cuda error checking macro --- src/py21cmfast/src/Stochasticity.cu | 483 ++++++++++++++++++++++++++++ src/py21cmfast/src/cuda_utils.cuh | 18 ++ 2 files changed, 501 insertions(+) create mode 100644 src/py21cmfast/src/Stochasticity.cu create mode 100644 src/py21cmfast/src/cuda_utils.cuh diff --git a/src/py21cmfast/src/Stochasticity.cu b/src/py21cmfast/src/Stochasticity.cu new file mode 100644 index 000000000..3820754cd --- /dev/null +++ b/src/py21cmfast/src/Stochasticity.cu @@ -0,0 +1,483 @@ +#include +#include + +#include +#include // host-side header file +#include // device-side header file + +#include "Constants.h" +#include "interpolation_types.h" +#include "Stochasticity.h" + +// #include "tiger_checks.h" +#include "Stochasticity.cuh" +#include "DeviceConstants.cuh" +#include "hmf.cu" +#include "interp_tables.cu" + + + +// define macros +// #ifndef JENKINS_a +// #define JENKINS_a (0.73) // Jenkins+01, SMT has 0.707 +// #endif + +// #ifndef JENKINS_b +// #define JENKINS_b (0.34) // Jenkins+01 fit from Barkana+01, SMT has 0.5 +// #endif + +// #ifndef JENKINS_c +// #define JENKINS_c (0.81) // Jenkins+01 from from Barkana+01, SMT has 0.6 +// #endif + +#ifndef MAX_DELTAC_FRAC +#define MAX_DELTAC_FRAC (float)0.99 // max delta/deltac for the mass function integrals +#endif + +#ifndef DELTA_MIN +#define DELTA_MIN -1 // minimum delta for Lagrangian mass function integrals +#endif + +#ifndef MAX_HALO_CELL +#define MAX_HALO_CELL (int)1e5 +#endif + +// device functions +// __device__ double sheth_delc_fixed(double del, double sig) +// { +// return sqrt(JENKINS_a) * del * (1. 
+ JENKINS_b * pow(sig * sig / (JENKINS_a * del * del), JENKINS_c)); +// } + +// // Get the relevant excursion set barrier density given the user-specified HMF +// __device__ double get_delta_crit(int HMF, double sigma, double growthf) +// { +// if (HMF == 4) +// return DELTAC_DELOS; +// if (HMF == 1) +// return sheth_delc_fixed(Deltac / growthf, sigma) * growthf; + +// return Deltac; +// } + +// __device__ double EvaluateRGTable1D_f(double x, RGTable1D_f *table) +// { +// double x_min = table->x_min; +// double x_width = table->x_width; +// int idx = (int)floor((x - x_min) / x_width); +// double table_val = x_min + x_width * (float)idx; +// double interp_point = (x - table_val) / x_width; + +// return table->y_arr[idx] * (1 - interp_point) + table->y_arr[idx + 1] * (interp_point); +// } + +// // assume use interpolation table is true at this stage, add the check later +// // todo: double check whether I should use float or double or x, it's been mixed used in c code +// __device__ double EvaluateSigma(float x, double x_min, double x_width, float *y_arr, int n_bin) +// { +// // using log units to make the fast option faster and the slow option slower +// // return EvaluateRGTable1D_f(lnM, table); +// int idx = (int)floor((x - x_min) / x_width); +// if (idx < 0 || idx >= n_bin - 1) +// { +// return 0.0; // Out-of-bounds handling +// } + +// double table_val = x_min + x_width * (float)idx; +// double interp_point = (x - table_val) / x_width; + +// return y_arr[idx] * (1 - interp_point) + y_arr[idx + 1] * (interp_point); +// } + +// double EvaluateRGTable1D(double x, RGTable1D *table) +// { +// double x_min = table->x_min; +// double x_width = table->x_width; +// int idx = (int)floor((x - x_min) / x_width); +// double table_val = x_min + x_width * (double)idx; +// double interp_point = (x - table_val) / x_width; + +// // a + f(a-b) is one fewer operation but less precise +// double result = table->y_arr[idx] * (1 - interp_point) + table->y_arr[idx + 1] * 
(interp_point); + +// return result; +// } + +// // assume use interpolation table is true at this stage, add the check later +// __device__ double EvaluateNhalo(double condition, double growthf, double lnMmin, double lnMmax, double M_cond, double sigma, double delta) +// { +// return EvaluateRGTable1D(condition, &Nhalo_table); + +// } + +// 11-30: the following implementation works (before using any global params on gpu) +__device__ void stoc_set_consts_cond(struct HaloSamplingConstants *const_struct, float cond_val, int HMF, double x_min, double x_width, float *d_y_arr, int n_bin) +{ + double m_exp, n_exp; + // Here the condition is a mass, volume is the Lagrangian volume and delta_l is set by the + // redshift difference which represents the difference in delta_crit across redshifts + if (const_struct->from_catalog){ + const_struct->M_cond = cond_val; + const_struct->lnM_cond = log(cond_val); + const_struct->sigma_cond = EvaluateSigma(const_struct->lnM_cond, x_min, x_width, d_y_arr, n_bin); //todo: update this function using global tables in constant memory + // mean stellar mass of this halo mass, used for stellar z correlations + const_struct->cond_val = const_struct->lnM_cond; + // condition delta is the previous delta crit + const_struct->delta = get_delta_crit(HMF, const_struct->sigma_cond, const_struct->growth_in) / const_struct->growth_in * const_struct->growth_out; //todo: update this function using global variables in constant memory + } + // Here the condition is a cell of a given density, the volume/mass is given by the grid parameters + else + { + // since the condition mass/sigma is already set all we need is delta + const_struct->delta = cond_val; + const_struct->cond_val = cond_val; + } + // Get expected N and M from interptables + // the splines don't work well for cells above Deltac, but there CAN be cells above deltac, since this calculation happens + // before the overlap, and since the smallest dexm mass is M_cell*(1.01^3) there *could* be a 
cell above Deltac not in a halo + // NOTE: all this does is prevent integration errors below since these cases are also dealt with in stoc_sample + if (const_struct->delta > MAX_DELTAC_FRAC * get_delta_crit(d_user_params.HMF, const_struct->sigma_cond, const_struct->growth_out)){ + const_struct->expected_M = const_struct->M_cond; + const_struct->expected_N = 1; + } + else if (const_struct->delta <= DELTA_MIN){ + const_struct->expected_M = 0; + const_struct->expected_N = 0; + } + else + { + n_exp = EvaluateNhalo(const_struct->cond_val, const_struct->growth_out, const_struct->lnM_min, + const_struct->lnM_max_tb, const_struct->M_cond, const_struct->sigma_cond, const_struct->delta); + m_exp = EvaluateMcoll(const_struct->cond_val, const_struct->growth_out, const_struct->lnM_min, + const_struct->lnM_max_tb, const_struct->M_cond, const_struct->sigma_cond, const_struct->delta); + const_struct->expected_N = n_exp * const_struct->M_cond; + const_struct->expected_M = m_exp * const_struct->M_cond; + } + return; +} + +__device__ double sample_dndM_inverse(double condition, struct HaloSamplingConstants *hs_constants, curandState *state) +{ + double p_in, result; + p_in = curand_uniform_double(state); + result = EvaluateNhaloInv(condition, p_in); + result = fmin(1.0, fmax(0.0, result)); // clip in case of extrapolation + result = result * hs_constants->M_cond; + return result; +} + +__device__ double remove_random_halo(curandState *state, int n_halo, int *idx, double *M_prog, float *M_out){ + double last_M_del; + int random_idx; + do { + random_idx = (int)(curand_uniform(state) * n_halo); + } while (M_out[random_idx] == 0); + last_M_del = M_out[random_idx]; + *M_prog -= last_M_del; + M_out[random_idx] = 0; // zero mass halos are skipped and not counted + + *idx = random_idx; + return last_M_del; +} + +__device__ void fix_mass_sample(curandState *state, double exp_M, int *n_halo_pt, double *M_tot_pt, float *M_out){ + // Keep the last halo if it brings us closer to the expected mass 
+ // This is done by addition or subtraction over the limit to balance + // the bias of the last halo being larger + int random_idx; + double last_M_del; + int sel = curand(state) % 2; + // bool sel = gsl_rng_uniform_int(rng, 2); + // int sel = 1; + if (sel) + { + if (fabs(*M_tot_pt - M_out[*n_halo_pt - 1] - exp_M) < fabs(*M_tot_pt - exp_M)) + { + *M_tot_pt -= M_out[*n_halo_pt - 1]; + // here we remove by setting the counter one lower so it isn't read + (*n_halo_pt)--; // increment has preference over dereference + } + } + else + { + do { + // here we remove by setting halo mass to zero, skipping it during the consolidation + last_M_del = remove_random_halo(state, *n_halo_pt, &random_idx, M_tot_pt, M_out); + } while (*M_tot_pt > exp_M); + + // if the sample with the last subtracted halo is closer to the expected mass, keep it + // LOG_ULTRA_DEBUG("Deciding to keep last halo M %.3e tot %.3e exp %.3e",last_M_del,*M_tot_pt,exp_M); + if (fabs(*M_tot_pt + last_M_del - exp_M) < fabs(*M_tot_pt - exp_M)) + { + M_out[random_idx] = last_M_del; + *M_tot_pt += last_M_del; + } + } +} + +__device__ int stoc_mass_sample(struct HaloSamplingConstants *hs_constants, curandState *state, int *n_halo_out, float *M_out){ + double exp_M = hs_constants->expected_M; + + // The mass-limited sampling as-is has a slight bias to producing too many halos, + // which is independent of density or halo mass, + // this factor reduces the total expected mass to bring it into line with the CMF + // exp_M *= user_params_global->HALOMASS_CORRECTION; + exp_M *= d_user_params.HALOMASS_CORRECTION; + + int n_halo_sampled = 0; + double M_prog = 0; + double M_sample; + + double tbl_arg = hs_constants->cond_val; + + while (M_prog < exp_M){ + M_sample = sample_dndM_inverse(tbl_arg, hs_constants, state); + + M_prog += M_sample; + M_out[n_halo_sampled++] = M_sample; + } + // The above sample is above the expected mass, by up to 100%. 
I wish to make the average mass equal to exp_M + fix_mass_sample(state, exp_M, &n_halo_sampled, &M_prog, M_out); + + *n_halo_out = n_halo_sampled; + return 0; +} + +__device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandState *state, int *n_halo_out, float *M_out){ + // TODO: really examine the case for number/mass sampling + // The poisson sample fails spectacularly for high delta (from_catalogs or dense cells) + // and excludes the correlation between number and mass (e.g many small halos or few large ones) + // The mass sample underperforms at low exp_M/M_max by excluding stochasticity in the total collapsed fraction + // and excluding larger halos (e.g if exp_M is 0.1*M_max we can effectively never sample the large halos) + // i.e there is some case for a delta cut between these two methods however I have no intuition for the exact levels + + int err; + + // If the expected mass is below our minimum saved mass, don't bother calculating + // NOTE: some of these conditions are redundant with set_consts_cond() + if (hs_constants->delta <= DELTA_MIN || hs_constants->expected_M < d_user_params.SAMPLER_MIN_MASS) + { + *n_halo_out = 0; + return 0; + } + // if delta is above critical, form one big halo + if (hs_constants->delta >= MAX_DELTAC_FRAC * get_delta_crit(d_user_params.HMF, hs_constants->sigma_cond, hs_constants->growth_out)){ + *n_halo_out = 1; + + // Expected mass takes into account potential dexm overlap + M_out[0] = hs_constants->expected_M; + return 0; + } + + // todo: implement callee functions for SAMPLE_METHOD (1,2,3) + // We always use Number-Limited sampling for grid-based cases + if (d_user_params.SAMPLE_METHOD == 1 || !hs_constants->from_catalog) + { + // err = stoc_halo_sample(hs_constants, rng, n_halo_out, M_out); + return 0; + } + else if (d_user_params.SAMPLE_METHOD == 0) + { + err = stoc_mass_sample(hs_constants, state, n_halo_out, M_out); + } + else if (d_user_params.SAMPLE_METHOD == 2) + { + // err = 
stoc_partition_sample(hs_constants, rng, n_halo_out, M_out); + return 0; + } + else if (d_user_params.SAMPLE_METHOD == 3) + { + // err = stoc_split_sample(hs_constants, rng, n_halo_out, M_out); + return 0; + } + else + { + printf("Invalid sampling method \n"); + return 0; + // todo: check how to throw error in cuda + // LOG_ERROR("Invalid sampling method"); + // Throw(ValueError); + } + if (*n_halo_out > MAX_HALO_CELL) + { + printf("too many halos in conditin, buffer overflow"); + // todo: check how to throw error in cuda + // LOG_ERROR("too many halos in condition, buffer overflow"); + // Throw(ValueError); + } + return err; +} + +// todo: implement condense_sparse_halolist +// // todo: just copied the original function here, need to verify it works with cuda +// __device__ void condense_sparse_halolist(HaloField *halofield, unsigned long long int *istart_threads, unsigned long long int *nhalo_threads) +// { +// int i = 0; +// unsigned long long int count_total = 0; +// for (i = 0; i < user_params_global->N_THREADS; i++) +// { +// memmove(&halofield->halo_masses[count_total], &halofield->halo_masses[istart_threads[i]], sizeof(float) * nhalo_threads[i]); +// memmove(&halofield->star_rng[count_total], &halofield->star_rng[istart_threads[i]], sizeof(float) * nhalo_threads[i]); +// memmove(&halofield->sfr_rng[count_total], &halofield->sfr_rng[istart_threads[i]], sizeof(float) * nhalo_threads[i]); +// memmove(&halofield->xray_rng[count_total], &halofield->xray_rng[istart_threads[i]], sizeof(float) * nhalo_threads[i]); +// memmove(&halofield->halo_coords[3 * count_total], &halofield->halo_coords[3 * istart_threads[i]], sizeof(int) * 3 * nhalo_threads[i]); +// LOG_SUPER_DEBUG("Moved array (start,count) (%llu, %llu) to position %llu", istart_threads[i], nhalo_threads[i], count_total); +// count_total += nhalo_threads[i]; +// } +// halofield->n_halos = count_total; + +// // replace the rest with zeros for clarity +// memset(&halofield->halo_masses[count_total], 0, 
(halofield->buffer_size - count_total) * sizeof(float)); +// memset(&halofield->halo_coords[3 * count_total], 0, 3 * (halofield->buffer_size - count_total) * sizeof(int)); +// memset(&halofield->star_rng[count_total], 0, (halofield->buffer_size - count_total) * sizeof(float)); +// memset(&halofield->sfr_rng[count_total], 0, (halofield->buffer_size - count_total) * sizeof(float)); +// memset(&halofield->xray_rng[count_total], 0, (halofield->buffer_size - count_total) * sizeof(float)); +// LOG_SUPER_DEBUG("Set %llu elements beyond %llu to zero", halofield->buffer_size - count_total, count_total); +// } + +// todo: implement set_prop_rng +// __device__ void set_prop_rng(gsl_rng *rng, bool from_catalog, double *interp, double *input, double *output) +// { +// double rng_star, rng_sfr, rng_xray; + +// // Correlate properties by interpolating between the sampled and descendant gaussians +// rng_star = astro_params_global->SIGMA_STAR > 0. ? gsl_ran_ugaussian(rng) : 0.; +// rng_sfr = astro_params_global->SIGMA_SFR_LIM > 0. ? gsl_ran_ugaussian(rng) : 0.; +// rng_xray = astro_params_global->SIGMA_LX > 0. ? 
gsl_ran_ugaussian(rng) : 0.; + +// if (from_catalog) +// { +// // this transforms the sample to one from the multivariate Gaussian, conditioned on the first sample +// rng_star = sqrt(1 - interp[0] * interp[0]) * rng_star + interp[0] * input[0]; +// rng_sfr = sqrt(1 - interp[1] * interp[1]) * rng_sfr + interp[1] * input[1]; +// rng_xray = sqrt(1 - interp[2] * interp[2]) * rng_xray + interp[2] * input[2]; +// } + +// output[0] = rng_star; +// output[1] = rng_sfr; +// output[2] = rng_xray; +// return; +// } + +// kernel function +__global__ void setup_random_states(curandState *d_states, unsigned long long int random_seed){ + // get thread idx + int ind = blockIdx.x * blockDim.x + threadIdx.x; + curand_init(random_seed, ind, 0, &d_states[ind]); +} + +__global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, double x_min, double x_width, + unsigned long long int n_halos, int n_bin, struct HaloSamplingConstants d_hs_constants, + int HMF, curandState *d_states, + float *d_halo_masses_out, float *star_rng_out, + float *sfr_rng_out, float *xray_rng_out, float *halo_coords_out) +{ + // get thread idx + int ind = blockIdx.x * blockDim.x + threadIdx.x; + if (ind >= n_halos) + { + return; + } + + float M = d_halo_masses[ind]; + + int n_prog; // the value will be updated after calling stoc_sample + + // set condition-dependent variables for sampling + stoc_set_consts_cond(&d_hs_constants, M, HMF, x_min, x_width, d_y_arr, n_bin); + + // tmp: just to verify the tables have been copied correctly + if (ind == 0) + { + printf("The first element of Nhalo y_arr: %e (%e) \n", d_Nhalo_yarr[0], d_Nhalo_table.y_arr[0]); + printf("The nhalo table n_bin: %d\n", d_Nhalo_table.n_bin); + printf("The nhalo_inv table nx_bin: %d\n", d_Nhalo_inv_table.nx_bin); + printf("HII_DIM: %d \n", d_user_params.HII_DIM); + printf("test params: %f \n", d_test_params); + printf("A_VCB: %f \n", d_astro_params.A_VCB); + printf("SIGMA_8: %f \n", d_cosmo_params.SIGMA_8); + } + + // todo: each 
thread across different blocks has unique random state + // curand_init(seed, threadIdx.x, 0, &d_states[threadIdx.x]); + // curandState local_state = d_states[threadIdx.x]; + curandState local_state = d_states[ind]; + // tmp: for validation only + // sample_dndM_inverse(0.38, &d_hs_constants, &local_state); + // int tmp1 = 20; + // double tmp2 = 681273355217.0; + // float tmp3 = 101976856.0; + // remove_random_halo(&local_state, 59, &tmp1, &tmp2, &tmp3); + + d_states[ind] = local_state; + + // Sample the CMF set by the descendant + // stoc_sample(&hs_constants, &local_state, &n_prog, prog_buf); + + // double sigma = EvaluateSigma(log(M), x_min, x_width, d_y_arr, n_bin); + // double delta = get_delta_crit(HMF, sigma, d_hs_constants.growth_in)\ + // / d_hs_constants.growth_in * d_hs_constants.growth_out; + + return; +} + +// function to launch kernel grids +int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_arr, int n_bin_y, double x_min, double x_width, + struct HaloSamplingConstants hs_constants, unsigned long long int n_buffer) +{ + // allocate memory and copy halo_masses to the device + size_t size_halo = sizeof(float) * n_halos; + float *d_halo_masses; + cudaMalloc(&d_halo_masses, size_halo); + cudaMemcpy(d_halo_masses, halo_masses, size_halo, cudaMemcpyHostToDevice); + + // allocate memory and copy y_arr of sigma_table to the device + size_t size_yarr = sizeof(float) * n_bin_y; + float *d_y_arr; + cudaMalloc(&d_y_arr, size_yarr); + cudaMemcpy(d_y_arr, y_arr, size_yarr, cudaMemcpyHostToDevice); + + // allocate memory for out halos + size_t buffer_size = sizeof(float) * n_buffer; + float *d_halo_masses_out; + cudaMalloc(&d_halo_masses_out, buffer_size); + + float *star_rng_out; + cudaMalloc(&star_rng_out, buffer_size); + + float *sfr_rng_out; + cudaMalloc(&sfr_rng_out, buffer_size); + + float *xray_rng_out; + cudaMalloc(&xray_rng_out, buffer_size); + + float *halo_coords_out; + cudaMalloc(&halo_coords_out, buffer_size * 3); + + // get 
parameters needed by the kernel + int HMF = user_params_global->HMF; + + // define threads layout + int n_threads = 256; + int n_blocks = (int)((n_halos + 255) / 256); + int total_threads = n_threads * n_blocks; + + // Allocate memory for RNG states + curandState *d_states; + cudaMalloc((void **)&d_states, total_threads * sizeof(curandState)); + + // setup random states + setup_random_states<<>>(d_states, 1234ULL); + + // launch kernel grid + update_halo_constants<<>>(d_halo_masses, d_y_arr, x_min, x_width, n_halos, n_bin_y, hs_constants, HMF, d_states, d_halo_masses_out, star_rng_out, + sfr_rng_out, xray_rng_out, halo_coords_out); + + cudaDeviceSynchronize(); + + // Free device memory + cudaFree(d_halo_masses); + cudaFree(d_y_arr); + cudaFree(d_states); + + return 0; +} diff --git a/src/py21cmfast/src/cuda_utils.cuh b/src/py21cmfast/src/cuda_utils.cuh new file mode 100644 index 000000000..57e596cb9 --- /dev/null +++ b/src/py21cmfast/src/cuda_utils.cuh @@ -0,0 +1,18 @@ +#ifndef _CUDA_UTILS_CUH +#define _CUDA_UTILS_CUH + +#include +#include + +#define CALL_CUDA(x) \ + do \ + { \ + cudaError_t err = (x); \ + if (err != cudaSuccess) \ + { \ + printf("Error %s at %s:%d\n", cudaGetErrorString(err), __FILE__, __LINE__); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +#endif \ No newline at end of file From f437ada731bc81ecf9b3a08b2902f8e60b6cc5ff Mon Sep 17 00:00:00 2001 From: JHu Date: Sun, 1 Dec 2024 20:37:36 +1100 Subject: [PATCH 042/145] add error check for cuda runtime functions --- src/py21cmfast/src/Stochasticity.cu | 112 +++++++++++++++++++++------- src/py21cmfast/src/interp_tables.cu | 30 ++++---- 2 files changed, 100 insertions(+), 42 deletions(-) diff --git a/src/py21cmfast/src/Stochasticity.cu b/src/py21cmfast/src/Stochasticity.cu index 3820754cd..e297a0bf6 100644 --- a/src/py21cmfast/src/Stochasticity.cu +++ b/src/py21cmfast/src/Stochasticity.cu @@ -10,6 +10,7 @@ #include "Stochasticity.h" // #include "tiger_checks.h" +#include "cuda_utils.cuh" #include 
"Stochasticity.cuh" #include "DeviceConstants.cuh" #include "hmf.cu" @@ -229,16 +230,29 @@ __device__ int stoc_mass_sample(struct HaloSamplingConstants *hs_constants, cura double tbl_arg = hs_constants->cond_val; - while (M_prog < exp_M){ - M_sample = sample_dndM_inverse(tbl_arg, hs_constants, state); + // tmp (start) + M_sample = sample_dndM_inverse(tbl_arg, hs_constants, state); - M_prog += M_sample; - M_out[n_halo_sampled++] = M_sample; - } + M_prog += M_sample; + *M_out = M_sample; + + + // tmp (end) + + // while (M_prog < exp_M){ + // M_sample = sample_dndM_inverse(tbl_arg, hs_constants, state); + + // M_prog += M_sample; + // M_out[n_halo_sampled++] = M_sample; + // } + // todo: enable fix_mass_sample // The above sample is above the expected mass, by up to 100%. I wish to make the average mass equal to exp_M - fix_mass_sample(state, exp_M, &n_halo_sampled, &M_prog, M_out); + // fix_mass_sample(state, exp_M, &n_halo_sampled, &M_prog, M_out); *n_halo_out = n_halo_sampled; + if (M_prog < exp_M){ + return 1; + } return 0; } @@ -369,9 +383,13 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, doub unsigned long long int n_halos, int n_bin, struct HaloSamplingConstants d_hs_constants, int HMF, curandState *d_states, float *d_halo_masses_out, float *star_rng_out, - float *sfr_rng_out, float *xray_rng_out, float *halo_coords_out) + float *sfr_rng_out, float *xray_rng_out, float *halo_coords_out, int *d_sum_check) { + // Define shared memory for block-level reduction + // __shared__ int shared_check[256]; + // get thread idx + int tid = threadIdx.x; int ind = blockIdx.x * blockDim.x + threadIdx.x; if (ind >= n_halos) { @@ -407,14 +425,33 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, doub // double tmp2 = 681273355217.0; // float tmp3 = 101976856.0; // remove_random_halo(&local_state, 59, &tmp1, &tmp2, &tmp3); - + // int check = stoc_sample(&d_hs_constants, &local_state, &n_prog, &d_halo_masses_out[ind]); 
d_states[ind] = local_state; - // Sample the CMF set by the descendant - // stoc_sample(&hs_constants, &local_state, &n_prog, prog_buf); - - // double sigma = EvaluateSigma(log(M), x_min, x_width, d_y_arr, n_bin); - // double delta = get_delta_crit(HMF, sigma, d_hs_constants.growth_in)\ + // shared_check[tid] = check; + // __syncthreads(); + + // Perform reduction within the block + // for (int stride = blockDim.x / 2; stride > 0; stride /= 2) + // { + // if (tid < stride) + // { + // shared_check[tid] += shared_check[tid + stride]; + // } + // __syncthreads(); // Ensure all threads have completed each stage of reduction + // } + + // Write the result from each block to the global sum + // if (tid == 0) + // { + // atomicAdd(d_sum_check, shared_check[0]); + // } + + // Sample the CMF set by the descendant + // stoc_sample(&hs_constants, &local_state, &n_prog, prog_buf); + + // double sigma = EvaluateSigma(log(M), x_min, x_width, d_y_arr, n_bin); + // double delta = get_delta_crit(HMF, sigma, d_hs_constants.growth_in)\ // / d_hs_constants.growth_in * d_hs_constants.growth_out; return; @@ -427,31 +464,36 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a // allocate memory and copy halo_masses to the device size_t size_halo = sizeof(float) * n_halos; float *d_halo_masses; - cudaMalloc(&d_halo_masses, size_halo); - cudaMemcpy(d_halo_masses, halo_masses, size_halo, cudaMemcpyHostToDevice); + CALL_CUDA(cudaMalloc(&d_halo_masses, size_halo)); + CALL_CUDA(cudaMemcpy(d_halo_masses, halo_masses, size_halo, cudaMemcpyHostToDevice)); // allocate memory and copy y_arr of sigma_table to the device size_t size_yarr = sizeof(float) * n_bin_y; float *d_y_arr; - cudaMalloc(&d_y_arr, size_yarr); - cudaMemcpy(d_y_arr, y_arr, size_yarr, cudaMemcpyHostToDevice); + CALL_CUDA(cudaMalloc(&d_y_arr, size_yarr)); + CALL_CUDA(cudaMemcpy(d_y_arr, y_arr, size_yarr, cudaMemcpyHostToDevice)); + + // allocate memory for d_check_sum (tmp) + int *d_sum_check; + 
CALL_CUDA(cudaMalloc((void **)&d_sum_check, sizeof(int))); + CALL_CUDA(cudaMemset(d_sum_check, 0, sizeof(int))); // allocate memory for out halos size_t buffer_size = sizeof(float) * n_buffer; float *d_halo_masses_out; - cudaMalloc(&d_halo_masses_out, buffer_size); + CALL_CUDA(cudaMalloc(&d_halo_masses_out, buffer_size)); float *star_rng_out; - cudaMalloc(&star_rng_out, buffer_size); + CALL_CUDA(cudaMalloc(&star_rng_out, buffer_size)); float *sfr_rng_out; - cudaMalloc(&sfr_rng_out, buffer_size); + CALL_CUDA(cudaMalloc(&sfr_rng_out, buffer_size)); float *xray_rng_out; - cudaMalloc(&xray_rng_out, buffer_size); + CALL_CUDA(cudaMalloc(&xray_rng_out, buffer_size)); float *halo_coords_out; - cudaMalloc(&halo_coords_out, buffer_size * 3); + CALL_CUDA(cudaMalloc(&halo_coords_out, buffer_size * 3)); // get parameters needed by the kernel int HMF = user_params_global->HMF; @@ -463,21 +505,35 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a // Allocate memory for RNG states curandState *d_states; - cudaMalloc((void **)&d_states, total_threads * sizeof(curandState)); + CALL_CUDA(cudaMalloc((void **)&d_states, total_threads * sizeof(curandState))); // setup random states setup_random_states<<>>(d_states, 1234ULL); + // Check kernel launch errors + CALL_CUDA(cudaGetLastError()); + // launch kernel grid update_halo_constants<<>>(d_halo_masses, d_y_arr, x_min, x_width, n_halos, n_bin_y, hs_constants, HMF, d_states, d_halo_masses_out, star_rng_out, - sfr_rng_out, xray_rng_out, halo_coords_out); + sfr_rng_out, xray_rng_out, halo_coords_out, d_sum_check); + + // Check kernel launch errors + CALL_CUDA(cudaGetLastError()); + + CALL_CUDA(cudaDeviceSynchronize()); + + // copy data from device to host + int h_sum_check; + CALL_CUDA(cudaMemcpy(&h_sum_check, d_sum_check, sizeof(int), cudaMemcpyDeviceToHost)); - cudaDeviceSynchronize(); + // float *h_halo_masses_out; + // cudaHostAlloc((void **)&h_halo_masses_out, buffer_size, cudaHostAllocDefault); + // 
cudaMemcpy(&h_halo_masses_out, d_halo_masses_out, buffer_size, cudaMemcpyDeviceToHost); // Free device memory - cudaFree(d_halo_masses); - cudaFree(d_y_arr); - cudaFree(d_states); + CALL_CUDA(cudaFree(d_halo_masses)); + CALL_CUDA(cudaFree(d_y_arr)); + CALL_CUDA(cudaFree(d_states)); return 0; } diff --git a/src/py21cmfast/src/interp_tables.cu b/src/py21cmfast/src/interp_tables.cu index 3f9033cbd..c47013c4a 100644 --- a/src/py21cmfast/src/interp_tables.cu +++ b/src/py21cmfast/src/interp_tables.cu @@ -6,6 +6,7 @@ // #include "InputParameters.h" #include "interpolation_types.h" +#include "cuda_utils.cuh" #include "interp_tables.cuh" #include "DeviceConstants.cuh" @@ -35,14 +36,14 @@ void copyTablesToDevice(RGTable1D h_Nhalo_table, RGTable1D h_Mcoll_table, RGTabl return; } else{ - cudaMemcpyToSymbol(d_Nhalo_yarr, h_Nhalo_table.y_arr, size_Nhalo_yarr, 0, cudaMemcpyHostToDevice); + CALL_CUDA(cudaMemcpyToSymbol(d_Nhalo_yarr, h_Nhalo_table.y_arr, size_Nhalo_yarr, 0, cudaMemcpyHostToDevice)); // get memory address on the device double *d_Nhalo_yarr_device; - cudaGetSymbolAddress((void **)&d_Nhalo_yarr_device, d_Nhalo_yarr); + CALL_CUDA(cudaGetSymbolAddress((void **)&d_Nhalo_yarr_device, d_Nhalo_yarr)); h_Nhalo_table_to_device.y_arr = d_Nhalo_yarr_device; } - cudaMemcpyToSymbol(d_Nhalo_table, &h_Nhalo_table_to_device, sizeof(RGTable1D), 0, cudaMemcpyHostToDevice); + CALL_CUDA(cudaMemcpyToSymbol(d_Nhalo_table, &h_Nhalo_table_to_device, sizeof(RGTable1D), 0, cudaMemcpyHostToDevice)); // copy Mcoll table and its member y_arr size_t size_Mcoll_yarr = sizeof(double) * h_Mcoll_table.n_bin; @@ -52,14 +53,14 @@ void copyTablesToDevice(RGTable1D h_Nhalo_table, RGTable1D h_Mcoll_table, RGTabl return; } else{ - cudaMemcpyToSymbol(d_Mcoll_yarr, h_Mcoll_table.y_arr, size_Mcoll_yarr, 0, cudaMemcpyHostToDevice); + CALL_CUDA(cudaMemcpyToSymbol(d_Mcoll_yarr, h_Mcoll_table.y_arr, size_Mcoll_yarr, 0, cudaMemcpyHostToDevice)); // get memory address on the device double *d_Mcoll_yarr_device; - 
cudaGetSymbolAddress((void **)&d_Mcoll_yarr_device, d_Mcoll_yarr); + CALL_CUDA(cudaGetSymbolAddress((void **)&d_Mcoll_yarr_device, d_Mcoll_yarr)); h_Mcoll_table_to_device.y_arr = d_Mcoll_yarr_device; } - cudaMemcpyToSymbol(d_Mcoll_table, &h_Mcoll_table_to_device, sizeof(RGTable1D), 0, cudaMemcpyHostToDevice); + CALL_CUDA(cudaMemcpyToSymbol(d_Mcoll_table, &h_Mcoll_table_to_device, sizeof(RGTable1D), 0, cudaMemcpyHostToDevice)); // copy Nhalo_inv table and its member flatten_data size_t size_Nhalo_inv_flatten_data = sizeof(double) * h_Nhalo_inv_table.nx_bin * h_Nhalo_inv_table.ny_bin; @@ -67,27 +68,28 @@ void copyTablesToDevice(RGTable1D h_Nhalo_table, RGTable1D h_Mcoll_table, RGTabl RGTable2D h_Nhalo_inv_table_to_device = h_Nhalo_inv_table; double *d_Nhalo_flatten_data; - cudaMalloc(&d_Nhalo_flatten_data, size_Nhalo_inv_flatten_data); - cudaMemcpy(d_Nhalo_flatten_data, h_Nhalo_inv_table.flatten_data, size_Nhalo_inv_flatten_data, cudaMemcpyHostToDevice); + CALL_CUDA(cudaMalloc(&d_Nhalo_flatten_data, size_Nhalo_inv_flatten_data)); + CALL_CUDA(cudaMemcpy(d_Nhalo_flatten_data, h_Nhalo_inv_table.flatten_data, size_Nhalo_inv_flatten_data, cudaMemcpyHostToDevice)); double **d_z_arr, **z_arr_to_device; size_t size_z_arr = sizeof(double *) * h_Nhalo_inv_table.nx_bin; - cudaHostAlloc((void **)&z_arr_to_device, size_z_arr, cudaHostAllocDefault); + CALL_CUDA(cudaHostAlloc((void **)&z_arr_to_device, size_z_arr, cudaHostAllocDefault)); // get the address of flatten data on the device int i; for (i=0;i Date: Mon, 2 Dec 2024 05:28:47 +1100 Subject: [PATCH 043/145] update cffi build --- build_cffi.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/build_cffi.py b/build_cffi.py index e2bfac87e..d9fccc1ea 100755 --- a/build_cffi.py +++ b/build_cffi.py @@ -27,6 +27,12 @@ if f.endswith(".c") ] +# compiled cuda code +extra_objects = [os.path.join(CLOC, "hello_world.o"), os.path.join(CLOC, "filtering_cuda.o"), os.path.join(CLOC, "Stochasticity_cuda.o") + , 
os.path.join(CLOC, "HaloField_cuda.o"), os.path.join(CLOC, "combined_cuda.o")] + # os.path.join(CLOC, "interp_tables_cuda.o")] +extra_link_args = ["-lcudart", "-lcudadevrt"] + # Set the C-code logging level. # If DEBUG is set, we default to the highest level, but if not, # we set it to the level just above no logging at all. @@ -78,6 +84,7 @@ libraries += ["profiler"] # we need this even if DEBUG is off extra_compile_args += ["-g"] + # extra_compile_args += ["-g", "-pg"] if compiler == "clang": libraries += ["omp"] @@ -111,6 +118,8 @@ library_dirs=library_dirs, libraries=libraries, extra_compile_args=extra_compile_args, + extra_objects=extra_objects, + extra_link_args=extra_link_args, ) # Header files containing types, globals and function prototypes @@ -130,4 +139,4 @@ ) if __name__ == "__main__": - ffi.compile() + ffi.compile(verbose=False) From 16550d47ac378c44b0225b19cd9bd7732666068b Mon Sep 17 00:00:00 2001 From: JHu Date: Mon, 2 Dec 2024 08:42:03 +1100 Subject: [PATCH 044/145] add filter function using thrust --- src/py21cmfast/src/Stochasticity.cu | 287 ++++++++++++++++++---------- 1 file changed, 190 insertions(+), 97 deletions(-) diff --git a/src/py21cmfast/src/Stochasticity.cu b/src/py21cmfast/src/Stochasticity.cu index e297a0bf6..6e39d7d99 100644 --- a/src/py21cmfast/src/Stochasticity.cu +++ b/src/py21cmfast/src/Stochasticity.cu @@ -5,6 +5,13 @@ #include // host-side header file #include // device-side header file +#include +#include +#include +#include +#include +#include + #include "Constants.h" #include "interpolation_types.h" #include "Stochasticity.h" @@ -18,18 +25,6 @@ -// define macros -// #ifndef JENKINS_a -// #define JENKINS_a (0.73) // Jenkins+01, SMT has 0.707 -// #endif - -// #ifndef JENKINS_b -// #define JENKINS_b (0.34) // Jenkins+01 fit from Barkana+01, SMT has 0.5 -// #endif - -// #ifndef JENKINS_c -// #define JENKINS_c (0.81) // Jenkins+01 from from Barkana+01, SMT has 0.6 -// #endif #ifndef MAX_DELTAC_FRAC #define MAX_DELTAC_FRAC 
(float)0.99 // max delta/deltac for the mass function integrals @@ -43,72 +38,94 @@ #define MAX_HALO_CELL (int)1e5 #endif -// device functions -// __device__ double sheth_delc_fixed(double del, double sig) -// { -// return sqrt(JENKINS_a) * del * (1. + JENKINS_b * pow(sig * sig / (JENKINS_a * del * del), JENKINS_c)); -// } - -// // Get the relevant excursion set barrier density given the user-specified HMF -// __device__ double get_delta_crit(int HMF, double sigma, double growthf) -// { -// if (HMF == 4) -// return DELTAC_DELOS; -// if (HMF == 1) -// return sheth_delc_fixed(Deltac / growthf, sigma) * growthf; +void validate_thrust() +{ + // Create a host vector with some values + thrust::host_vector h_vec(5); + h_vec[0] = 1; + h_vec[1] = 2; + h_vec[2] = 3; + h_vec[3] = 4; + h_vec[4] = 5; + + // Transfer data from host to device + thrust::device_vector d_vec = h_vec; + + // Calculate the sum of all elements in the device vector + int sum = thrust::reduce(d_vec.begin(), d_vec.end(), 0, thrust::plus()); + + // Print the result + std::cout << "Sum is: " << sum << std::endl; // Should print "Sum is: 15" +} -// return Deltac; -// } +void condense_device_vector() +{ + // Step 1: Create a device vector with some elements, including -1 + thrust::device_vector d_vec(10); + d_vec[0] = 1; + d_vec[1] = -1; + d_vec[2] = 3; + d_vec[3] = -1; + d_vec[4] = 5; + d_vec[5] = 6; + d_vec[6] = -1; + d_vec[7] = 7; + d_vec[8] = -1; + d_vec[9] = 9; + + // Step 2: Use thrust::remove_if to remove all occurrences of -1 + thrust::device_vector::iterator new_end = thrust::remove(d_vec.begin(), d_vec.end(), -1); + + // Step 3: Resize the vector to remove the trailing elements after the "new_end" iterator + d_vec.erase(new_end, d_vec.end()); + + // Step 4: Copy the result to the host to check + thrust::host_vector h_vec = d_vec; + + // Step 5: Print the result + std::cout << "Condensed Vector: "; + for (size_t i = 0; i < h_vec.size(); i++) + { + std::cout << h_vec[i] << " "; + } + std::cout << 
std::endl; +} -// __device__ double EvaluateRGTable1D_f(double x, RGTable1D_f *table) -// { -// double x_min = table->x_min; -// double x_width = table->x_width; -// int idx = (int)floor((x - x_min) / x_width); -// double table_val = x_min + x_width * (float)idx; -// double interp_point = (x - table_val) / x_width; +int condenseDeviceArray(int *d_array, int original_size) +{ + // Wrap the raw device pointer into a thrust device pointer + thrust::device_ptr d_array_ptr(d_array); -// return table->y_arr[idx] * (1 - interp_point) + table->y_arr[idx + 1] * (interp_point); -// } + // Remove elements with value 0 + // thrust::device_vector::iterator new_end = thrust::remove(d_array_ptr, d_array_ptr + original_size, 0); + // thrust::device_ptr new_end = thrust::remove(d_array_ptr, d_array_ptr + original_size, 0); + auto new_end = thrust::remove(d_array_ptr, d_array_ptr + original_size, 0); -// // assume use interpolation table is true at this stage, add the check later -// // todo: double check whether I should use float or double or x, it's been mixed used in c code -// __device__ double EvaluateSigma(float x, double x_min, double x_width, float *y_arr, int n_bin) -// { -// // using log units to make the fast option faster and the slow option slower -// // return EvaluateRGTable1D_f(lnM, table); -// int idx = (int)floor((x - x_min) / x_width); -// if (idx < 0 || idx >= n_bin - 1) -// { -// return 0.0; // Out-of-bounds handling -// } + // Calculate the number of valid elements + int valid_size = new_end - d_array_ptr; -// double table_val = x_min + x_width * (float)idx; -// double interp_point = (x - table_val) / x_width; + // Print results (on host side) + std::cout << "Valid elements count: " << valid_size << "\n"; + return valid_size; +} -// return y_arr[idx] * (1 - interp_point) + y_arr[idx + 1] * (interp_point); -// } +int filterWithMask(float *d_data, int *d_mask, int original_size) +{ + // Wrap the raw pointers into thrust device pointers + thrust::device_ptr 
d_data_ptr(d_data); + thrust::device_ptr d_mask_ptr(d_mask); -// double EvaluateRGTable1D(double x, RGTable1D *table) -// { -// double x_min = table->x_min; -// double x_width = table->x_width; -// int idx = (int)floor((x - x_min) / x_width); -// double table_val = x_min + x_width * (double)idx; -// double interp_point = (x - table_val) / x_width; + // Use the mask to select only elements that correspond to a value of 1 in the mask + auto end = thrust::copy_if(d_data_ptr, d_data_ptr + original_size, d_mask_ptr, d_data_ptr, thrust::identity()); -// // a + f(a-b) is one fewer operation but less precise -// double result = table->y_arr[idx] * (1 - interp_point) + table->y_arr[idx + 1] * (interp_point); + // Calculate the new valid size after filtering + int valid_size = end - d_data_ptr; -// return result; -// } + // Optionally, print the number of valid elements + std::cout << "Valid elements count: " << valid_size << "\n"; -// // assume use interpolation table is true at this stage, add the check later -// __device__ double EvaluateNhalo(double condition, double growthf, double lnMmin, double lnMmax, double M_cond, double sigma, double delta) -// { -// return EvaluateRGTable1D(condition, &Nhalo_table); - -// } + return valid_size; +} // 11-30: the following implementation works (before using any global params on gpu) __device__ void stoc_set_consts_cond(struct HaloSamplingConstants *const_struct, float cond_val, int HMF, double x_min, double x_width, float *d_y_arr, int n_bin) @@ -215,7 +232,7 @@ __device__ void fix_mass_sample(curandState *state, double exp_M, int *n_halo_pt } } -__device__ int stoc_mass_sample(struct HaloSamplingConstants *hs_constants, curandState *state, int *n_halo_out, float *M_out){ +__device__ int stoc_mass_sample(struct HaloSamplingConstants *hs_constants, curandState *state, int *n_halo_out, float *M_out, int *further_process){ double exp_M = hs_constants->expected_M; // The mass-limited sampling as-is has a slight bias to producing too 
many halos, @@ -234,9 +251,6 @@ __device__ int stoc_mass_sample(struct HaloSamplingConstants *hs_constants, cura M_sample = sample_dndM_inverse(tbl_arg, hs_constants, state); M_prog += M_sample; - *M_out = M_sample; - - // tmp (end) // while (M_prog < exp_M){ @@ -251,12 +265,14 @@ __device__ int stoc_mass_sample(struct HaloSamplingConstants *hs_constants, cura *n_halo_out = n_halo_sampled; if (M_prog < exp_M){ + *further_process = 1; return 1; } + *M_out = M_sample; return 0; } -__device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandState *state, int *n_halo_out, float *M_out){ +__device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandState *state, int *n_halo_out, float *M_out, int *further_process){ // TODO: really examine the case for number/mass sampling // The poisson sample fails spectacularly for high delta (from_catalogs or dense cells) // and excludes the correlation between number and mass (e.g many small halos or few large ones) @@ -291,7 +307,7 @@ __device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandSta } else if (d_user_params.SAMPLE_METHOD == 0) { - err = stoc_mass_sample(hs_constants, state, n_halo_out, M_out); + err = stoc_mass_sample(hs_constants, state, n_halo_out, M_out, further_process); } else if (d_user_params.SAMPLE_METHOD == 2) { @@ -383,10 +399,11 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, doub unsigned long long int n_halos, int n_bin, struct HaloSamplingConstants d_hs_constants, int HMF, curandState *d_states, float *d_halo_masses_out, float *star_rng_out, - float *sfr_rng_out, float *xray_rng_out, float *halo_coords_out, int *d_sum_check) + float *sfr_rng_out, float *xray_rng_out, float *halo_coords_out, int *d_sum_check, + int *further_process) { // Define shared memory for block-level reduction - // __shared__ int shared_check[256]; + __shared__ int shared_check[256]; // get thread idx int tid = threadIdx.x; @@ -425,27 +442,27 @@ 
__global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, doub // double tmp2 = 681273355217.0; // float tmp3 = 101976856.0; // remove_random_halo(&local_state, 59, &tmp1, &tmp2, &tmp3); - // int check = stoc_sample(&d_hs_constants, &local_state, &n_prog, &d_halo_masses_out[ind]); + int check = stoc_sample(&d_hs_constants, &local_state, &n_prog, &d_halo_masses_out[ind], &further_process[ind]); d_states[ind] = local_state; - // shared_check[tid] = check; - // __syncthreads(); + shared_check[tid] = check; + __syncthreads(); // Perform reduction within the block - // for (int stride = blockDim.x / 2; stride > 0; stride /= 2) - // { - // if (tid < stride) - // { - // shared_check[tid] += shared_check[tid + stride]; - // } - // __syncthreads(); // Ensure all threads have completed each stage of reduction - // } + for (int stride = blockDim.x / 2; stride > 0; stride /= 2) + { + if (tid < stride) + { + shared_check[tid] += shared_check[tid + stride]; + } + __syncthreads(); // Ensure all threads have completed each stage of reduction + } // Write the result from each block to the global sum - // if (tid == 0) - // { - // atomicAdd(d_sum_check, shared_check[0]); - // } + if (tid == 0) + { + atomicAdd(d_sum_check, shared_check[0]); + } // Sample the CMF set by the descendant // stoc_sample(&hs_constants, &local_state, &n_prog, prog_buf); @@ -457,6 +474,58 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, doub return; } +__global__ void update_halo_constants_multi(float *d_halo_masses, float *d_y_arr, double x_min, double x_width, + unsigned long long int n_halos, int n_bin, struct HaloSamplingConstants d_hs_constants, + int HMF, curandState *d_states, + float *d_halo_masses_out, float *star_rng_out, + float *sfr_rng_out, float *xray_rng_out, float *halo_coords_out, int *d_sum_check, + int *further_process) +{ + // Define shared memory for block-level reduction + __shared__ int shared_check[256]; + + // get thread idx + int tid = 
threadIdx.x; + int ind = blockIdx.x * blockDim.x + threadIdx.x; + if (ind >= n_halos) + { + return; + } + + float M = d_halo_masses[ind]; + + int n_prog; // the value will be updated after calling stoc_sample + + // set condition-dependent variables for sampling + stoc_set_consts_cond(&d_hs_constants, M, HMF, x_min, x_width, d_y_arr, n_bin); + + // todo: each thread across different blocks has unique random state + curandState local_state = d_states[ind]; + int check = stoc_sample(&d_hs_constants, &local_state, &n_prog, &d_halo_masses_out[ind], &further_process[ind]); + d_states[ind] = local_state; + + shared_check[tid] = check; + __syncthreads(); + + // Perform reduction within the block + for (int stride = blockDim.x / 2; stride > 0; stride /= 2) + { + if (tid < stride) + { + shared_check[tid] += shared_check[tid + stride]; + } + __syncthreads(); // Ensure all threads have completed each stage of reduction + } + + // Write the result from each block to the global sum + if (tid == 0) + { + atomicAdd(d_sum_check, shared_check[0]); + } + + return; +} + // function to launch kernel grids int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_arr, int n_bin_y, double x_min, double x_width, struct HaloSamplingConstants hs_constants, unsigned long long int n_buffer) @@ -479,7 +548,7 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a CALL_CUDA(cudaMemset(d_sum_check, 0, sizeof(int))); // allocate memory for out halos - size_t buffer_size = sizeof(float) * n_buffer; + size_t buffer_size = sizeof(float) * n_buffer * 2; float *d_halo_masses_out; CALL_CUDA(cudaMalloc(&d_halo_masses_out, buffer_size)); @@ -495,6 +564,10 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a float *halo_coords_out; CALL_CUDA(cudaMalloc(&halo_coords_out, buffer_size * 3)); + // allocate memory to store list of halo index need further process + int *d_further_process; + CALL_CUDA(cudaMalloc(&d_further_process, 
size_halo)); + // get parameters needed by the kernel int HMF = user_params_global->HMF; @@ -515,25 +588,45 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a // launch kernel grid update_halo_constants<<>>(d_halo_masses, d_y_arr, x_min, x_width, n_halos, n_bin_y, hs_constants, HMF, d_states, d_halo_masses_out, star_rng_out, - sfr_rng_out, xray_rng_out, halo_coords_out, d_sum_check); + sfr_rng_out, xray_rng_out, halo_coords_out, d_sum_check, d_further_process); // Check kernel launch errors CALL_CUDA(cudaGetLastError()); - + CALL_CUDA(cudaDeviceSynchronize()); + // filtered halos + int n_filter_halo = filterWithMask(d_halo_masses, d_further_process, n_halos); + float *h_filter_halos; + CALL_CUDA(cudaHostAlloc((void **)&h_filter_halos, sizeof(float)*n_filter_halo, cudaHostAllocDefault)); + CALL_CUDA(cudaMemcpy(h_filter_halos, d_halo_masses, sizeof(float)*n_filter_halo, cudaMemcpyDeviceToHost)); + + // launch second kernel + + + // copy data from device to host int h_sum_check; CALL_CUDA(cudaMemcpy(&h_sum_check, d_sum_check, sizeof(int), cudaMemcpyDeviceToHost)); - // float *h_halo_masses_out; - // cudaHostAlloc((void **)&h_halo_masses_out, buffer_size, cudaHostAllocDefault); - // cudaMemcpy(&h_halo_masses_out, d_halo_masses_out, buffer_size, cudaMemcpyDeviceToHost); + float *h_halo_masses_out; + CALL_CUDA(cudaHostAlloc((void **)&h_halo_masses_out, buffer_size, cudaHostAllocDefault)); + CALL_CUDA(cudaMemcpy(h_halo_masses_out, d_halo_masses_out, buffer_size, cudaMemcpyDeviceToHost)); // Free device memory CALL_CUDA(cudaFree(d_halo_masses)); CALL_CUDA(cudaFree(d_y_arr)); CALL_CUDA(cudaFree(d_states)); + CALL_CUDA(cudaFree(d_halo_masses_out)); + CALL_CUDA(cudaFree(star_rng_out)); + CALL_CUDA(cudaFree(sfr_rng_out)); + CALL_CUDA(cudaFree(xray_rng_out)); + CALL_CUDA(cudaFree(halo_coords_out)); + CALL_CUDA(cudaFree(d_further_process)); + + validate_thrust(); + + condense_device_vector(); return 0; } From 
4c6e5c1ae1051cb6d330c21b35a63558e27c8b35 Mon Sep 17 00:00:00 2001 From: JHu Date: Mon, 2 Dec 2024 08:44:53 +1100 Subject: [PATCH 045/145] add cuda specific header; add device function for interpolation --- src/py21cmfast/src/Stochasticity.cuh | 14 +++++++++ src/py21cmfast/src/interpolation.cu | 43 ++++++++++++++++++++++++++++ src/py21cmfast/src/interpolation.cuh | 14 +++++++++ 3 files changed, 71 insertions(+) create mode 100644 src/py21cmfast/src/Stochasticity.cuh create mode 100644 src/py21cmfast/src/interpolation.cu create mode 100644 src/py21cmfast/src/interpolation.cuh diff --git a/src/py21cmfast/src/Stochasticity.cuh b/src/py21cmfast/src/Stochasticity.cuh new file mode 100644 index 000000000..887aa384d --- /dev/null +++ b/src/py21cmfast/src/Stochasticity.cuh @@ -0,0 +1,14 @@ +#ifndef _STOCHASTICITY_CUH +#define _STOCHASTICITY_CUH + +#ifdef __cplusplus +extern "C" +{ +#endif + int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_arr, int n_bin_y, double x_min, double x_width, + struct HaloSamplingConstants hs_constants, unsigned long long int n_buffer); +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/src/py21cmfast/src/interpolation.cu b/src/py21cmfast/src/interpolation.cu new file mode 100644 index 000000000..fe057373d --- /dev/null +++ b/src/py21cmfast/src/interpolation.cu @@ -0,0 +1,43 @@ +#include + +#include "interpolation.cuh" + +__device__ double EvaluateRGTable1D(double x, RGTable1D *table) +{ + double x_min = table->x_min; + double x_width = table->x_width; + int idx = (int)floor((x - x_min) / x_width); + double table_val = x_min + x_width * (double)idx; + double interp_point = (x - table_val) / x_width; + + // a + f(a-b) is one fewer operation but less precise + double result = table->y_arr[idx] * (1 - interp_point) + table->y_arr[idx + 1] * (interp_point); + + return result; +} + +__device__ double EvaluateRGTable2D(double x, double y, RGTable2D *table) +{ + double x_min = table->x_min; + 
double x_width = table->x_width; + double y_min = table->y_min; + double y_width = table->y_width; + int x_idx = (int)floor((x - x_min) / x_width); + int y_idx = (int)floor((y - y_min) / y_width); + + double x_table = x_min + x_width * (double)x_idx; + double y_table = y_min + y_width * (double)y_idx; + + double interp_point_x = (x - x_table) / x_width; + double interp_point_y = (y - y_table) / y_width; + + double left_edge, right_edge, result; + + left_edge = table->z_arr[x_idx][y_idx] * (1 - interp_point_y) + table->z_arr[x_idx][y_idx + 1] * (interp_point_y); + right_edge = table->z_arr[x_idx + 1][y_idx] * (1 - interp_point_y) + table->z_arr[x_idx + 1][y_idx + 1] * (interp_point_y); + + result = left_edge * (1 - interp_point_x) + right_edge * (interp_point_x); + + return result; +} + diff --git a/src/py21cmfast/src/interpolation.cuh b/src/py21cmfast/src/interpolation.cuh new file mode 100644 index 000000000..1206ef8a1 --- /dev/null +++ b/src/py21cmfast/src/interpolation.cuh @@ -0,0 +1,14 @@ +#ifndef _INTERPOLATION_CUH +#define _INTERPOLATION_CUH + +#include +#include "interpolation_types.h" + +#ifdef __CUDA_ARCH__ + +__device__ double EvaluateRGTable1D(double x, RGTable1D *table); +__device__ double EvaluateRGTable2D(double x, double y, RGTable2D *table); + +#endif + +#endif \ No newline at end of file From e145625489d976b5d3a4abd5b92f63f5f7d89220 Mon Sep 17 00:00:00 2001 From: JHu Date: Mon, 2 Dec 2024 08:47:46 +1100 Subject: [PATCH 046/145] add functions to access interp tables outside the file --- src/py21cmfast/src/interp_tables.c | 26 ++++++++++++++++++++++++++ src/py21cmfast/src/interp_tables.h | 6 ++++++ 2 files changed, 32 insertions(+) diff --git a/src/py21cmfast/src/interp_tables.c b/src/py21cmfast/src/interp_tables.c index aaebab267..8a3940c01 100644 --- a/src/py21cmfast/src/interp_tables.c +++ b/src/py21cmfast/src/interp_tables.c @@ -925,6 +925,7 @@ double EvaluateMcoll(double condition, double growthf, double lnMmin, double lnM double 
extrapolate_dNdM_inverse(double condition, double lnp){ double x_min = Nhalo_inv_table.x_min; double x_width = Nhalo_inv_table.x_width; + // printf("condition: %f; lnp: %f \n", condition, lnp); //tmp int x_idx = (int)floor((condition - x_min)/x_width); double x_table = x_min + x_idx*x_width; double interp_point_x = (condition - x_table)/x_width; @@ -1046,3 +1047,28 @@ double EvaluatedSigmasqdm(double lnM){ } return dsigmasqdm_z0(exp(lnM)); } + +// todo: only return when it's been initialized +RGTable1D *GetNhaloTable() +{ + printf("The number of bins: %d; x_min: %f\n", Nhalo_table.n_bin, Nhalo_table.x_min); + return &Nhalo_table; +} + +RGTable1D *GetMcollTable() +{ + printf("The number of bins: %d; x_min: %f\n", Mcoll_table.n_bin, Mcoll_table.x_min); + return &Mcoll_table; +} + +RGTable2D *GetNhaloInvTable() +{ + printf("The number of nx bins: %d; the number of ny bins: %d \n", Nhalo_inv_table.nx_bin, Nhalo_inv_table.ny_bin); + return &Nhalo_inv_table; +} + +RGTable1D_f *GetSigmaInterpTable() +{ + printf("The number of bins: %d; x_min: %f\n", Sigma_InterpTable.n_bin, Sigma_InterpTable.x_min); + return &Sigma_InterpTable; +} \ No newline at end of file diff --git a/src/py21cmfast/src/interp_tables.h b/src/py21cmfast/src/interp_tables.h index 87e8fc4c6..1a3639446 100644 --- a/src/py21cmfast/src/interp_tables.h +++ b/src/py21cmfast/src/interp_tables.h @@ -2,6 +2,7 @@ #define _INTERP_TABLES_H #include "InputParameters.h" +#include "interpolation.h" //Functions within interp_tables.c need the parameter structures, but we don't want to pass them all down the chain, so we broadcast them //TODO: in future it would be better to use a context struct. 
See `HaloBox.c` @@ -62,4 +63,9 @@ void free_conditional_tables(); void free_global_tables(); void free_dNdM_tables(); +RGTable1D *GetNhaloTable(); +RGTable1D *GetMcollTable(); +RGTable2D *GetNhaloInvTable(); +RGTable1D_f *GetSigmaInterpTable(); + #endif From ea41f8ff7bef4bdfb38113f98b0094f714874fa3 Mon Sep 17 00:00:00 2001 From: JHu Date: Mon, 2 Dec 2024 08:48:37 +1100 Subject: [PATCH 047/145] separate type def from function prototypes (tmp solution) --- src/py21cmfast/src/interpolation.h | 75 +++++++++--------------------- 1 file changed, 22 insertions(+), 53 deletions(-) diff --git a/src/py21cmfast/src/interpolation.h b/src/py21cmfast/src/interpolation.h index 2fc9315ac..a1ae554fd 100644 --- a/src/py21cmfast/src/interpolation.h +++ b/src/py21cmfast/src/interpolation.h @@ -3,59 +3,28 @@ #include -typedef struct RGTable1D{ - int n_bin; - double x_min; - double x_width; +#include "interpolation_types.h" - double *y_arr; - bool allocated; -}RGTable1D; - -typedef struct RGTable2D{ - int nx_bin, ny_bin; - double x_min, y_min; - double x_width, y_width; - - double **z_arr; - - double saved_ll, saved_ul; //for future acceleration - bool allocated; -}RGTable2D; - -typedef struct RGTable1D_f{ - int n_bin; - double x_min; - double x_width; - - float *y_arr; - bool allocated; -}RGTable1D_f; - -typedef struct RGTable2D_f{ - int nx_bin, ny_bin; - double x_min, y_min; - double x_width, y_width; - - float **z_arr; - - double saved_ll, saved_ul; //for future acceleration - bool allocated; -}RGTable2D_f; - -void allocate_RGTable1D(int n_bin, RGTable1D * ptr); -void allocate_RGTable1D_f(int n_bin, RGTable1D_f * ptr); -void allocate_RGTable2D(int n_x, int n_y, RGTable2D * ptr); -void allocate_RGTable2D_f(int n_x, int n_y, RGTable2D_f * ptr); - -void free_RGTable1D(RGTable1D * ptr); -void free_RGTable1D_f(RGTable1D_f * ptr); -void free_RGTable2D(RGTable2D * ptr); -void free_RGTable2D_f(RGTable2D_f * ptr); - -double EvaluateRGTable1D(double x, RGTable1D *table); -double 
EvaluateRGTable2D(double x, double y, RGTable2D *table); -double EvaluateRGTable1D_f(double x, RGTable1D_f *table); -double EvaluateRGTable2D_f(double x, double y, RGTable2D_f *table); +#ifdef __cplusplus +extern "C" +{ +#endif + void allocate_RGTable1D(int n_bin, RGTable1D *ptr); + void allocate_RGTable1D_f(int n_bin, RGTable1D_f *ptr); + void allocate_RGTable2D(int n_x, int n_y, RGTable2D *ptr); + void allocate_RGTable2D_f(int n_x, int n_y, RGTable2D_f *ptr); + + void free_RGTable1D(RGTable1D *ptr); + void free_RGTable1D_f(RGTable1D_f *ptr); + void free_RGTable2D(RGTable2D *ptr); + void free_RGTable2D_f(RGTable2D_f *ptr); + + double EvaluateRGTable1D(double x, RGTable1D *table); + double EvaluateRGTable2D(double x, double y, RGTable2D *table); + double EvaluateRGTable1D_f(double x, RGTable1D_f *table); + double EvaluateRGTable2D_f(double x, double y, RGTable2D_f *table); +#ifdef __cplusplus +} +#endif #endif From 0d31ac7394e6d34137c94b6b03a4c815cae999fd Mon Sep 17 00:00:00 2001 From: JHu Date: Mon, 2 Dec 2024 13:05:29 +1100 Subject: [PATCH 048/145] update sample progenitor kernel function with sparsity condition --- src/py21cmfast/src/Stochasticity.cu | 169 ++++++++++++---------------- 1 file changed, 74 insertions(+), 95 deletions(-) diff --git a/src/py21cmfast/src/Stochasticity.cu b/src/py21cmfast/src/Stochasticity.cu index 6e39d7d99..6bbd3d44f 100644 --- a/src/py21cmfast/src/Stochasticity.cu +++ b/src/py21cmfast/src/Stochasticity.cu @@ -232,7 +232,7 @@ __device__ void fix_mass_sample(curandState *state, double exp_M, int *n_halo_pt } } -__device__ int stoc_mass_sample(struct HaloSamplingConstants *hs_constants, curandState *state, int *n_halo_out, float *M_out, int *further_process){ +__device__ int stoc_mass_sample(struct HaloSamplingConstants *hs_constants, curandState *state, float *M_out){ double exp_M = hs_constants->expected_M; // The mass-limited sampling as-is has a slight bias to producing too many halos, @@ -241,16 +241,16 @@ __device__ int 
stoc_mass_sample(struct HaloSamplingConstants *hs_constants, cura // exp_M *= user_params_global->HALOMASS_CORRECTION; exp_M *= d_user_params.HALOMASS_CORRECTION; - int n_halo_sampled = 0; - double M_prog = 0; - double M_sample; + // int n_halo_sampled = 0; + // double M_prog = 0; + // double M_sample; double tbl_arg = hs_constants->cond_val; // tmp (start) - M_sample = sample_dndM_inverse(tbl_arg, hs_constants, state); + double M_sample = sample_dndM_inverse(tbl_arg, hs_constants, state); - M_prog += M_sample; + // M_prog += M_sample; // tmp (end) // while (M_prog < exp_M){ @@ -263,16 +263,16 @@ __device__ int stoc_mass_sample(struct HaloSamplingConstants *hs_constants, cura // The above sample is above the expected mass, by up to 100%. I wish to make the average mass equal to exp_M // fix_mass_sample(state, exp_M, &n_halo_sampled, &M_prog, M_out); - *n_halo_out = n_halo_sampled; - if (M_prog < exp_M){ - *further_process = 1; - return 1; - } + // *n_halo_out = n_halo_sampled; + // if (M_prog < exp_M){ + // *further_process = 1; + // return 1; + // } *M_out = M_sample; return 0; } -__device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandState *state, int *n_halo_out, float *M_out, int *further_process){ +__device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandState *state, float *M_out){ // TODO: really examine the case for number/mass sampling // The poisson sample fails spectacularly for high delta (from_catalogs or dense cells) // and excludes the correlation between number and mass (e.g many small halos or few large ones) @@ -286,12 +286,12 @@ __device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandSta // NOTE: some of these conditions are redundant with set_consts_cond() if (hs_constants->delta <= DELTA_MIN || hs_constants->expected_M < d_user_params.SAMPLER_MIN_MASS) { - *n_halo_out = 0; + // *n_halo_out = 0; return 0; } // if delta is above critical, form one big halo if (hs_constants->delta >= 
MAX_DELTAC_FRAC * get_delta_crit(d_user_params.HMF, hs_constants->sigma_cond, hs_constants->growth_out)){ - *n_halo_out = 1; + // *n_halo_out = 1; // Expected mass takes into account potential dexm overlap M_out[0] = hs_constants->expected_M; @@ -307,7 +307,7 @@ __device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandSta } else if (d_user_params.SAMPLE_METHOD == 0) { - err = stoc_mass_sample(hs_constants, state, n_halo_out, M_out, further_process); + err = stoc_mass_sample(hs_constants, state, M_out); } else if (d_user_params.SAMPLE_METHOD == 2) { @@ -327,13 +327,13 @@ __device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandSta // LOG_ERROR("Invalid sampling method"); // Throw(ValueError); } - if (*n_halo_out > MAX_HALO_CELL) - { - printf("too many halos in conditin, buffer overflow"); - // todo: check how to throw error in cuda - // LOG_ERROR("too many halos in condition, buffer overflow"); - // Throw(ValueError); - } + // if (*n_halo_out > MAX_HALO_CELL) + // { + // printf("too many halos in conditin, buffer overflow\n"); + // // todo: check how to throw error in cuda + // // LOG_ERROR("too many halos in condition, buffer overflow"); + // // Throw(ValueError); + // } return err; } @@ -397,13 +397,13 @@ __global__ void setup_random_states(curandState *d_states, unsigned long long in __global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, double x_min, double x_width, unsigned long long int n_halos, int n_bin, struct HaloSamplingConstants d_hs_constants, - int HMF, curandState *d_states, + int HMF, curandState *d_states, float *d_halo_masses_out, float *star_rng_out, float *sfr_rng_out, float *xray_rng_out, float *halo_coords_out, int *d_sum_check, - int *further_process) + int *d_further_process, int sparsity, unsigned long long int write_offset) { // Define shared memory for block-level reduction - __shared__ int shared_check[256]; + __shared__ float shared_mass[256]; // get thread idx int tid = 
threadIdx.x; @@ -413,9 +413,14 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, doub return; } - float M = d_halo_masses[ind]; + // determine which halo mass to access + int hid = ind / sparsity; + float M = d_halo_masses[hid]; - int n_prog; // the value will be updated after calling stoc_sample + // idx of d_halo_masses_out + int out_id = write_offset + ind; + + // int n_prog = 0; // the value will be updated after calling stoc_sample // set condition-dependent variables for sampling stoc_set_consts_cond(&d_hs_constants, M, HMF, x_min, x_width, d_y_arr, n_bin); @@ -442,27 +447,42 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, doub // double tmp2 = 681273355217.0; // float tmp3 = 101976856.0; // remove_random_halo(&local_state, 59, &tmp1, &tmp2, &tmp3); - int check = stoc_sample(&d_hs_constants, &local_state, &n_prog, &d_halo_masses_out[ind], &further_process[ind]); + stoc_sample(&d_hs_constants, &local_state, &shared_mass[tid]); d_states[ind] = local_state; - shared_check[tid] = check; __syncthreads(); - // Perform reduction within the block - for (int stride = blockDim.x / 2; stride > 0; stride /= 2) - { - if (tid < stride) - { - shared_check[tid] += shared_check[tid + stride]; + // passing value to arrays in global memory is done by one thread per group + if (tid % sparsity == 0){ + float Mprog = 0.0; + for (int i = 0; i < sparsity; ++i){ + if (Mprog >= d_hs_constants.expected_M) + { + break; + } + Mprog += shared_mass[tid+i]; + d_halo_masses_out[out_id+i] = shared_mass[tid+i]; + } + if (Mprog < d_hs_constants.expected_M){ + d_further_process[hid] = 1; } - __syncthreads(); // Ensure all threads have completed each stage of reduction } + // Perform reduction within the block + // for (int stride = blockDim.x / 2; stride > 0; stride /= 2) + // { + // if (tid < stride) + // { + // shared_check[tid] += shared_check[tid + stride]; + // } + // __syncthreads(); // Ensure all threads have completed each stage 
of reduction + // } + // Write the result from each block to the global sum - if (tid == 0) - { - atomicAdd(d_sum_check, shared_check[0]); - } + // if (tid == 0) + // { + // atomicAdd(d_sum_check, shared_check[0]); + // } // Sample the CMF set by the descendant // stoc_sample(&hs_constants, &local_state, &n_prog, prog_buf); @@ -474,58 +494,6 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, doub return; } -__global__ void update_halo_constants_multi(float *d_halo_masses, float *d_y_arr, double x_min, double x_width, - unsigned long long int n_halos, int n_bin, struct HaloSamplingConstants d_hs_constants, - int HMF, curandState *d_states, - float *d_halo_masses_out, float *star_rng_out, - float *sfr_rng_out, float *xray_rng_out, float *halo_coords_out, int *d_sum_check, - int *further_process) -{ - // Define shared memory for block-level reduction - __shared__ int shared_check[256]; - - // get thread idx - int tid = threadIdx.x; - int ind = blockIdx.x * blockDim.x + threadIdx.x; - if (ind >= n_halos) - { - return; - } - - float M = d_halo_masses[ind]; - - int n_prog; // the value will be updated after calling stoc_sample - - // set condition-dependent variables for sampling - stoc_set_consts_cond(&d_hs_constants, M, HMF, x_min, x_width, d_y_arr, n_bin); - - // todo: each thread across different blocks has unique random state - curandState local_state = d_states[ind]; - int check = stoc_sample(&d_hs_constants, &local_state, &n_prog, &d_halo_masses_out[ind], &further_process[ind]); - d_states[ind] = local_state; - - shared_check[tid] = check; - __syncthreads(); - - // Perform reduction within the block - for (int stride = blockDim.x / 2; stride > 0; stride /= 2) - { - if (tid < stride) - { - shared_check[tid] += shared_check[tid + stride]; - } - __syncthreads(); // Ensure all threads have completed each stage of reduction - } - - // Write the result from each block to the global sum - if (tid == 0) - { - atomicAdd(d_sum_check, 
shared_check[0]); - } - - return; -} - // function to launch kernel grids int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_arr, int n_bin_y, double x_min, double x_width, struct HaloSamplingConstants hs_constants, unsigned long long int n_buffer) @@ -585,26 +553,37 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a // Check kernel launch errors CALL_CUDA(cudaGetLastError()); + + // start with one thread work with one halo + int sparsity = 1; + + // initiate n_halo check + unsigned long long int n_halo_check = n_halos; + + // initiate offset for writing output data + unsigned long long int write_offset = 0; // launch kernel grid update_halo_constants<<>>(d_halo_masses, d_y_arr, x_min, x_width, n_halos, n_bin_y, hs_constants, HMF, d_states, d_halo_masses_out, star_rng_out, - sfr_rng_out, xray_rng_out, halo_coords_out, d_sum_check, d_further_process); + sfr_rng_out, xray_rng_out, halo_coords_out, d_sum_check, d_further_process, sparsity, write_offset); // Check kernel launch errors CALL_CUDA(cudaGetLastError()); CALL_CUDA(cudaDeviceSynchronize()); - // filtered halos + // filter device halo masses in-place int n_filter_halo = filterWithMask(d_halo_masses, d_further_process, n_halos); + + // tmp: the following is just needed for debugging purpose float *h_filter_halos; CALL_CUDA(cudaHostAlloc((void **)&h_filter_halos, sizeof(float)*n_filter_halo, cudaHostAllocDefault)); CALL_CUDA(cudaMemcpy(h_filter_halos, d_halo_masses, sizeof(float)*n_filter_halo, cudaMemcpyDeviceToHost)); - // launch second kernel + // copy data from device to host int h_sum_check; CALL_CUDA(cudaMemcpy(&h_sum_check, d_sum_check, sizeof(int), cudaMemcpyDeviceToHost)); From 1f8807bfd3548568c5ca6580dfbde3a4b146dec9 Mon Sep 17 00:00:00 2001 From: JHu Date: Mon, 2 Dec 2024 14:15:28 +1100 Subject: [PATCH 049/145] add a basic while loop to launch kernels iteratively --- src/py21cmfast/src/Stochasticity.cu | 58 +++++++++++++++++++++-------- 1 
file changed, 42 insertions(+), 16 deletions(-) diff --git a/src/py21cmfast/src/Stochasticity.cu b/src/py21cmfast/src/Stochasticity.cu index 6bbd3d44f..50d0b0b3f 100644 --- a/src/py21cmfast/src/Stochasticity.cu +++ b/src/py21cmfast/src/Stochasticity.cu @@ -127,6 +127,13 @@ int filterWithMask(float *d_data, int *d_mask, int original_size) return valid_size; } +// decide the number of sparsity +int getSparsity(int n_buffer, int n_halo){ + int power = floor(log2(n_buffer / n_halo)); + int sparsity = 1 << power; + return sparsity; +} + // 11-30: the following implementation works (before using any global params on gpu) __device__ void stoc_set_consts_cond(struct HaloSamplingConstants *const_struct, float cond_val, int HMF, double x_min, double x_width, float *d_y_arr, int n_bin) { @@ -534,7 +541,8 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a // allocate memory to store list of halo index need further process int *d_further_process; - CALL_CUDA(cudaMalloc(&d_further_process, size_halo)); + CALL_CUDA(cudaMalloc(&d_further_process, sizeof(int)*n_halos)); + CALL_CUDA(cudaMemset(d_further_process, 0, sizeof(int)*n_halos)); // get parameters needed by the kernel int HMF = user_params_global->HMF; @@ -563,35 +571,53 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a // initiate offset for writing output data unsigned long long int write_offset = 0; + // initialize n filter halo + unsigned long long int n_filter_halo = n_halos; + // launch kernel grid - update_halo_constants<<>>(d_halo_masses, d_y_arr, x_min, x_width, n_halos, n_bin_y, hs_constants, HMF, d_states, d_halo_masses_out, star_rng_out, - sfr_rng_out, xray_rng_out, halo_coords_out, d_sum_check, d_further_process, sparsity, write_offset); + while (n_filter_halo > 0){ + update_halo_constants<<>>(d_halo_masses, d_y_arr, x_min, x_width, n_halos, n_bin_y, hs_constants, HMF, d_states, d_halo_masses_out, star_rng_out, + sfr_rng_out, xray_rng_out, 
halo_coords_out, d_sum_check, d_further_process, sparsity, write_offset); - // Check kernel launch errors - CALL_CUDA(cudaGetLastError()); + // Check kernel launch errors + CALL_CUDA(cudaGetLastError()); - CALL_CUDA(cudaDeviceSynchronize()); + CALL_CUDA(cudaDeviceSynchronize()); - // filter device halo masses in-place - int n_filter_halo = filterWithMask(d_halo_masses, d_further_process, n_halos); + // filter device halo masses in-place + n_filter_halo = filterWithMask(d_halo_masses, d_further_process, n_halos); - // tmp: the following is just needed for debugging purpose - float *h_filter_halos; - CALL_CUDA(cudaHostAlloc((void **)&h_filter_halos, sizeof(float)*n_filter_halo, cudaHostAllocDefault)); - CALL_CUDA(cudaMemcpy(h_filter_halos, d_halo_masses, sizeof(float)*n_filter_halo, cudaMemcpyDeviceToHost)); + // tmp: the following is just needed for debugging purpose + float *h_filter_halos; + CALL_CUDA(cudaHostAlloc((void **)&h_filter_halos, sizeof(float)*n_filter_halo, cudaHostAllocDefault)); + CALL_CUDA(cudaMemcpy(h_filter_halos, d_halo_masses, sizeof(float)*n_filter_halo, cudaMemcpyDeviceToHost)); - + + // update sparsity value + sparsity = getSparsity(n_halos, n_filter_halo); + // update write offset + write_offset += total_threads; + // reset mask array + CALL_CUDA(cudaMemset(d_further_process, 0, sizeof(int) * n_halos)); + // copy data from device to host + int h_sum_check; + CALL_CUDA(cudaMemcpy(&h_sum_check, d_sum_check, sizeof(int), cudaMemcpyDeviceToHost)); - // copy data from device to host - int h_sum_check; - CALL_CUDA(cudaMemcpy(&h_sum_check, d_sum_check, sizeof(int), cudaMemcpyDeviceToHost)); + // tmp: for debug only + CALL_CUDA(cudaFreeHost(h_filter_halos)); + // CALL_CUDA(cudaFreeHost(h_sum_check)); + + } float *h_halo_masses_out; CALL_CUDA(cudaHostAlloc((void **)&h_halo_masses_out, buffer_size, cudaHostAllocDefault)); CALL_CUDA(cudaMemcpy(h_halo_masses_out, d_halo_masses_out, buffer_size, cudaMemcpyDeviceToHost)); + + + // Free device memory 
CALL_CUDA(cudaFree(d_halo_masses)); CALL_CUDA(cudaFree(d_y_arr)); From df8d7aa1d27f05bb7d381b7f09aa8fdc8aa568f1 Mon Sep 17 00:00:00 2001 From: alserene Date: Thu, 5 Dec 2024 06:50:36 +1100 Subject: [PATCH 050/145] Remove original hello-world cuda test code. --- build_cffi.py | 5 +---- src/py21cmfast/src/InitialConditions.c | 4 ---- src/py21cmfast/src/hello_world.cu | 11 ----------- src/py21cmfast/src/hello_world.h | 13 ------------- 4 files changed, 1 insertion(+), 32 deletions(-) delete mode 100644 src/py21cmfast/src/hello_world.cu delete mode 100644 src/py21cmfast/src/hello_world.h diff --git a/build_cffi.py b/build_cffi.py index dff686551..2b5667805 100755 --- a/build_cffi.py +++ b/build_cffi.py @@ -27,15 +27,12 @@ if f.endswith(".c") ] -# Compiled cuda code +# Compiled CUDA code extra_objects = [ - os.path.join(CLOC, "hello_world.o"), os.path.join(CLOC, "filtering.o"), os.path.join(CLOC, "PerturbField.o"), os.path.join(CLOC, "SpinTemperatureBox.o"), ] -# extra_link_args = ["-lcudart"] -# extra_link_args = ["-lcudart", "-no-undefined"] extra_link_args = ["-lcudart", "-lstdc++"] # Set the C-code logging level. diff --git a/src/py21cmfast/src/InitialConditions.c b/src/py21cmfast/src/InitialConditions.c index 1e437e806..fab923e3d 100644 --- a/src/py21cmfast/src/InitialConditions.c +++ b/src/py21cmfast/src/InitialConditions.c @@ -19,7 +19,6 @@ #include "indexing.h" #include "dft.h" #include "filtering.h" -#include "hello_world.h" #include "rng.h" #include "InitialConditions.h" @@ -89,9 +88,6 @@ int ComputeInitialConditions( int status; - printf("Begin computing initial conditions"); - call_cuda(); - Try{ // This Try wraps the entire function so we don't indent. 
// Makes the parameter structs visible to a variety of functions/macros diff --git a/src/py21cmfast/src/hello_world.cu b/src/py21cmfast/src/hello_world.cu deleted file mode 100644 index 0d9e4a0fa..000000000 --- a/src/py21cmfast/src/hello_world.cu +++ /dev/null @@ -1,11 +0,0 @@ -#include - -__global__ void hello_kernel() { - printf("Hello World from GPU! BlockIdx: %d, ThreadIdx: %d\n", blockIdx.x, threadIdx.x); -} - -int call_cuda() { - hello_kernel<<<3, 3>>>(); - cudaDeviceSynchronize(); - return 0; -} \ No newline at end of file diff --git a/src/py21cmfast/src/hello_world.h b/src/py21cmfast/src/hello_world.h deleted file mode 100644 index a4a096113..000000000 --- a/src/py21cmfast/src/hello_world.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _HELLO_WORLD_H -#define _HELLO_WORLD_H - -#ifdef __cplusplus -extern "C" { -#endif - -int call_cuda(); - -#ifdef __cplusplus -} -#endif -#endif // HELLO_WORLD_H \ No newline at end of file From b25bfc2a855ef1ac97af0bc94c0ff8144a71d777 Mon Sep 17 00:00:00 2001 From: alserene Date: Thu, 5 Dec 2024 06:52:51 +1100 Subject: [PATCH 051/145] Add bound check for thread ids to fix small memory leak. 
--- src/py21cmfast/src/filtering.cu | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/py21cmfast/src/filtering.cu b/src/py21cmfast/src/filtering.cu index 6383b5edf..9ac1ce448 100644 --- a/src/py21cmfast/src/filtering.cu +++ b/src/py21cmfast/src/filtering.cu @@ -73,14 +73,15 @@ __device__ inline double spherical_shell_filter(double k, double R_outer, double - sin(kR_inner) + cos(kR_inner) * kR_inner); } -__global__ void filter_box_kernel(cuFloatComplex *box, size_t size, int dimension, int midpoint, int midpoint_para, double delta_k, float R, float R_param, double R_const, int filter_type) { +__global__ void filter_box_kernel(cuFloatComplex *box, int num_pixels, int dimension, int midpoint, int midpoint_para, double delta_k, float R, float R_param, double R_const, int filter_type) { // Get index of box (flattened k-box) unsigned long long idx = blockIdx.x * blockDim.x + threadIdx.x; - // TODO: Do we need a bound check? (in case number of threads != multiple of block size) - // while (idx < size) { - + // Bound check (in case number of threads != multiple of block size) + if (idx >= num_pixels) { + return; + } // Compute the 3D indices (n_x, n_y, n_z) for the k-box from the flattened index (idx) // Based on convenience macros in indexing.h int n_z = idx % (midpoint_para + 1); @@ -174,7 +175,7 @@ void filter_box_gpu(fftwf_complex *box, int RES, int filter_type, float R, float int threadsPerBlock = 256; int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; // d_box must be cast to cuFloatComplex (from fftwf_complex) for CUDA - filter_box_kernel<<>>(reinterpret_cast(d_box), size, dimension, midpoint, midpoint_para, delta_k, R, R_param, R_const, filter_type); + filter_box_kernel<<>>(reinterpret_cast(d_box), num_pixels, dimension, midpoint, midpoint_para, delta_k, R, R_param, R_const, filter_type); cudaError_t err; From 5ef38e979abb2d18b04230275b6ae6e8f7a01c74 Mon Sep 17 00:00:00 2001 From: alserene Date: Thu, 5 Dec 2024 
07:00:54 +1100 Subject: [PATCH 052/145] Correct numBlocks calculation; add extra bound check in reduction kernel. --- src/py21cmfast/src/SpinTemperatureBox.cu | 43 ++++++++++++++---------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/src/py21cmfast/src/SpinTemperatureBox.cu b/src/py21cmfast/src/SpinTemperatureBox.cu index 201becdc4..8cc778c6d 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.cu +++ b/src/py21cmfast/src/SpinTemperatureBox.cu @@ -49,14 +49,17 @@ template __device__ void warp_reduce(volatile double *sdata, unsigned int tid) { // Reduce by half // No syncing required with threads < 32 - if (threadsPerBlock >= 64) sdata[tid] += sdata[tid + 32]; - if (threadsPerBlock >= 32) sdata[tid] += sdata[tid + 16]; - if (threadsPerBlock >= 16) sdata[tid] += sdata[tid + 8]; - if (threadsPerBlock >= 8) sdata[tid] += sdata[tid + 4]; - if (threadsPerBlock >= 4) sdata[tid] += sdata[tid + 2]; - if (threadsPerBlock >= 2) sdata[tid] += sdata[tid + 1]; + if (threadsPerBlock >= 64) { sdata[tid] += sdata[tid + 32]; } + if (threadsPerBlock >= 32) { sdata[tid] += sdata[tid + 16]; } + if (threadsPerBlock >= 16) { sdata[tid] += sdata[tid + 8]; } + if (threadsPerBlock >= 8) { sdata[tid] += sdata[tid + 4]; } + if (threadsPerBlock >= 4) { sdata[tid] += sdata[tid + 2]; } + if (threadsPerBlock >= 2) { sdata[tid] += sdata[tid + 1]; } } +// As seen in talk by Mark Harris, NVIDIA. 
+// https://developer.download.nvidia.com/assets/cuda/files/reduction.pdf +// https://www.youtube.com/watch?v=NrWhZMHrP4w template __global__ void compute_and_reduce( double x_min, // reference @@ -89,18 +92,23 @@ __global__ void compute_and_reduce( while (i < num_pixels) { // Compute current density from density grid value * redshift-scaled growth factor curr_dens_i = dens_R_grid[i] * zpp_growth_R_ct; - curr_dens_j = dens_R_grid[i + threadsPerBlock] * zpp_growth_R_ct; // Compute fraction of mass that has collapsed to form stars/other structures fcoll_i = exp(EvaluateRGTable1D_f_gpu(curr_dens_i, x_min, x_width, y_arr)); - fcoll_j = exp(EvaluateRGTable1D_f_gpu(curr_dens_j, x_min, x_width, y_arr)); // Update the shared buffer with the collapse fractions - sdata[tid] += fcoll_i + fcoll_j; + sdata[tid] += fcoll_i; // Update the relevant cells in the star formation rate density grid sfrd_grid[i] = (1. + curr_dens_i) * fcoll_i; - sfrd_grid[i + threadsPerBlock] = (1. + curr_dens_j) * fcoll_j; + + // Repeat for i + threadsPerBlock + if ((i + threadsPerBlock) < num_pixels) { + curr_dens_j = dens_R_grid[i + threadsPerBlock] * zpp_growth_R_ct; + fcoll_j = exp(EvaluateRGTable1D_f_gpu(curr_dens_j, x_min, x_width, y_arr)); + sdata[tid] += fcoll_j; + sfrd_grid[i + threadsPerBlock] = (1. 
+ curr_dens_j) * fcoll_j; + } i += gridSize; } @@ -112,10 +120,10 @@ __global__ void compute_and_reduce( if (threadsPerBlock >= 128) { if (tid < 64) { sdata[tid] += sdata[tid + 64]; } __syncthreads(); } // Final reduction by separate kernel - if (tid < 32) warp_reduce(sdata, tid); + if (tid < 32) { warp_reduce(sdata, tid); } // The first thread of each block updates the block totals - if (tid == 0) ave_sfrd_buf[blockIdx.x] = sdata[0]; + if (tid == 0) { ave_sfrd_buf[blockIdx.x] = sdata[0]; } } unsigned int init_sfrd_gpu_data( @@ -178,8 +186,8 @@ unsigned int init_sfrd_gpu_data( // Allocate memory for SFRD sum buffer and initialise to 0 only for initial filter step; // reuse memory for remaining filter steps. - unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); - err = cudaMalloc((void**)d_ave_sfrd_buf, sizeof(double) * buffer_length); // already pointer to a pointer (no & needed) ...91m & 256 -> 177979 + unsigned int numBlocks = ceil(num_pixels / (threadsPerBlock * 2)); + err = cudaMalloc((void**)d_ave_sfrd_buf, sizeof(double) * numBlocks); // already pointer to a pointer (no & needed) ...91m & 256 -> 177979 if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); @@ -187,7 +195,7 @@ unsigned int init_sfrd_gpu_data( LOG_INFO("SFRD sum reduction buffer allocated on device."); // Initialise buffer to 0 (fill with byte=0) - err = cudaMemset(*d_ave_sfrd_buf, 0, sizeof(double) * buffer_length); // dereference the pointer to a pointer (*) + err = cudaMemset(*d_ave_sfrd_buf, 0, sizeof(double) * numBlocks); // dereference the pointer to a pointer (*) if (err != cudaSuccess) { LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); Throw(CUDAError); @@ -228,9 +236,8 @@ double calculate_sfrd_from_grid_gpu( } LOG_INFO("SFRD_conditional_table.y_arr and density grid copied to device."); - unsigned int numBlocks = (num_pixels + threadsPerBlock - 1) / threadsPerBlock; // 91m & 256 -> 355959 + unsigned int numBlocks = 
ceil(num_pixels / (threadsPerBlock * 2)); unsigned int smemSize = threadsPerBlock * sizeof(double); // shared memory - unsigned int buffer_length = ceil(num_pixels / (threadsPerBlock * 2)); // Invoke kernel switch (threadsPerBlock) { @@ -270,7 +277,7 @@ double calculate_sfrd_from_grid_gpu( // Wrap device pointer in a thrust::device_ptr thrust::device_ptr d_ave_sfrd_buf_ptr(d_ave_sfrd_buf); // Reduce final buffer sums to one value - double ave_sfrd_buf = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + buffer_length, 0., thrust::plus()); + double ave_sfrd_buf = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + numBlocks, 0., thrust::plus()); LOG_INFO("SFRD sum reduced to single value by thrust::reduce operation."); // Copy results from device to host From 49e25539dac0c78990a7fa8e734c60bf27b308df Mon Sep 17 00:00:00 2001 From: JHu Date: Thu, 5 Dec 2024 11:44:42 +1100 Subject: [PATCH 053/145] get device properties; condense array with input mask value --- src/py21cmfast/src/Stochasticity.cu | 152 +++++++++++++++++++--------- 1 file changed, 103 insertions(+), 49 deletions(-) diff --git a/src/py21cmfast/src/Stochasticity.cu b/src/py21cmfast/src/Stochasticity.cu index 50d0b0b3f..a87118230 100644 --- a/src/py21cmfast/src/Stochasticity.cu +++ b/src/py21cmfast/src/Stochasticity.cu @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "Constants.h" @@ -91,21 +92,22 @@ void condense_device_vector() std::cout << std::endl; } -int condenseDeviceArray(int *d_array, int original_size) +int condenseDeviceArray(float *d_array, int original_size, float mask_value) { // Wrap the raw device pointer into a thrust device pointer - thrust::device_ptr d_array_ptr(d_array); + thrust::device_ptr d_array_ptr(d_array); // Remove elements with value 0 // thrust::device_vector::iterator new_end = thrust::remove(d_array_ptr, d_array_ptr + original_size, 0); // thrust::device_ptr new_end = thrust::remove(d_array_ptr, d_array_ptr + original_size, 0); - auto new_end 
= thrust::remove(d_array_ptr, d_array_ptr + original_size, 0); + auto new_end = thrust::remove(d_array_ptr, d_array_ptr + original_size, mask_value); // Calculate the number of valid elements int valid_size = new_end - d_array_ptr; + thrust::fill(new_end, d_array_ptr + original_size, mask_value); // Print results (on host side) - std::cout << "Valid elements count: " << valid_size << "\n"; + // std::cout << "Valid elements count: " << valid_size << "\n"; return valid_size; } @@ -122,7 +124,7 @@ int filterWithMask(float *d_data, int *d_mask, int original_size) int valid_size = end - d_data_ptr; // Optionally, print the number of valid elements - std::cout << "Valid elements count: " << valid_size << "\n"; + // std::cout << "Valid elements count: " << valid_size << "\n"; return valid_size; } @@ -134,8 +136,29 @@ int getSparsity(int n_buffer, int n_halo){ return sparsity; } +// initialize device array with given value +void initializeArray(float *d_array, int n_elements, float value){ + thrust::device_ptr d_array_ptr(d_array); + thrust::fill(d_array_ptr, d_array_ptr + n_elements, value); +} +// more members of deviceprop can be found in cura_runtime_api documentation +void getDeviceProperties(){ + int device; + CALL_CUDA(cudaGetDevice(&device)); + cudaDeviceProp deviceProp; + CALL_CUDA(cudaGetDeviceProperties(&deviceProp, device)); + printf("name: %s\n", deviceProp.name); + // printf("uuid: %s\n", deviceProp.uuid); + printf("total global memory: %zu bytes \n", deviceProp.totalGlobalMem); + printf("Shared memory per block: %zu bytes\n", deviceProp.sharedMemPerBlock); + printf("registers per block: %d\n", deviceProp.regsPerBlock); + printf("warp size: %d \n", deviceProp.warpSize); + printf("memory pitch: %zu bytes \n", deviceProp.memPitch); + printf("max threads per block: %d \n", deviceProp.maxThreadsPerBlock); + printf("total constant memory: %zu bytes \n", deviceProp.totalConstMem); +} // 11-30: the following implementation works (before using any global params on 
gpu) -__device__ void stoc_set_consts_cond(struct HaloSamplingConstants *const_struct, float cond_val, int HMF, double x_min, double x_width, float *d_y_arr, int n_bin) +__device__ void stoc_set_consts_cond(struct HaloSamplingConstants *const_struct, float cond_val, int HMF, double x_min, double x_width, float *d_y_arr, int n_bin, double *expected_mass) { double m_exp, n_exp; // Here the condition is a mass, volume is the Lagrangian volume and delta_l is set by the @@ -177,6 +200,7 @@ __device__ void stoc_set_consts_cond(struct HaloSamplingConstants *const_struct, const_struct->expected_N = n_exp * const_struct->M_cond; const_struct->expected_M = m_exp * const_struct->M_cond; } + *expected_mass = const_struct->expected_M; return; } @@ -407,7 +431,7 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, doub int HMF, curandState *d_states, float *d_halo_masses_out, float *star_rng_out, float *sfr_rng_out, float *xray_rng_out, float *halo_coords_out, int *d_sum_check, - int *d_further_process, int sparsity, unsigned long long int write_offset) + int *d_further_process, int *d_nprog_predict, int sparsity, unsigned long long int write_offset, double *expected_mass) { // Define shared memory for block-level reduction __shared__ float shared_mass[256]; @@ -430,7 +454,7 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, doub // int n_prog = 0; // the value will be updated after calling stoc_sample // set condition-dependent variables for sampling - stoc_set_consts_cond(&d_hs_constants, M, HMF, x_min, x_width, d_y_arr, n_bin); + stoc_set_consts_cond(&d_hs_constants, M, HMF, x_min, x_width, d_y_arr, n_bin, &expected_mass[hid]); // tmp: just to verify the tables have been copied correctly if (ind == 0) @@ -472,6 +496,8 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, doub } if (Mprog < d_hs_constants.expected_M){ d_further_process[hid] = 1; + d_nprog_predict[hid] = 
ceil(d_hs_constants.expected_M * sparsity / Mprog); + } } @@ -522,10 +548,39 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a CALL_CUDA(cudaMalloc((void **)&d_sum_check, sizeof(int))); CALL_CUDA(cudaMemset(d_sum_check, 0, sizeof(int))); + // allocate memory to store list of halo index need further process + int *d_further_process; + CALL_CUDA(cudaMalloc(&d_further_process, sizeof(int)*n_halos)); + CALL_CUDA(cudaMemset(d_further_process, 0, sizeof(int)*n_halos)); + + // allocate memory to store estimated n_prog after the first kernel launch + int *d_nprog_predict; + CALL_CUDA(cudaMalloc(&d_nprog_predict, sizeof(int) * n_halos)); + CALL_CUDA(cudaMemset(d_nprog_predict, 0, sizeof(int) * n_halos)); + + // tmp: check expected_M + double *d_expected_mass, *h_expected_mass; + CALL_CUDA(cudaMalloc(&d_expected_mass, sizeof(double) * n_halos)); + CALL_CUDA(cudaMemset(d_expected_mass, 0, sizeof(double) * n_halos)); + CALL_CUDA(cudaHostAlloc((void **)&h_expected_mass, sizeof(double) * n_halos, cudaHostAllocDefault)); + + // get parameters needed by the kernel + int HMF = user_params_global->HMF; + + // define threads layout + int n_threads = 256; + int n_blocks = (int)((n_halos + 255) / 256); + int total_threads = n_threads * n_blocks; + // allocate memory for out halos - size_t buffer_size = sizeof(float) * n_buffer * 2; + + // size_t buffer_size = sizeof(float) * max(total_threads * 2, n_buffer) * 2; + size_t d_n_buffer = total_threads * 4 + 10; + size_t buffer_size = sizeof(float) * d_n_buffer; float *d_halo_masses_out; CALL_CUDA(cudaMalloc(&d_halo_masses_out, buffer_size)); + // CALL_CUDA(cudaMemset(d_halo_masses_out, 0, buffer_size)); + initializeArray(d_halo_masses_out, d_n_buffer, -1.0f); float *star_rng_out; CALL_CUDA(cudaMalloc(&star_rng_out, buffer_size)); @@ -539,19 +594,6 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a float *halo_coords_out; CALL_CUDA(cudaMalloc(&halo_coords_out, buffer_size * 
3)); - // allocate memory to store list of halo index need further process - int *d_further_process; - CALL_CUDA(cudaMalloc(&d_further_process, sizeof(int)*n_halos)); - CALL_CUDA(cudaMemset(d_further_process, 0, sizeof(int)*n_halos)); - - // get parameters needed by the kernel - int HMF = user_params_global->HMF; - - // define threads layout - int n_threads = 256; - int n_blocks = (int)((n_halos + 255) / 256); - int total_threads = n_threads * n_blocks; - // Allocate memory for RNG states curandState *d_states; CALL_CUDA(cudaMalloc((void **)&d_states, total_threads * sizeof(curandState))); @@ -563,7 +605,7 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a CALL_CUDA(cudaGetLastError()); // start with one thread work with one halo - int sparsity = 1; + int sparsity = 4; // initiate n_halo check unsigned long long int n_halo_check = n_halos; @@ -574,42 +616,54 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a // initialize n filter halo unsigned long long int n_filter_halo = n_halos; + getDeviceProperties(); + // launch kernel grid - while (n_filter_halo > 0){ - update_halo_constants<<>>(d_halo_masses, d_y_arr, x_min, x_width, n_halos, n_bin_y, hs_constants, HMF, d_states, d_halo_masses_out, star_rng_out, - sfr_rng_out, xray_rng_out, halo_coords_out, d_sum_check, d_further_process, sparsity, write_offset); + // while (n_filter_halo > 0){ + update_halo_constants<<>>(d_halo_masses, d_y_arr, x_min, x_width, n_halos, n_bin_y, hs_constants, HMF, d_states, d_halo_masses_out, star_rng_out, + sfr_rng_out, xray_rng_out, halo_coords_out, d_sum_check, d_further_process, d_nprog_predict, sparsity, write_offset, d_expected_mass); - // Check kernel launch errors - CALL_CUDA(cudaGetLastError()); + // Check kernel launch errors + CALL_CUDA(cudaGetLastError()); - CALL_CUDA(cudaDeviceSynchronize()); + CALL_CUDA(cudaDeviceSynchronize()); - // filter device halo masses in-place - n_filter_halo = 
filterWithMask(d_halo_masses, d_further_process, n_halos); + // filter device halo masses in-place + n_filter_halo = filterWithMask(d_halo_masses, d_further_process, n_halos); + printf("The number of halos for further processing: %d \n", n_filter_halo); - // tmp: the following is just needed for debugging purpose - float *h_filter_halos; - CALL_CUDA(cudaHostAlloc((void **)&h_filter_halos, sizeof(float)*n_filter_halo, cudaHostAllocDefault)); - CALL_CUDA(cudaMemcpy(h_filter_halos, d_halo_masses, sizeof(float)*n_filter_halo, cudaMemcpyDeviceToHost)); + // condense out halo mass array + unsigned long long int n_processed_prog = condenseDeviceArray(d_halo_masses_out, d_n_buffer, -1.0f); + printf("The number of progenitors written in out halo field so far: %d \n", n_processed_prog); - - // update sparsity value - sparsity = getSparsity(n_halos, n_filter_halo); - // update write offset - write_offset += total_threads; + // tmp: the following is just needed for debugging purpose + float *h_filter_halos; + CALL_CUDA(cudaHostAlloc((void **)&h_filter_halos, sizeof(float) * n_filter_halo, cudaHostAllocDefault)); + CALL_CUDA(cudaMemcpy(h_filter_halos, d_halo_masses, sizeof(float) * n_filter_halo, cudaMemcpyDeviceToHost)); - // reset mask array - CALL_CUDA(cudaMemset(d_further_process, 0, sizeof(int) * n_halos)); + int *h_nprog_predict; + CALL_CUDA(cudaHostAlloc((void **)&h_nprog_predict, sizeof(int) * n_halos, cudaHostAllocDefault)); + CALL_CUDA(cudaMemcpy(h_nprog_predict, d_nprog_predict, sizeof(int) * n_halos, cudaMemcpyDeviceToHost)); - // copy data from device to host - int h_sum_check; - CALL_CUDA(cudaMemcpy(&h_sum_check, d_sum_check, sizeof(int), cudaMemcpyDeviceToHost)); + // update sparsity value + unsigned long long int available_n_buffer = d_n_buffer - n_processed_prog; + sparsity = getSparsity(available_n_buffer, n_filter_halo); - // tmp: for debug only - CALL_CUDA(cudaFreeHost(h_filter_halos)); - // CALL_CUDA(cudaFreeHost(h_sum_check)); + // update write offset + 
write_offset = n_processed_prog; - } + // reset mask array + CALL_CUDA(cudaMemset(d_further_process, 0, sizeof(int) * n_halos)); + + // copy data from device to host + int h_sum_check; + CALL_CUDA(cudaMemcpy(&h_sum_check, d_sum_check, sizeof(int), cudaMemcpyDeviceToHost)); + + // tmp: for debug only + CALL_CUDA(cudaFreeHost(h_filter_halos)); + // CALL_CUDA(cudaFreeHost(h_sum_check)); + + // } float *h_halo_masses_out; CALL_CUDA(cudaHostAlloc((void **)&h_halo_masses_out, buffer_size, cudaHostAllocDefault)); From 90037bbad8a213b15a8211679fb22780fe66ce9c Mon Sep 17 00:00:00 2001 From: alserene Date: Tue, 10 Dec 2024 08:28:57 +1100 Subject: [PATCH 054/145] Add ionisation CUDA object to build file. --- build_cffi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/build_cffi.py b/build_cffi.py index 2b5667805..5d9792888 100755 --- a/build_cffi.py +++ b/build_cffi.py @@ -32,6 +32,7 @@ os.path.join(CLOC, "filtering.o"), os.path.join(CLOC, "PerturbField.o"), os.path.join(CLOC, "SpinTemperatureBox.o"), + os.path.join(CLOC, "IonisationBox.o"), ] extra_link_args = ["-lcudart", "-lstdc++"] From ec1afc03ca4e812fb14c2a88b17e704a524a9a65 Mon Sep 17 00:00:00 2001 From: alserene Date: Tue, 10 Dec 2024 08:32:50 +1100 Subject: [PATCH 055/145] Add CUDA functionality to ComputeIonisation function. --- src/py21cmfast/src/IonisationBox.c | 76 +++++- src/py21cmfast/src/IonisationBox.cu | 356 ++++++++++++++++++++++++++++ src/py21cmfast/src/IonisationBox.h | 39 +++ 3 files changed, 466 insertions(+), 5 deletions(-) create mode 100644 src/py21cmfast/src/IonisationBox.cu diff --git a/src/py21cmfast/src/IonisationBox.c b/src/py21cmfast/src/IonisationBox.c index f8956ccba..aca758d36 100644 --- a/src/py21cmfast/src/IonisationBox.c +++ b/src/py21cmfast/src/IonisationBox.c @@ -693,7 +693,7 @@ void setup_integration_tables(struct FilteredGrids *fg_struct, struct IonBoxCons //TODO: We should speed test different configurations, separating grids, parallel sections etc. 
// See the note above copy_filter_transform() for the general idea // If we separate by grid we can reuse the clipping function above -void calculate_fcoll_grid(IonizedBox *box, IonizedBox *previous_ionize_box, struct FilteredGrids *fg_struct, struct IonBoxConstants *consts, +void calculate_fcoll_grid(IonizedBox *box, IonizedBox *previous_ionize_box, struct FilteredGrids *fg_struct, struct IonBoxConstants *consts, // <-------- HERE struct RadiusSpec *rspec){ double f_coll_total,f_coll_MINI_total; //TODO: make proper error tracking through the parallel region @@ -763,7 +763,7 @@ void calculate_fcoll_grid(IonizedBox *box, IonizedBox *previous_ionize_box, stru if (previous_ionize_box->mean_f_coll_MINI * consts->ion_eff_factor_mini_gl + previous_ionize_box->mean_f_coll * consts->ion_eff_factor_gl > 1e-4){ prev_dens = *((float *)fg_struct->prev_deltax_filtered + HII_R_FFT_INDEX(x,y,z)); - prev_Splined_Fcoll = EvaluateNion_Conditional(prev_dens,log10_Mturnover,consts->prev_growth_factor, + prev_Splined_Fcoll = EvaluateNion_Conditional(prev_dens,log10_Mturnover,consts->prev_growth_factor, // <-------- HERE consts->M_min,rspec->M_max_R,rspec->M_max_R, rspec->sigma_maxmass,consts->Mlim_Fstar,consts->Mlim_Fesc,true); prev_Splined_Fcoll_MINI = EvaluateNion_Conditional_MINI(prev_dens,log10_Mturnover_MINI,consts->prev_growth_factor,consts->M_min, @@ -775,7 +775,7 @@ void calculate_fcoll_grid(IonizedBox *box, IonizedBox *previous_ionize_box, stru prev_Splined_Fcoll_MINI = 0.; } } - Splined_Fcoll = EvaluateNion_Conditional(curr_dens,log10_Mturnover,consts->growth_factor, + Splined_Fcoll = EvaluateNion_Conditional(curr_dens,log10_Mturnover,consts->growth_factor, // <-------- HERE consts->M_min,rspec->M_max_R,rspec->M_max_R, rspec->sigma_maxmass,consts->Mlim_Fstar,consts->Mlim_Fesc,false); } @@ -815,7 +815,7 @@ void calculate_fcoll_grid(IonizedBox *box, IonizedBox *previous_ionize_box, stru box->Fcoll_MINI[fc_r_idx * HII_TOT_NUM_PIXELS + HII_R_INDEX(x,y,z)] = \ 
previous_ionize_box->Fcoll_MINI[fc_r_idx * HII_TOT_NUM_PIXELS + HII_R_INDEX(x,y,z)] + Splined_Fcoll_MINI - prev_Splined_Fcoll_MINI; - if (box->Fcoll_MINI[fc_r_idx * HII_TOT_NUM_PIXELS + HII_R_INDEX(x,y,z)] >1.) box->Fcoll_MINI[fc_r_idx * HII_TOT_NUM_PIXELS + HII_R_INDEX(x,y,z)] = 1.; + if (box->Fcoll_MINI[fc_r_idx * HII_TOT_NUM_PIXELS + HII_R_INDEX(x,y,z)] > 1.) box->Fcoll_MINI[fc_r_idx * HII_TOT_NUM_PIXELS + HII_R_INDEX(x,y,z)] = 1.; //if (box->Fcoll_MINI[counter * HII_TOT_NUM_PIXELS + HII_R_INDEX(x,y,z)] <0.) box->Fcoll_MINI[counter * HII_TOT_NUM_PIXELS + HII_R_INDEX(x,y,z)] = 1e-40; //if (box->Fcoll_MINI[counter * HII_TOT_NUM_PIXELS + HII_R_INDEX(x,y,z)] < previous_ionize_box->Fcoll_MINI[counter * HII_TOT_NUM_PIXELS + HII_R_INDEX(x,y,z)]) // box->Fcoll_MINI[counter * HII_TOT_NUM_PIXELS + HII_R_INDEX(x,y,z)] = previous_ionize_box->Fcoll_MINI[counter * HII_TOT_NUM_PIXELS + HII_R_INDEX(x,y,z)]; @@ -1317,6 +1317,34 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para // set the max radius we will use, making sure we are always sampling the same values of radius // (this avoids aliasing differences w redshift) + fftwf_complex *d_deltax_filtered = NULL; + fftwf_complex *d_N_rec_filtered = NULL; + fftwf_complex *d_xe_filtered = NULL; + float *d_y_arr = NULL; + float *d_Fcoll = NULL; //_outputstructs_wrapper.h + + unsigned int threadsPerBlock = NULL; + unsigned int numBlocks = NULL; + + // If GPU & flags call init_ionbox_gpu_data() + if (flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { + + unsigned int Nion_nbins = get_nbins(); + + init_ionbox_gpu_data( + &d_deltax_filtered, + &d_N_rec_filtered, + &d_xe_filtered, + &d_y_arr, + &d_Fcoll, + Nion_nbins, + HII_TOT_NUM_PIXELS, + HII_KSPACE_NUM_PIXELS, + &threadsPerBlock, + &numBlocks + ); + } + int R_ct; struct RadiusSpec curr_radius; for(R_ct=n_radii;R_ct--;){ @@ -1343,7 +1371,34 @@ int ComputeIonizedBox(float 
redshift, float prev_redshift, UserParams *user_para setup_integration_tables(grid_struct,&ionbox_constants,curr_radius,need_prev_ion); } - calculate_fcoll_grid(box,previous_ionize_box,grid_struct,&ionbox_constants,&curr_radius); + // If GPU & flags, call gpu version of calculate_fcoll_grid() + if (flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { + calculate_fcoll_grid_gpu( + box, + // grid_struct, + grid_struct->deltax_filtered, + grid_struct->N_rec_filtered, + grid_struct->xe_filtered, + // &ionbox_constants, + &ionbox_constants.filter_recombinations, + // &curr_radius, + &curr_radius.f_coll_grid_mean, + d_deltax_filtered, + d_N_rec_filtered, + d_xe_filtered, + d_Fcoll, + d_y_arr, + HII_TOT_NUM_PIXELS, + HII_KSPACE_NUM_PIXELS, + &threadsPerBlock, + &numBlocks + ); + } else { + calculate_fcoll_grid(box, previous_ionize_box, grid_struct, &ionbox_constants, &curr_radius); // <-------- HERE + } + // calculate_fcoll_grid(box, previous_ionize_box, grid_struct, &ionbox_constants, &curr_radius); // <-------- HERE + + // To avoid ST_over_PS becoming nan when f_coll = 0, I set f_coll = FRACT_FLOAT_ERR. // TODO: This was the previous behaviour, but is this right? 
// setting the *total* to the minimum for the adjustment factor, @@ -1366,6 +1421,17 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para debugSummarizeBox(box->z_re_box, user_params->HII_DIM, user_params->NON_CUBIC_FACTOR, " "); #endif } + // If GPU & flags, call free_ionbox_gpu_data() + if (flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { + free_ionbox_gpu_data( + &d_deltax_filtered, + &d_N_rec_filtered, + &d_xe_filtered, + &d_y_arr, + &d_Fcoll + ); + } + set_ionized_temperatures(box,perturbed_field,spin_temp,&ionbox_constants); // find the neutral fraction diff --git a/src/py21cmfast/src/IonisationBox.cu b/src/py21cmfast/src/IonisationBox.cu new file mode 100644 index 000000000..ddacc82e1 --- /dev/null +++ b/src/py21cmfast/src/IonisationBox.cu @@ -0,0 +1,356 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cexcept.h" +#include "exceptions.h" +#include "logger.h" + +// GPU +#include +#include +#include +// We use thrust for reduction +#include +#include +#include // thrust::plus + +#include "Constants.h" +#include "InputParameters.h" +#include "OutputStructs.h" +#include "cosmology.h" +#include "hmf.h" +#include "indexing.h" +#include "dft.h" +#include "recombinations.h" +#include "debugging.h" +#include "heating_helper_progs.h" +#include "photoncons.h" +#include "thermochem.h" +#include "interp_tables.h" +#include "filtering.h" +#include "bubble_helper_progs.h" +#include "InitialConditions.h" + +#include "IonisationBox.h" + + +__device__ inline double EvaluateRGTable1D_f_gpu(double x, double x_min, double x_width, float *y_arr) { + + int idx = (int)floor((x - x_min) / x_width); + + double table_val = x_min + x_width * (float)idx; + double interp_point = (x - table_val) / x_width; + + return y_arr[idx] * (1 - interp_point) + y_arr[idx + 1] * (interp_point); +} + +// template +__global__ void 
compute_Fcoll( + cuFloatComplex *deltax_filtered, // fg_struct + cuFloatComplex *N_rec_filtered, // fg_struct + cuFloatComplex *xe_filtered, // fg_struct + float *y_arr, // Nion_conditional_table1D + double x_min, // Nion_conditional_table1D + double x_width, // Nion_conditional_table1D + double fract_float_err, // FRACT_FLOAT_ERR + bool filter_recomb, // consts->filter_recombinations + bool use_ts_fluct, // flag_options_global->USE_TS_FLUCT + unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS + long long hii_d, // HII_D + long long hii_d_para, // HII_D_PARA + long long hii_mid_para, // HII_MID_PARA + float *Fcoll // box +) { + + // Get index of grids + unsigned long long idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Bound check + if (idx >= hii_tot_num_pixels) { + return; + } + + // Get x, y, z from idx using HII_R_INDEX macro formula + int z = idx % hii_d_para; + unsigned long long remaining = idx / hii_d_para; + int y = remaining % hii_d; + int x = remaining / hii_d; + + // Get FFT index using HII_R_FFT_INDEX macro formula + unsigned long long fft_idx = z + 2 * (hii_mid_para + 1) * (y + hii_d * x); + + // These clippings could be made in the calling function, using thrust, rather than here... + + // Clip the filtered grids to physical values + // delta cannot be less than -1 + // deltax_filtered[fft_idx] = fmaxf(deltax_filtered[fft_idx], -1. + fract_float_err); + *((float *) deltax_filtered + fft_idx) = fmaxf(*((float *) deltax_filtered + fft_idx), -1. 
+ fract_float_err); + // cannot be less than zero + if (filter_recomb) { + // N_rec_filtered[fft_idx] = fmaxf(N_rec_filtered[fft_idx], 0.0); + *((float *) N_rec_filtered + fft_idx) = fmaxf(*((float *) N_rec_filtered + fft_idx), 0.0); + } + // x_e has to be between zero and unity + if (use_ts_fluct) { + // xe_filtered[fft_idx] = fmaxf(xe_filtered[fft_idx], 0.0); + // xe_filtered[fft_idx] = fminf(xe_filtered[fft_idx], 0.999); + *((float *) xe_filtered + fft_idx) = fmaxf(*((float *) xe_filtered + fft_idx), 0.0); + *((float *) xe_filtered + fft_idx) = fminf(*((float *) xe_filtered + fft_idx), 0.999); + } + + // Compute collapse fraction + // Fcoll[idx] = exp(EvaluateRGTable1D_f_gpu(deltax_filtered[fft_idx], x_min, x_width, y_arr)); + Fcoll[idx] = exp(EvaluateRGTable1D_f_gpu(*((float *) deltax_filtered + fft_idx), x_min, x_width, y_arr)); +} + +void init_ionbox_gpu_data( + fftwf_complex **d_deltax_filtered, // copies of pointers to pointers + fftwf_complex **d_N_rec_filtered, + fftwf_complex **d_xe_filtered, + float **d_y_arr, + float **d_Fcoll, + unsigned int nbins, // nbins for Nion_conditional_table1D->y + unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS + unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS + unsigned int *threadsPerBlock, + unsigned int *numBlocks +) { + cudaError_t err = cudaGetLastError(); + + // deltax_filtered, N_rec_filtered & xe_filtered are of length HII_KSPACE_NUM_PIXELS + // Fcoll is of length HII_TOT_NUM_PIXELS (outputs.py) + + err = cudaMalloc((void**)d_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels); // already pointers to pointers (no & needed) + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMalloc((void**)d_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMalloc((void**)d_xe_filtered, 
sizeof(fftwf_complex) * hii_kspace_num_pixels); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMalloc((void**)d_y_arr, sizeof(float) * nbins); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMalloc((void**)d_Fcoll, sizeof(float) * hii_tot_num_pixels); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + LOG_INFO("Ionisation grids allocated on device."); + + err = cudaMemset(*d_deltax_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels); // dereference the pointer to a pointer (*) + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMemset(*d_N_rec_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMemset(*d_xe_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMemset(*d_y_arr, 0, sizeof(float) * nbins); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMemset(*d_Fcoll, 0, sizeof(float) * hii_tot_num_pixels); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + LOG_INFO("Ionisation grids initialised on device."); + + // Get max threads/block for device + int maxThreadsPerBlock; + cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0); + + // Set threads/block based on device max + if (maxThreadsPerBlock >= 512) { + *threadsPerBlock = 512; + } else if (maxThreadsPerBlock >= 256) { + *threadsPerBlock = 256; + } else if (maxThreadsPerBlock >= 128) { + *threadsPerBlock = 128; + } else if 
(maxThreadsPerBlock >= 64) { + *threadsPerBlock = 64; + } else if (maxThreadsPerBlock >= 32) { + *threadsPerBlock = 32; + } else { + *threadsPerBlock = 16; + } + + *numBlocks = ceil(hii_tot_num_pixels / (*threadsPerBlock * 2)); +} + +void calculate_fcoll_grid_gpu( + IonizedBox *box, // for box->Fcoll + fftwf_complex *h_deltax_filtered, // members of fg_struct + fftwf_complex *h_N_rec_filtered, + fftwf_complex *h_xe_filtered, + bool filter_recombinations, // member of consts + double *f_coll_grid_mean, // member of rspec + fftwf_complex *d_deltax_filtered, // device pointers + fftwf_complex *d_N_rec_filtered, + fftwf_complex *d_xe_filtered, + float *d_Fcoll, + float *d_y_arr, + unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS + unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS + unsigned int *threadsPerBlock, + unsigned int *numBlocks +) { + cudaError_t err = cudaGetLastError(); + + // TODO: Potentially use thrust to clip grids here instead of in kernel... + + RGTable1D_f* Nion_conditional_table1D = get_Nion_conditional_table1D(); + // unsigned long long hii_tot_num_pixels = HII_TOT_NUM_PIXELS; + // unsigned long long hii_tot_fft_num_pixels = HII_TOT_FFT_NUM_PIXELS; + + // Copy grids from host to device + err = cudaMemcpy(d_deltax_filtered, h_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMemcpy(d_N_rec_filtered, h_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMemcpy(d_xe_filtered, h_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMemcpy(d_y_arr, 
Nion_conditional_table1D->y_arr, sizeof(float) * Nion_conditional_table1D->n_bin, cudaMemcpyHostToDevice); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + LOG_INFO("Ionisation grids copied to device."); + + // TODO: Can I pass these straight to kernel? (or access in kernel w/ Tiger's method) + double fract_float_err = FRACT_FLOAT_ERR; + bool use_ts_fluct = flag_options_global->USE_TS_FLUCT; + long long hii_d = HII_D; + long long hii_d_para = HII_D_PARA; + long long hii_mid_para = HII_MID_PARA; + + // Invoke kernel + compute_Fcoll<<< *numBlocks, *threadsPerBlock >>>( + reinterpret_cast(d_deltax_filtered), + reinterpret_cast(d_N_rec_filtered), + reinterpret_cast(d_xe_filtered), + d_y_arr, + Nion_conditional_table1D->x_min, + Nion_conditional_table1D->x_width, + fract_float_err, + filter_recombinations, + use_ts_fluct, + hii_tot_num_pixels, + hii_d, + hii_d_para, + hii_mid_para, + d_Fcoll + ); + LOG_INFO("IonisationBox compute_Fcoll kernel called."); + + err = cudaDeviceSynchronize(); + CATCH_CUDA_ERROR(err); + + err = cudaGetLastError(); + if (err != cudaSuccess) { + LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + + // Use thrust to reduce computed sums to one value + + // Wrap device pointer in a thrust::device_ptr + thrust::device_ptr d_Fcoll_ptr(d_Fcoll); + // Reduce final buffer sums to one value + *f_coll_grid_mean = thrust::reduce(d_Fcoll_ptr, d_Fcoll_ptr + hii_tot_num_pixels, 0., thrust::plus()); + LOG_INFO("Fcoll sum reduced to single value by thrust::reduce operation."); + + // Copy results from device to host + err = cudaMemcpy(box->Fcoll, d_Fcoll, sizeof(float) * hii_tot_num_pixels, cudaMemcpyDeviceToHost); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMemcpy(h_deltax_filtered, d_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost); + if (err != 
cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMemcpy(h_N_rec_filtered, d_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + err = cudaMemcpy(h_xe_filtered, d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + LOG_INFO("Fcoll grid copied to host."); +} + +void free_ionbox_gpu_data( + fftwf_complex **d_deltax_filtered, // copies of pointers to pointers + fftwf_complex **d_N_rec_filtered, + fftwf_complex **d_xe_filtered, + float **d_y_arr, + float **d_Fcoll +) { + cudaError_t err = cudaGetLastError(); + + cudaFree(*d_deltax_filtered); // Need to dereference the pointers to pointers (*) + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + cudaFree(*d_N_rec_filtered); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + cudaFree(*d_xe_filtered); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + cudaFree(*d_y_arr); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + cudaFree(*d_Fcoll); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } + + LOG_INFO("Device memory freed."); +} diff --git a/src/py21cmfast/src/IonisationBox.h b/src/py21cmfast/src/IonisationBox.h index 03fe6ed8b..f4507debb 100644 --- a/src/py21cmfast/src/IonisationBox.h +++ b/src/py21cmfast/src/IonisationBox.h @@ -1,6 +1,9 @@ #ifndef _IONBOX_H #define _IONBOX_H +#include +#include + #include "InputParameters.h" #include "OutputStructs.h" @@ -13,6 +16,42 @@ int 
ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para IonizedBox *previous_ionize_box, TsBox *spin_temp, HaloBox *halos, InitialConditions *ini_boxes, IonizedBox *box); +void init_ionbox_gpu_data( + fftwf_complex **d_deltax_filtered, // copies of pointers to pointers + fftwf_complex **d_N_rec_filtered, + fftwf_complex **d_xe_filtered, + float **d_y_arr, + float **d_Fcoll, + unsigned int nbins, // nbins for Nion_conditional_table1D->y + unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS + unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS + unsigned int *threadsPerBlock, + unsigned int *numBlocks +); +void calculate_fcoll_grid_gpu( + IonizedBox *box, // for box->Fcoll + fftwf_complex *h_deltax_filtered, // members of fg_struct + fftwf_complex *h_N_rec_filtered, + fftwf_complex *h_xe_filtered, + bool filter_recombinations, // member of consts + double *f_coll_grid_mean, // member of rspec + fftwf_complex *d_deltax_filtered, // device pointers + fftwf_complex *d_N_rec_filtered, + fftwf_complex *d_xe_filtered, + float *d_Fcoll, + float *d_y_arr, + unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS + unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS + unsigned int *threadsPerBlock, + unsigned int *numBlocks +); +void free_ionbox_gpu_data( + fftwf_complex **d_deltax_filtered, // copies of pointers to pointers + fftwf_complex **d_N_rec_filtered, + fftwf_complex **d_xe_filtered, + float **d_y_arr, + float **d_Fcoll +); #ifdef __cplusplus } From 7b3a2e594deaa7953e3ea3fc8d33ed1de695228b Mon Sep 17 00:00:00 2001 From: alserene Date: Tue, 10 Dec 2024 08:33:34 +1100 Subject: [PATCH 056/145] Add accessor function for Nion conditional table. 
--- src/py21cmfast/src/interp_tables.c | 5 +++++ src/py21cmfast/src/interp_tables.h | 1 + 2 files changed, 6 insertions(+) diff --git a/src/py21cmfast/src/interp_tables.c b/src/py21cmfast/src/interp_tables.c index a8157b940..db608ca02 100644 --- a/src/py21cmfast/src/interp_tables.c +++ b/src/py21cmfast/src/interp_tables.c @@ -1056,6 +1056,11 @@ RGTable1D_f* get_SFRD_conditional_table(void) { return &SFRD_conditional_table; } +// Accessor function for the GPU Ionisation kernel to access table. +RGTable1D_f* get_Nion_conditional_table1D(void) { + return &Nion_conditional_table1D; +} + // Accessor function for the GPU SpinTemp memory allocation function to access nbins. int get_nbins(void) { return NDELTA; diff --git a/src/py21cmfast/src/interp_tables.h b/src/py21cmfast/src/interp_tables.h index 86482f082..576a33add 100644 --- a/src/py21cmfast/src/interp_tables.h +++ b/src/py21cmfast/src/interp_tables.h @@ -67,6 +67,7 @@ void free_global_tables(); void free_dNdM_tables(); RGTable1D_f* get_SFRD_conditional_table(void); +RGTable1D_f* get_Nion_conditional_table1D(void); int get_nbins(void); #ifdef __cplusplus From d59714802347be0f5835dca468c82ded55dcd542 Mon Sep 17 00:00:00 2001 From: alserene Date: Tue, 10 Dec 2024 08:34:18 +1100 Subject: [PATCH 057/145] Make indent friendly to collapse in IDE. 
--- src/py21cmfast/src/SpinTemperatureBox.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py21cmfast/src/SpinTemperatureBox.cu b/src/py21cmfast/src/SpinTemperatureBox.cu index 8cc778c6d..504dabb64 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.cu +++ b/src/py21cmfast/src/SpinTemperatureBox.cu @@ -70,7 +70,7 @@ __global__ void compute_and_reduce( float *sfrd_grid, // star formation rate density grid to be updated double *ave_sfrd_buf, // output buffer of length ceil(n / (threadsPerBlock * 2)) unsigned long long num_pixels // length of input data - ) { +) { // An array to store intermediate summations // Shared between all threads in block From 3c84bdeb614994f1563ad0f7718c01fe5151622d Mon Sep 17 00:00:00 2001 From: alserene Date: Wed, 11 Dec 2024 08:01:48 +1100 Subject: [PATCH 058/145] Add error catching for individual CUDA calls + synchronisation. --- src/py21cmfast/src/filtering.cu | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/py21cmfast/src/filtering.cu b/src/py21cmfast/src/filtering.cu index 9ac1ce448..1886fe42f 100644 --- a/src/py21cmfast/src/filtering.cu +++ b/src/py21cmfast/src/filtering.cu @@ -164,12 +164,22 @@ void filter_box_gpu(fftwf_complex *box, int RES, int filter_type, float R, float // Get size of flattened array size_t size = num_pixels * sizeof(fftwf_complex); + cudaError_t err; + // Allocate device memory fftwf_complex* d_box; - cudaMalloc(&d_box, size); + err = cudaMalloc(&d_box, size); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } // Copy array from host to device - cudaMemcpy(d_box, box, size, cudaMemcpyHostToDevice); + err = cudaMemcpy(d_box, box, size, cudaMemcpyHostToDevice); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } // Invoke kernel int threadsPerBlock = 256; @@ -177,11 +187,9 @@ void filter_box_gpu(fftwf_complex *box, int RES, 
int filter_type, float R, float // d_box must be cast to cuFloatComplex (from fftwf_complex) for CUDA filter_box_kernel<<>>(reinterpret_cast(d_box), num_pixels, dimension, midpoint, midpoint_para, delta_k, R, R_param, R_const, filter_type); - cudaError_t err; - // // Only use during development! - // err = cudaDeviceSynchronize(); - // CATCH_CUDA_ERROR(err); + err = cudaDeviceSynchronize(); + CATCH_CUDA_ERROR(err); err = cudaGetLastError(); if (err != cudaSuccess) { @@ -190,10 +198,18 @@ void filter_box_gpu(fftwf_complex *box, int RES, int filter_type, float R, float } // Copy results from device to host - cudaMemcpy(box, d_box, size, cudaMemcpyDeviceToHost); + err = cudaMemcpy(box, d_box, size, cudaMemcpyDeviceToHost); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } // Deallocate device memory - cudaFree(d_box); + err = cudaFree(d_box); + if (err != cudaSuccess) { + LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); + Throw(CUDAError); + } } // Test function to filter a box without computing a whole output box From bb54adf0c7cc5522b32992601b21e152bf4569b4 Mon Sep 17 00:00:00 2001 From: alserene Date: Thu, 12 Dec 2024 04:03:21 +1100 Subject: [PATCH 059/145] Wrap CUDA calls in error checking function. 
--- src/py21cmfast/src/IonisationBox.cu | 164 +++++----------------------- 1 file changed, 29 insertions(+), 135 deletions(-) diff --git a/src/py21cmfast/src/IonisationBox.cu b/src/py21cmfast/src/IonisationBox.cu index ddacc82e1..91920b4e9 100644 --- a/src/py21cmfast/src/IonisationBox.cu +++ b/src/py21cmfast/src/IonisationBox.cu @@ -38,6 +38,7 @@ #include "InitialConditions.h" #include "IonisationBox.h" +#include "cuda_utils.cuh" __device__ inline double EvaluateRGTable1D_f_gpu(double x, double x_min, double x_width, float *y_arr) { @@ -121,68 +122,26 @@ void init_ionbox_gpu_data( unsigned int *threadsPerBlock, unsigned int *numBlocks ) { - cudaError_t err = cudaGetLastError(); - // deltax_filtered, N_rec_filtered & xe_filtered are of length HII_KSPACE_NUM_PIXELS // Fcoll is of length HII_TOT_NUM_PIXELS (outputs.py) - err = cudaMalloc((void**)d_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels); // already pointers to pointers (no & needed) - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMalloc((void**)d_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMalloc((void**)d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMalloc((void**)d_y_arr, sizeof(float) * nbins); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMalloc((void**)d_Fcoll, sizeof(float) * hii_tot_num_pixels); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } + CALL_CUDA(cudaMalloc((void**)d_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels)); // already pointers to pointers (no & needed) + 
CALL_CUDA(cudaMalloc((void**)d_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels)); + CALL_CUDA(cudaMalloc((void**)d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels)); + CALL_CUDA(cudaMalloc((void**)d_y_arr, sizeof(float) * nbins)); + CALL_CUDA(cudaMalloc((void**)d_Fcoll, sizeof(float) * hii_tot_num_pixels)); LOG_INFO("Ionisation grids allocated on device."); - err = cudaMemset(*d_deltax_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels); // dereference the pointer to a pointer (*) - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMemset(*d_N_rec_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMemset(*d_xe_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMemset(*d_y_arr, 0, sizeof(float) * nbins); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMemset(*d_Fcoll, 0, sizeof(float) * hii_tot_num_pixels); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } + CALL_CUDA(cudaMemset(*d_deltax_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels)); // dereference the pointer to a pointer (*) + CALL_CUDA(cudaMemset(*d_N_rec_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels)); + CALL_CUDA(cudaMemset(*d_xe_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels)); + CALL_CUDA(cudaMemset(*d_y_arr, 0, sizeof(float) * nbins)); + CALL_CUDA(cudaMemset(*d_Fcoll, 0, sizeof(float) * hii_tot_num_pixels)); LOG_INFO("Ionisation grids initialised on device."); // Get max threads/block for device int maxThreadsPerBlock; - cudaDeviceGetAttribute(&maxThreadsPerBlock, 
cudaDevAttrMaxThreadsPerBlock, 0); + CALL_CUDA(cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0)); // Set threads/block based on device max if (maxThreadsPerBlock >= 512) { @@ -219,8 +178,6 @@ void calculate_fcoll_grid_gpu( unsigned int *threadsPerBlock, unsigned int *numBlocks ) { - cudaError_t err = cudaGetLastError(); - // TODO: Potentially use thrust to clip grids here instead of in kernel... RGTable1D_f* Nion_conditional_table1D = get_Nion_conditional_table1D(); @@ -228,26 +185,10 @@ void calculate_fcoll_grid_gpu( // unsigned long long hii_tot_fft_num_pixels = HII_TOT_FFT_NUM_PIXELS; // Copy grids from host to device - err = cudaMemcpy(d_deltax_filtered, h_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMemcpy(d_N_rec_filtered, h_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMemcpy(d_xe_filtered, h_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMemcpy(d_y_arr, Nion_conditional_table1D->y_arr, sizeof(float) * Nion_conditional_table1D->n_bin, cudaMemcpyHostToDevice); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } + CALL_CUDA(cudaMemcpy(d_deltax_filtered, h_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); + CALL_CUDA(cudaMemcpy(d_N_rec_filtered, h_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); + CALL_CUDA(cudaMemcpy(d_xe_filtered, h_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); + 
CALL_CUDA(cudaMemcpy(d_y_arr, Nion_conditional_table1D->y_arr, sizeof(float) * Nion_conditional_table1D->n_bin, cudaMemcpyHostToDevice)); LOG_INFO("Ionisation grids copied to device."); // TODO: Can I pass these straight to kernel? (or access in kernel w/ Tiger's method) @@ -274,47 +215,23 @@ void calculate_fcoll_grid_gpu( hii_mid_para, d_Fcoll ); + CALL_CUDA(cudaDeviceSynchronize()); LOG_INFO("IonisationBox compute_Fcoll kernel called."); - err = cudaDeviceSynchronize(); - CATCH_CUDA_ERROR(err); - - err = cudaGetLastError(); - if (err != cudaSuccess) { - LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - - // Use thrust to reduce computed sums to one value - + // Use thrust to reduce computed sums to one value. // Wrap device pointer in a thrust::device_ptr thrust::device_ptr d_Fcoll_ptr(d_Fcoll); // Reduce final buffer sums to one value *f_coll_grid_mean = thrust::reduce(d_Fcoll_ptr, d_Fcoll_ptr + hii_tot_num_pixels, 0., thrust::plus()); + CALL_CUDA(cudaDeviceSynchronize()); LOG_INFO("Fcoll sum reduced to single value by thrust::reduce operation."); // Copy results from device to host - err = cudaMemcpy(box->Fcoll, d_Fcoll, sizeof(float) * hii_tot_num_pixels, cudaMemcpyDeviceToHost); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMemcpy(h_deltax_filtered, d_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMemcpy(h_N_rec_filtered, d_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMemcpy(h_xe_filtered, d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: 
%s", cudaGetErrorString(err)); - Throw(CUDAError); - } - LOG_INFO("Fcoll grid copied to host."); + CALL_CUDA(cudaMemcpy(box->Fcoll, d_Fcoll, sizeof(float) * hii_tot_num_pixels, cudaMemcpyDeviceToHost)); + CALL_CUDA(cudaMemcpy(h_deltax_filtered, d_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); + CALL_CUDA(cudaMemcpy(h_N_rec_filtered, d_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); + CALL_CUDA(cudaMemcpy(h_xe_filtered, d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); + LOG_INFO("Grids copied to host."); } void free_ionbox_gpu_data( @@ -324,33 +241,10 @@ void free_ionbox_gpu_data( float **d_y_arr, float **d_Fcoll ) { - cudaError_t err = cudaGetLastError(); - - cudaFree(*d_deltax_filtered); // Need to dereference the pointers to pointers (*) - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - cudaFree(*d_N_rec_filtered); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - cudaFree(*d_xe_filtered); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - cudaFree(*d_y_arr); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - cudaFree(*d_Fcoll); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - + CALL_CUDA(cudaFree(*d_deltax_filtered)); // Need to dereference the pointers to pointers (*) + CALL_CUDA(cudaFree(*d_N_rec_filtered)); + CALL_CUDA(cudaFree(*d_xe_filtered)); + CALL_CUDA(cudaFree(*d_y_arr)); + CALL_CUDA(cudaFree(*d_Fcoll)); LOG_INFO("Device memory freed."); } From 8c1e8e626b5aed0bc452e0a325a9126c0b9244ff Mon Sep 17 00:00:00 2001 From: alserene Date: Thu, 12 Dec 2024 04:04:21 +1100 Subject: [PATCH 060/145] Add DEBUG messages throughout ComputeIonizedBox. 
--- src/py21cmfast/src/IonisationBox.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/py21cmfast/src/IonisationBox.c b/src/py21cmfast/src/IonisationBox.c index aca758d36..c9ac5603a 100644 --- a/src/py21cmfast/src/IonisationBox.c +++ b/src/py21cmfast/src/IonisationBox.c @@ -1317,11 +1317,13 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para // set the max radius we will use, making sure we are always sampling the same values of radius // (this avoids aliasing differences w redshift) + LOG_DEBUG("ION device pointers about to be allocated."); fftwf_complex *d_deltax_filtered = NULL; fftwf_complex *d_N_rec_filtered = NULL; fftwf_complex *d_xe_filtered = NULL; float *d_y_arr = NULL; float *d_Fcoll = NULL; //_outputstructs_wrapper.h + LOG_DEBUG("ION device pointers allocated."); unsigned int threadsPerBlock = NULL; unsigned int numBlocks = NULL; @@ -1330,7 +1332,7 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para if (flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { unsigned int Nion_nbins = get_nbins(); - + LOG_DEBUG("ION init_ionbox_gpu_data about to be called."); init_ionbox_gpu_data( &d_deltax_filtered, &d_N_rec_filtered, @@ -1343,12 +1345,14 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para &threadsPerBlock, &numBlocks ); + LOG_DEBUG("ION init_ionbox_gpu_data called."); } int R_ct; struct RadiusSpec curr_radius; for(R_ct=n_radii;R_ct--;){ curr_radius = radii_spec[R_ct]; + LOG_DEBUG("ION loop: R_ct=%u.",R_ct); //TODO: As far as I can tell, This was the previous behaviour with the while loop // So if the cell size is smaller than the minimum mass (rare) we still filter the last step @@ -1373,15 +1377,13 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para // If GPU & flags, call gpu version of calculate_fcoll_grid() if 
(flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { + LOG_DEBUG("ION calculate_fcoll_grid_gpu about to be called."); calculate_fcoll_grid_gpu( box, - // grid_struct, grid_struct->deltax_filtered, grid_struct->N_rec_filtered, grid_struct->xe_filtered, - // &ionbox_constants, &ionbox_constants.filter_recombinations, - // &curr_radius, &curr_radius.f_coll_grid_mean, d_deltax_filtered, d_N_rec_filtered, @@ -1393,6 +1395,7 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para &threadsPerBlock, &numBlocks ); + LOG_DEBUG("ION calculate_fcoll_grid_gpu called."); } else { calculate_fcoll_grid(box, previous_ionize_box, grid_struct, &ionbox_constants, &curr_radius); // <-------- HERE } @@ -1423,6 +1426,7 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para } // If GPU & flags, call free_ionbox_gpu_data() if (flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { + LOG_DEBUG("ION free_ionbox_gpu_data about to be called."); free_ionbox_gpu_data( &d_deltax_filtered, &d_N_rec_filtered, @@ -1430,6 +1434,7 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para &d_y_arr, &d_Fcoll ); + LOG_DEBUG("ION free_ionbox_gpu_data called."); } set_ionized_temperatures(box,perturbed_field,spin_temp,&ionbox_constants); From d432663b7fed34912bacfdc6c01f2a3ef4758faa Mon Sep 17 00:00:00 2001 From: alserene Date: Thu, 12 Dec 2024 04:05:58 +1100 Subject: [PATCH 061/145] Add CUDA wrapper for errors. 
--- src/py21cmfast/src/cuda_utils.cuh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 src/py21cmfast/src/cuda_utils.cuh diff --git a/src/py21cmfast/src/cuda_utils.cuh b/src/py21cmfast/src/cuda_utils.cuh new file mode 100644 index 000000000..f2e992651 --- /dev/null +++ b/src/py21cmfast/src/cuda_utils.cuh @@ -0,0 +1,18 @@ +#ifndef _CUDA_UTILS_CUH +#define _CUDA_UTILS_CUH + +#include +#include + +#define CALL_CUDA(x) \ + do \ + { \ + cudaError_t err = (x); \ + if (err != cudaSuccess) \ + { \ + printf("Error %s at %s:%d\n", cudaGetErrorString(err), __FILE__, __LINE__); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +#endif From 3317702852af428aab86ae41139c062df1395e7c Mon Sep 17 00:00:00 2001 From: alserene Date: Thu, 12 Dec 2024 07:57:30 +1100 Subject: [PATCH 062/145] Fix bug with flag passing. --- src/py21cmfast/src/IonisationBox.c | 10 +++--- src/py21cmfast/src/IonisationBox.cu | 52 ++++++++++++++++++++--------- src/py21cmfast/src/IonisationBox.h | 4 ++- 3 files changed, 46 insertions(+), 20 deletions(-) diff --git a/src/py21cmfast/src/IonisationBox.c b/src/py21cmfast/src/IonisationBox.c index c9ac5603a..7d93bae8a 100644 --- a/src/py21cmfast/src/IonisationBox.c +++ b/src/py21cmfast/src/IonisationBox.c @@ -1325,8 +1325,8 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para float *d_Fcoll = NULL; //_outputstructs_wrapper.h LOG_DEBUG("ION device pointers allocated."); - unsigned int threadsPerBlock = NULL; - unsigned int numBlocks = NULL; + unsigned int threadsPerBlock; + unsigned int numBlocks; // If GPU & flags call init_ionbox_gpu_data() if (flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { @@ -1339,6 +1339,7 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para &d_xe_filtered, &d_y_arr, &d_Fcoll, + ionbox_constants.filter_recombinations, Nion_nbins, HII_TOT_NUM_PIXELS, HII_KSPACE_NUM_PIXELS, @@ 
-1383,7 +1384,7 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para grid_struct->deltax_filtered, grid_struct->N_rec_filtered, grid_struct->xe_filtered, - &ionbox_constants.filter_recombinations, + ionbox_constants.filter_recombinations, &curr_radius.f_coll_grid_mean, d_deltax_filtered, d_N_rec_filtered, @@ -1432,7 +1433,8 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para &d_N_rec_filtered, &d_xe_filtered, &d_y_arr, - &d_Fcoll + &d_Fcoll, + ionbox_constants.filter_recombinations ); LOG_DEBUG("ION free_ionbox_gpu_data called."); } diff --git a/src/py21cmfast/src/IonisationBox.cu b/src/py21cmfast/src/IonisationBox.cu index 91920b4e9..2fb7940a6 100644 --- a/src/py21cmfast/src/IonisationBox.cu +++ b/src/py21cmfast/src/IonisationBox.cu @@ -116,6 +116,7 @@ void init_ionbox_gpu_data( fftwf_complex **d_xe_filtered, float **d_y_arr, float **d_Fcoll, + bool filter_recombinations, // member of consts unsigned int nbins, // nbins for Nion_conditional_table1D->y unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS @@ -126,17 +127,25 @@ void init_ionbox_gpu_data( // Fcoll is of length HII_TOT_NUM_PIXELS (outputs.py) CALL_CUDA(cudaMalloc((void**)d_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels)); // already pointers to pointers (no & needed) - CALL_CUDA(cudaMalloc((void**)d_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels)); - CALL_CUDA(cudaMalloc((void**)d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels)); - CALL_CUDA(cudaMalloc((void**)d_y_arr, sizeof(float) * nbins)); - CALL_CUDA(cudaMalloc((void**)d_Fcoll, sizeof(float) * hii_tot_num_pixels)); - LOG_INFO("Ionisation grids allocated on device."); - CALL_CUDA(cudaMemset(*d_deltax_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels)); // dereference the pointer to a pointer (*) - CALL_CUDA(cudaMemset(*d_N_rec_filtered, 0, sizeof(fftwf_complex) 
* hii_kspace_num_pixels)); - CALL_CUDA(cudaMemset(*d_xe_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels)); + + if (filter_recombinations) { + CALL_CUDA(cudaMalloc((void**)d_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels)); + CALL_CUDA(cudaMemset(*d_N_rec_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels)); + } + + if (flag_options_global->USE_TS_FLUCT) { + CALL_CUDA(cudaMalloc((void**)d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels)); + CALL_CUDA(cudaMemset(*d_xe_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels)); + } + + CALL_CUDA(cudaMalloc((void**)d_y_arr, sizeof(float) * nbins)); CALL_CUDA(cudaMemset(*d_y_arr, 0, sizeof(float) * nbins)); + + CALL_CUDA(cudaMalloc((void**)d_Fcoll, sizeof(float) * hii_tot_num_pixels)); CALL_CUDA(cudaMemset(*d_Fcoll, 0, sizeof(float) * hii_tot_num_pixels)); + + LOG_INFO("Ionisation grids allocated on device."); LOG_INFO("Ionisation grids initialised on device."); // Get max threads/block for device @@ -186,8 +195,12 @@ void calculate_fcoll_grid_gpu( // Copy grids from host to device CALL_CUDA(cudaMemcpy(d_deltax_filtered, h_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); - CALL_CUDA(cudaMemcpy(d_N_rec_filtered, h_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); - CALL_CUDA(cudaMemcpy(d_xe_filtered, h_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); + if (filter_recombinations) { + CALL_CUDA(cudaMemcpy(d_N_rec_filtered, h_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); + } + if (flag_options_global->USE_TS_FLUCT) { + CALL_CUDA(cudaMemcpy(d_xe_filtered, h_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); + } CALL_CUDA(cudaMemcpy(d_y_arr, Nion_conditional_table1D->y_arr, sizeof(float) * Nion_conditional_table1D->n_bin, cudaMemcpyHostToDevice)); LOG_INFO("Ionisation grids copied to 
device."); @@ -229,8 +242,12 @@ void calculate_fcoll_grid_gpu( // Copy results from device to host CALL_CUDA(cudaMemcpy(box->Fcoll, d_Fcoll, sizeof(float) * hii_tot_num_pixels, cudaMemcpyDeviceToHost)); CALL_CUDA(cudaMemcpy(h_deltax_filtered, d_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); - CALL_CUDA(cudaMemcpy(h_N_rec_filtered, d_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); - CALL_CUDA(cudaMemcpy(h_xe_filtered, d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); + if (filter_recombinations) { + CALL_CUDA(cudaMemcpy(h_N_rec_filtered, d_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); + } + if (flag_options_global->USE_TS_FLUCT) { + CALL_CUDA(cudaMemcpy(h_xe_filtered, d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); + } LOG_INFO("Grids copied to host."); } @@ -239,11 +256,16 @@ void free_ionbox_gpu_data( fftwf_complex **d_N_rec_filtered, fftwf_complex **d_xe_filtered, float **d_y_arr, - float **d_Fcoll + float **d_Fcoll, + bool filter_recombinations // member of consts ) { CALL_CUDA(cudaFree(*d_deltax_filtered)); // Need to dereference the pointers to pointers (*) - CALL_CUDA(cudaFree(*d_N_rec_filtered)); - CALL_CUDA(cudaFree(*d_xe_filtered)); + if (filter_recombinations) { + CALL_CUDA(cudaFree(*d_N_rec_filtered)); + } + if (flag_options_global->USE_TS_FLUCT) { + CALL_CUDA(cudaFree(*d_xe_filtered)); + } CALL_CUDA(cudaFree(*d_y_arr)); CALL_CUDA(cudaFree(*d_Fcoll)); LOG_INFO("Device memory freed."); diff --git a/src/py21cmfast/src/IonisationBox.h b/src/py21cmfast/src/IonisationBox.h index f4507debb..cd95b0d78 100644 --- a/src/py21cmfast/src/IonisationBox.h +++ b/src/py21cmfast/src/IonisationBox.h @@ -22,6 +22,7 @@ void init_ionbox_gpu_data( fftwf_complex **d_xe_filtered, float **d_y_arr, float **d_Fcoll, + bool filter_recombinations, // member of consts unsigned int 
nbins, // nbins for Nion_conditional_table1D->y unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS @@ -50,7 +51,8 @@ void free_ionbox_gpu_data( fftwf_complex **d_N_rec_filtered, fftwf_complex **d_xe_filtered, float **d_y_arr, - float **d_Fcoll + float **d_Fcoll, + bool filter_recombinations // member of consts ); #ifdef __cplusplus From 9e5c5e5c05168552af4fc0fc8f297ba7ee26f8f3 Mon Sep 17 00:00:00 2001 From: JHu Date: Sun, 15 Dec 2024 10:22:18 +1100 Subject: [PATCH 063/145] add sample correction algorithm --- src/py21cmfast/src/Stochasticity.cu | 135 +++++++++++++++++++++------- 1 file changed, 104 insertions(+), 31 deletions(-) diff --git a/src/py21cmfast/src/Stochasticity.cu b/src/py21cmfast/src/Stochasticity.cu index a87118230..929f02cca 100644 --- a/src/py21cmfast/src/Stochasticity.cu +++ b/src/py21cmfast/src/Stochasticity.cu @@ -12,6 +12,7 @@ #include #include #include +#include #include "Constants.h" #include "interpolation_types.h" @@ -157,6 +158,35 @@ void getDeviceProperties(){ printf("max threads per block: %d \n", deviceProp.maxThreadsPerBlock); printf("total constant memory: %zu bytes \n", deviceProp.totalConstMem); } + +// void getKernelAttr(){ +// cudaFuncAttributes attr; +// cudaFuncGetAttributes(&attr, myKernel); +// printf("Kernel Shared Memory per Block: %zu bytes\n", attr.sharedSizeBytes); +// printf("Kernel Registers per Thread: %d\n", attr.numRegs); +// printf("Kernel Max Threads per Block: %d\n", attr.maxThreadsPerBlock); +// } + +struct GridLayout{ + int n_threads; + int n_blocks; +}; +// calculate workload for the second iteration +GridLayout getWorkload(int sparsity, unsigned long long int n_halos){ + GridLayout res; + int n_threads, n_blocks; + if (sparsity == 4){ + n_threads = 256; + } + else { + n_threads = std::min(sparsity,512); + } + res.n_threads = n_threads; + n_blocks = (n_halos * sparsity + n_threads -1)/n_threads; + res.n_blocks = n_blocks; + return res; 
+} + // 11-30: the following implementation works (before using any global params on gpu) __device__ void stoc_set_consts_cond(struct HaloSamplingConstants *const_struct, float cond_val, int HMF, double x_min, double x_width, float *d_y_arr, int n_bin, double *expected_mass) { @@ -214,52 +244,52 @@ __device__ double sample_dndM_inverse(double condition, struct HaloSamplingConst return result; } -__device__ double remove_random_halo(curandState *state, int n_halo, int *idx, double *M_prog, float *M_out){ +__device__ double remove_random_halo(curandState *state, int n_halo, int *idx, float *M_prog, float *M_out){ double last_M_del; int random_idx; do { random_idx = (int)(curand_uniform(state) * n_halo); - } while (M_out[random_idx] == 0); + } while (M_out[random_idx] == -1.f); last_M_del = M_out[random_idx]; *M_prog -= last_M_del; - M_out[random_idx] = 0; // zero mass halos are skipped and not counted + M_out[random_idx] = -1.f; // -1 mass halos are skipped and not counted *idx = random_idx; return last_M_del; } -__device__ void fix_mass_sample(curandState *state, double exp_M, int *n_halo_pt, double *M_tot_pt, float *M_out){ +__device__ void fix_mass_sample(curandState *state, double exp_M, float *M_prog, float *M_out, int write_limit){ // Keep the last halo if it brings us closer to the expected mass // This is done by addition or subtraction over the limit to balance // the bias of the last halo being larger int random_idx; double last_M_del; int sel = curand(state) % 2; - // bool sel = gsl_rng_uniform_int(rng, 2); - // int sel = 1; + // int sel = 1; //tmp: implement the first case if (sel) { - if (fabs(*M_tot_pt - M_out[*n_halo_pt - 1] - exp_M) < fabs(*M_tot_pt - exp_M)) + if (fabs(*M_prog - M_out[write_limit] - exp_M) < fabs(*M_prog - exp_M)) { - *M_tot_pt -= M_out[*n_halo_pt - 1]; + // *M_tot_pt -= M_out[*n_halo_pt - 1]; // here we remove by setting the counter one lower so it isn't read - (*n_halo_pt)--; // increment has preference over dereference + 
M_out[write_limit] = -1.f; } } else { do { // here we remove by setting halo mass to zero, skipping it during the consolidation - last_M_del = remove_random_halo(state, *n_halo_pt, &random_idx, M_tot_pt, M_out); - } while (*M_tot_pt > exp_M); + last_M_del = remove_random_halo(state, write_limit+1, &random_idx, M_prog, M_out); + } while (*M_prog > exp_M); // if the sample with the last subtracted halo is closer to the expected mass, keep it // LOG_ULTRA_DEBUG("Deciding to keep last halo M %.3e tot %.3e exp %.3e",last_M_del,*M_tot_pt,exp_M); - if (fabs(*M_tot_pt + last_M_del - exp_M) < fabs(*M_tot_pt - exp_M)) + if (fabs(*M_prog + last_M_del - exp_M) < fabs(*M_prog - exp_M)) { M_out[random_idx] = last_M_del; - *M_tot_pt += last_M_del; + *M_prog += last_M_del; } + } } @@ -434,6 +464,7 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, doub int *d_further_process, int *d_nprog_predict, int sparsity, unsigned long long int write_offset, double *expected_mass) { // Define shared memory for block-level reduction + // extern __shared__ float shared_mass[]; __shared__ float shared_mass[256]; // get thread idx @@ -483,18 +514,36 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, doub __syncthreads(); + // printf("the first element of shared mass: %f \n", shared_mass[0]); // passing value to arrays in global memory is done by one thread per group if (tid % sparsity == 0){ float Mprog = 0.0; + int write_limit = 0; + int meetCondition = 0; + for (int i = 0; i < sparsity; ++i){ + Mprog += shared_mass[tid + i]; if (Mprog >= d_hs_constants.expected_M) { + write_limit = i; + meetCondition = 1; break; } - Mprog += shared_mass[tid+i]; - d_halo_masses_out[out_id+i] = shared_mass[tid+i]; - } - if (Mprog < d_hs_constants.expected_M){ + + // d_halo_masses_out[out_id+i] = shared_mass[tid+i]; + } + if (meetCondition){ + // correct the mass samples + fix_mass_sample(&local_state, d_hs_constants.expected_M, &Mprog, &shared_mass[tid], 
write_limit); + + for (int i = 0; i < write_limit; ++i) + { + + // write the final mass sample to array in global memory + d_halo_masses_out[out_id + i] = shared_mass[tid + i]; + } + } + else{ d_further_process[hid] = 1; d_nprog_predict[hid] = ceil(d_hs_constants.expected_M * sparsity / Mprog); @@ -567,12 +616,18 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a // get parameters needed by the kernel int HMF = user_params_global->HMF; - // define threads layout - int n_threads = 256; - int n_blocks = (int)((n_halos + 255) / 256); - int total_threads = n_threads * n_blocks; + // start with 4 threads work with one halo + int sparsity = 4; + + // define threads layout for starting + // int n_threads = 256; + // int n_blocks = (int)((n_halos*sparsity + 255) / 256); + // int total_threads = n_threads * n_blocks; - // allocate memory for out halos + GridLayout grids = getWorkload(sparsity, n_halos); + int total_threads = grids.n_threads * grids.n_blocks; + + // allocate memory for out halos (just allocate once at each call of this grid launch function) // size_t buffer_size = sizeof(float) * max(total_threads * 2, n_buffer) * 2; size_t d_n_buffer = total_threads * 4 + 10; @@ -599,13 +654,10 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a CALL_CUDA(cudaMalloc((void **)&d_states, total_threads * sizeof(curandState))); // setup random states - setup_random_states<<>>(d_states, 1234ULL); + setup_random_states<<>>(d_states, 1234ULL); // Check kernel launch errors CALL_CUDA(cudaGetLastError()); - - // start with one thread work with one halo - int sparsity = 4; // initiate n_halo check unsigned long long int n_halo_check = n_halos; @@ -618,9 +670,16 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a getDeviceProperties(); + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr, update_halo_constants); + printf("Kernel Shared Memory per Block: %zu bytes\n", 
attr.sharedSizeBytes); + printf("Kernel Registers per Thread: %d\n", attr.numRegs); + printf("Kernel Max Threads per Block: %d\n", attr.maxThreadsPerBlock); + // launch kernel grid - // while (n_filter_halo > 0){ - update_halo_constants<<>>(d_halo_masses, d_y_arr, x_min, x_width, n_halos, n_bin_y, hs_constants, HMF, d_states, d_halo_masses_out, star_rng_out, + while (n_filter_halo > 0){ + size_t shared_size = grids.n_threads*sizeof(float); + update_halo_constants<<>>(d_halo_masses, d_y_arr, x_min, x_width, n_halos, n_bin_y, hs_constants, HMF, d_states, d_halo_masses_out, star_rng_out, sfr_rng_out, xray_rng_out, halo_coords_out, d_sum_check, d_further_process, d_nprog_predict, sparsity, write_offset, d_expected_mass); // Check kernel launch errors @@ -649,6 +708,19 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a unsigned long long int available_n_buffer = d_n_buffer - n_processed_prog; sparsity = getSparsity(available_n_buffer, n_filter_halo); + // check max threadblock size + int device; + CALL_CUDA(cudaGetDevice(&device)); + cudaDeviceProp deviceProp; + CALL_CUDA(cudaGetDeviceProperties(&deviceProp, device)); + int max_threads_pb = deviceProp.maxThreadsPerBlock; + + // sparsity should not exceed the max threads per block + sparsity = std::min(sparsity, 512); + + // reset grids layout + grids = getWorkload(sparsity, n_filter_halo); + // update write offset write_offset = n_processed_prog; @@ -663,14 +735,15 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a CALL_CUDA(cudaFreeHost(h_filter_halos)); // CALL_CUDA(cudaFreeHost(h_sum_check)); - // } + } + // tmp: for debugging purpose; out halo need to copy back to host after all halos being processed float *h_halo_masses_out; CALL_CUDA(cudaHostAlloc((void **)&h_halo_masses_out, buffer_size, cudaHostAllocDefault)); CALL_CUDA(cudaMemcpy(h_halo_masses_out, d_halo_masses_out, buffer_size, cudaMemcpyDeviceToHost)); - - + 
CALL_CUDA(cudaFreeHost(h_halo_masses_out)); + // } // Free device memory CALL_CUDA(cudaFree(d_halo_masses)); From f727a81e1f78fbf0c4146e8855fd9a459f26b6b8 Mon Sep 17 00:00:00 2001 From: alserene Date: Tue, 17 Dec 2024 08:07:01 +1100 Subject: [PATCH 064/145] Correct average calculation. --- src/py21cmfast/src/IonisationBox.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/py21cmfast/src/IonisationBox.cu b/src/py21cmfast/src/IonisationBox.cu index 2fb7940a6..8d904268f 100644 --- a/src/py21cmfast/src/IonisationBox.cu +++ b/src/py21cmfast/src/IonisationBox.cu @@ -235,7 +235,8 @@ void calculate_fcoll_grid_gpu( // Wrap device pointer in a thrust::device_ptr thrust::device_ptr d_Fcoll_ptr(d_Fcoll); // Reduce final buffer sums to one value - *f_coll_grid_mean = thrust::reduce(d_Fcoll_ptr, d_Fcoll_ptr + hii_tot_num_pixels, 0., thrust::plus()); + double f_coll_grid_total = thrust::reduce(d_Fcoll_ptr, d_Fcoll_ptr + hii_tot_num_pixels, 0., thrust::plus()); + *f_coll_grid_mean = f_coll_grid_total / (double) hii_tot_num_pixels; CALL_CUDA(cudaDeviceSynchronize()); LOG_INFO("Fcoll sum reduced to single value by thrust::reduce operation."); From c4b984f90fc3386fe0bf5c3923d88a8e74db6392 Mon Sep 17 00:00:00 2001 From: alserene Date: Fri, 20 Dec 2024 07:14:13 +1100 Subject: [PATCH 065/145] Correct NumBlocks bug. 
--- src/py21cmfast/src/IonisationBox.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py21cmfast/src/IonisationBox.cu b/src/py21cmfast/src/IonisationBox.cu index 8d904268f..e9b95a29d 100644 --- a/src/py21cmfast/src/IonisationBox.cu +++ b/src/py21cmfast/src/IonisationBox.cu @@ -167,7 +167,7 @@ void init_ionbox_gpu_data( *threadsPerBlock = 16; } - *numBlocks = ceil(hii_tot_num_pixels / (*threadsPerBlock * 2)); + *numBlocks = ceil(hii_tot_num_pixels / *threadsPerBlock) + 1; } void calculate_fcoll_grid_gpu( @@ -235,7 +235,7 @@ void calculate_fcoll_grid_gpu( // Wrap device pointer in a thrust::device_ptr thrust::device_ptr d_Fcoll_ptr(d_Fcoll); // Reduce final buffer sums to one value - double f_coll_grid_total = thrust::reduce(d_Fcoll_ptr, d_Fcoll_ptr + hii_tot_num_pixels, 0., thrust::plus()); + double f_coll_grid_total = thrust::reduce(d_Fcoll_ptr, d_Fcoll_ptr + hii_tot_num_pixels, 0., thrust::plus()); *f_coll_grid_mean = f_coll_grid_total / (double) hii_tot_num_pixels; CALL_CUDA(cudaDeviceSynchronize()); LOG_INFO("Fcoll sum reduced to single value by thrust::reduce operation."); From 679c897468261c65f90f435a1360746047b2085b Mon Sep 17 00:00:00 2001 From: alserene Date: Fri, 20 Dec 2024 08:01:17 +1100 Subject: [PATCH 066/145] Remove unneeded lines. 
--- src/py21cmfast/src/IonisationBox.c | 18 ++----------- src/py21cmfast/src/IonisationBox.cu | 39 +---------------------------- src/py21cmfast/src/IonisationBox.h | 9 +------ 3 files changed, 4 insertions(+), 62 deletions(-) diff --git a/src/py21cmfast/src/IonisationBox.c b/src/py21cmfast/src/IonisationBox.c index 7d93bae8a..3d9d3f3d5 100644 --- a/src/py21cmfast/src/IonisationBox.c +++ b/src/py21cmfast/src/IonisationBox.c @@ -1317,13 +1317,10 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para // set the max radius we will use, making sure we are always sampling the same values of radius // (this avoids aliasing differences w redshift) - LOG_DEBUG("ION device pointers about to be allocated."); fftwf_complex *d_deltax_filtered = NULL; - fftwf_complex *d_N_rec_filtered = NULL; fftwf_complex *d_xe_filtered = NULL; float *d_y_arr = NULL; float *d_Fcoll = NULL; //_outputstructs_wrapper.h - LOG_DEBUG("ION device pointers allocated."); unsigned int threadsPerBlock; unsigned int numBlocks; @@ -1332,28 +1329,24 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para if (flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { unsigned int Nion_nbins = get_nbins(); - LOG_DEBUG("ION init_ionbox_gpu_data about to be called."); init_ionbox_gpu_data( &d_deltax_filtered, - &d_N_rec_filtered, &d_xe_filtered, &d_y_arr, &d_Fcoll, - ionbox_constants.filter_recombinations, Nion_nbins, HII_TOT_NUM_PIXELS, HII_KSPACE_NUM_PIXELS, &threadsPerBlock, &numBlocks ); - LOG_DEBUG("ION init_ionbox_gpu_data called."); } int R_ct; struct RadiusSpec curr_radius; for(R_ct=n_radii;R_ct--;){ curr_radius = radii_spec[R_ct]; - LOG_DEBUG("ION loop: R_ct=%u.",R_ct); + LOG_DEBUG("ION loop: R_ct=%u.",R_ct); // TODO: Remove this //TODO: As far as I can tell, This was the previous behaviour with the while loop // So if the cell size is smaller than the minimum mass (rare) we 
still filter the last step @@ -1378,16 +1371,12 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para // If GPU & flags, call gpu version of calculate_fcoll_grid() if (flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { - LOG_DEBUG("ION calculate_fcoll_grid_gpu about to be called."); calculate_fcoll_grid_gpu( box, grid_struct->deltax_filtered, - grid_struct->N_rec_filtered, grid_struct->xe_filtered, - ionbox_constants.filter_recombinations, &curr_radius.f_coll_grid_mean, d_deltax_filtered, - d_N_rec_filtered, d_xe_filtered, d_Fcoll, d_y_arr, @@ -1396,7 +1385,6 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para &threadsPerBlock, &numBlocks ); - LOG_DEBUG("ION calculate_fcoll_grid_gpu called."); } else { calculate_fcoll_grid(box, previous_ionize_box, grid_struct, &ionbox_constants, &curr_radius); // <-------- HERE } @@ -1430,11 +1418,9 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para LOG_DEBUG("ION free_ionbox_gpu_data about to be called."); free_ionbox_gpu_data( &d_deltax_filtered, - &d_N_rec_filtered, &d_xe_filtered, &d_y_arr, - &d_Fcoll, - ionbox_constants.filter_recombinations + &d_Fcoll ); LOG_DEBUG("ION free_ionbox_gpu_data called."); } diff --git a/src/py21cmfast/src/IonisationBox.cu b/src/py21cmfast/src/IonisationBox.cu index e9b95a29d..bbc8269a7 100644 --- a/src/py21cmfast/src/IonisationBox.cu +++ b/src/py21cmfast/src/IonisationBox.cu @@ -54,13 +54,11 @@ __device__ inline double EvaluateRGTable1D_f_gpu(double x, double x_min, double // template __global__ void compute_Fcoll( cuFloatComplex *deltax_filtered, // fg_struct - cuFloatComplex *N_rec_filtered, // fg_struct cuFloatComplex *xe_filtered, // fg_struct float *y_arr, // Nion_conditional_table1D double x_min, // Nion_conditional_table1D double x_width, // Nion_conditional_table1D double fract_float_err, // FRACT_FLOAT_ERR - bool 
filter_recomb, // consts->filter_recombinations bool use_ts_fluct, // flag_options_global->USE_TS_FLUCT unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS long long hii_d, // HII_D @@ -68,7 +66,6 @@ __global__ void compute_Fcoll( long long hii_mid_para, // HII_MID_PARA float *Fcoll // box ) { - // Get index of grids unsigned long long idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -90,50 +87,32 @@ __global__ void compute_Fcoll( // Clip the filtered grids to physical values // delta cannot be less than -1 - // deltax_filtered[fft_idx] = fmaxf(deltax_filtered[fft_idx], -1. + fract_float_err); *((float *) deltax_filtered + fft_idx) = fmaxf(*((float *) deltax_filtered + fft_idx), -1. + fract_float_err); // cannot be less than zero - if (filter_recomb) { - // N_rec_filtered[fft_idx] = fmaxf(N_rec_filtered[fft_idx], 0.0); - *((float *) N_rec_filtered + fft_idx) = fmaxf(*((float *) N_rec_filtered + fft_idx), 0.0); - } // x_e has to be between zero and unity if (use_ts_fluct) { - // xe_filtered[fft_idx] = fmaxf(xe_filtered[fft_idx], 0.0); - // xe_filtered[fft_idx] = fminf(xe_filtered[fft_idx], 0.999); *((float *) xe_filtered + fft_idx) = fmaxf(*((float *) xe_filtered + fft_idx), 0.0); *((float *) xe_filtered + fft_idx) = fminf(*((float *) xe_filtered + fft_idx), 0.999); } // Compute collapse fraction - // Fcoll[idx] = exp(EvaluateRGTable1D_f_gpu(deltax_filtered[fft_idx], x_min, x_width, y_arr)); Fcoll[idx] = exp(EvaluateRGTable1D_f_gpu(*((float *) deltax_filtered + fft_idx), x_min, x_width, y_arr)); } void init_ionbox_gpu_data( fftwf_complex **d_deltax_filtered, // copies of pointers to pointers - fftwf_complex **d_N_rec_filtered, fftwf_complex **d_xe_filtered, float **d_y_arr, float **d_Fcoll, - bool filter_recombinations, // member of consts unsigned int nbins, // nbins for Nion_conditional_table1D->y unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS unsigned int *threadsPerBlock, 
unsigned int *numBlocks ) { - // deltax_filtered, N_rec_filtered & xe_filtered are of length HII_KSPACE_NUM_PIXELS - // Fcoll is of length HII_TOT_NUM_PIXELS (outputs.py) - CALL_CUDA(cudaMalloc((void**)d_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels)); // already pointers to pointers (no & needed) CALL_CUDA(cudaMemset(*d_deltax_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels)); // dereference the pointer to a pointer (*) - if (filter_recombinations) { - CALL_CUDA(cudaMalloc((void**)d_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels)); - CALL_CUDA(cudaMemset(*d_N_rec_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels)); - } - if (flag_options_global->USE_TS_FLUCT) { CALL_CUDA(cudaMalloc((void**)d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels)); CALL_CUDA(cudaMemset(*d_xe_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels)); @@ -173,12 +152,9 @@ void init_ionbox_gpu_data( void calculate_fcoll_grid_gpu( IonizedBox *box, // for box->Fcoll fftwf_complex *h_deltax_filtered, // members of fg_struct - fftwf_complex *h_N_rec_filtered, fftwf_complex *h_xe_filtered, - bool filter_recombinations, // member of consts double *f_coll_grid_mean, // member of rspec fftwf_complex *d_deltax_filtered, // device pointers - fftwf_complex *d_N_rec_filtered, fftwf_complex *d_xe_filtered, float *d_Fcoll, float *d_y_arr, @@ -195,9 +171,6 @@ void calculate_fcoll_grid_gpu( // Copy grids from host to device CALL_CUDA(cudaMemcpy(d_deltax_filtered, h_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); - if (filter_recombinations) { - CALL_CUDA(cudaMemcpy(d_N_rec_filtered, h_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); - } if (flag_options_global->USE_TS_FLUCT) { CALL_CUDA(cudaMemcpy(d_xe_filtered, h_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); } @@ -214,13 +187,11 @@ void calculate_fcoll_grid_gpu( // 
Invoke kernel compute_Fcoll<<< *numBlocks, *threadsPerBlock >>>( reinterpret_cast(d_deltax_filtered), - reinterpret_cast(d_N_rec_filtered), reinterpret_cast(d_xe_filtered), d_y_arr, Nion_conditional_table1D->x_min, Nion_conditional_table1D->x_width, fract_float_err, - filter_recombinations, use_ts_fluct, hii_tot_num_pixels, hii_d, @@ -243,9 +214,6 @@ void calculate_fcoll_grid_gpu( // Copy results from device to host CALL_CUDA(cudaMemcpy(box->Fcoll, d_Fcoll, sizeof(float) * hii_tot_num_pixels, cudaMemcpyDeviceToHost)); CALL_CUDA(cudaMemcpy(h_deltax_filtered, d_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); - if (filter_recombinations) { - CALL_CUDA(cudaMemcpy(h_N_rec_filtered, d_N_rec_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); - } if (flag_options_global->USE_TS_FLUCT) { CALL_CUDA(cudaMemcpy(h_xe_filtered, d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); } @@ -254,16 +222,11 @@ void calculate_fcoll_grid_gpu( void free_ionbox_gpu_data( fftwf_complex **d_deltax_filtered, // copies of pointers to pointers - fftwf_complex **d_N_rec_filtered, fftwf_complex **d_xe_filtered, float **d_y_arr, - float **d_Fcoll, - bool filter_recombinations // member of consts + float **d_Fcoll ) { CALL_CUDA(cudaFree(*d_deltax_filtered)); // Need to dereference the pointers to pointers (*) - if (filter_recombinations) { - CALL_CUDA(cudaFree(*d_N_rec_filtered)); - } if (flag_options_global->USE_TS_FLUCT) { CALL_CUDA(cudaFree(*d_xe_filtered)); } diff --git a/src/py21cmfast/src/IonisationBox.h b/src/py21cmfast/src/IonisationBox.h index cd95b0d78..287928408 100644 --- a/src/py21cmfast/src/IonisationBox.h +++ b/src/py21cmfast/src/IonisationBox.h @@ -18,11 +18,9 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para IonizedBox *box); void init_ionbox_gpu_data( fftwf_complex **d_deltax_filtered, // copies of pointers to pointers - fftwf_complex 
**d_N_rec_filtered, fftwf_complex **d_xe_filtered, float **d_y_arr, float **d_Fcoll, - bool filter_recombinations, // member of consts unsigned int nbins, // nbins for Nion_conditional_table1D->y unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS @@ -32,12 +30,9 @@ void init_ionbox_gpu_data( void calculate_fcoll_grid_gpu( IonizedBox *box, // for box->Fcoll fftwf_complex *h_deltax_filtered, // members of fg_struct - fftwf_complex *h_N_rec_filtered, fftwf_complex *h_xe_filtered, - bool filter_recombinations, // member of consts double *f_coll_grid_mean, // member of rspec fftwf_complex *d_deltax_filtered, // device pointers - fftwf_complex *d_N_rec_filtered, fftwf_complex *d_xe_filtered, float *d_Fcoll, float *d_y_arr, @@ -48,11 +43,9 @@ void calculate_fcoll_grid_gpu( ); void free_ionbox_gpu_data( fftwf_complex **d_deltax_filtered, // copies of pointers to pointers - fftwf_complex **d_N_rec_filtered, fftwf_complex **d_xe_filtered, float **d_y_arr, - float **d_Fcoll, - bool filter_recombinations // member of consts + float **d_Fcoll ); #ifdef __cplusplus From 2eb2bf35be8d179fb47c15f16e7657b304ad3ec0 Mon Sep 17 00:00:00 2001 From: alserene Date: Thu, 26 Dec 2024 05:25:00 +1100 Subject: [PATCH 067/145] Remove synchronisation after thrust call. 
--- src/py21cmfast/src/IonisationBox.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/src/py21cmfast/src/IonisationBox.cu b/src/py21cmfast/src/IonisationBox.cu index bbc8269a7..8bb4a4ec8 100644 --- a/src/py21cmfast/src/IonisationBox.cu +++ b/src/py21cmfast/src/IonisationBox.cu @@ -208,7 +208,6 @@ void calculate_fcoll_grid_gpu( // Reduce final buffer sums to one value double f_coll_grid_total = thrust::reduce(d_Fcoll_ptr, d_Fcoll_ptr + hii_tot_num_pixels, 0., thrust::plus()); *f_coll_grid_mean = f_coll_grid_total / (double) hii_tot_num_pixels; - CALL_CUDA(cudaDeviceSynchronize()); LOG_INFO("Fcoll sum reduced to single value by thrust::reduce operation."); // Copy results from device to host From 9111e2c3132e745c86e89101f156230a2c0d0c20 Mon Sep 17 00:00:00 2001 From: alserene Date: Fri, 27 Dec 2024 00:00:13 +1100 Subject: [PATCH 068/145] Slight change to calculation of numBlocks. --- src/py21cmfast/src/IonisationBox.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py21cmfast/src/IonisationBox.cu b/src/py21cmfast/src/IonisationBox.cu index 8bb4a4ec8..2fe37953a 100644 --- a/src/py21cmfast/src/IonisationBox.cu +++ b/src/py21cmfast/src/IonisationBox.cu @@ -146,7 +146,7 @@ void init_ionbox_gpu_data( *threadsPerBlock = 16; } - *numBlocks = ceil(hii_tot_num_pixels / *threadsPerBlock) + 1; + *numBlocks = (hii_tot_num_pixels + *threadsPerBlock - 1) / *threadsPerBlock; } void calculate_fcoll_grid_gpu( From 2527ad114c485c208208cdf2e5c8214056482f84 Mon Sep 17 00:00:00 2001 From: alserene Date: Fri, 27 Dec 2024 00:01:11 +1100 Subject: [PATCH 069/145] Remove comments. 
--- src/py21cmfast/src/IonisationBox.cu | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/py21cmfast/src/IonisationBox.cu b/src/py21cmfast/src/IonisationBox.cu index 2fe37953a..948ee0efa 100644 --- a/src/py21cmfast/src/IonisationBox.cu +++ b/src/py21cmfast/src/IonisationBox.cu @@ -163,11 +163,7 @@ void calculate_fcoll_grid_gpu( unsigned int *threadsPerBlock, unsigned int *numBlocks ) { - // TODO: Potentially use thrust to clip grids here instead of in kernel... - RGTable1D_f* Nion_conditional_table1D = get_Nion_conditional_table1D(); - // unsigned long long hii_tot_num_pixels = HII_TOT_NUM_PIXELS; - // unsigned long long hii_tot_fft_num_pixels = HII_TOT_FFT_NUM_PIXELS; // Copy grids from host to device CALL_CUDA(cudaMemcpy(d_deltax_filtered, h_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); From d3941569a85c7ed4bdea843861f174b466a8a033 Mon Sep 17 00:00:00 2001 From: alserene Date: Fri, 27 Dec 2024 00:04:21 +1100 Subject: [PATCH 070/145] Remove comments and debug statements. --- src/py21cmfast/src/IonisationBox.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/py21cmfast/src/IonisationBox.c b/src/py21cmfast/src/IonisationBox.c index 3d9d3f3d5..9e15bbe39 100644 --- a/src/py21cmfast/src/IonisationBox.c +++ b/src/py21cmfast/src/IonisationBox.c @@ -693,7 +693,7 @@ void setup_integration_tables(struct FilteredGrids *fg_struct, struct IonBoxCons //TODO: We should speed test different configurations, separating grids, parallel sections etc. 
// See the note above copy_filter_transform() for the general idea // If we separate by grid we can reuse the clipping function above -void calculate_fcoll_grid(IonizedBox *box, IonizedBox *previous_ionize_box, struct FilteredGrids *fg_struct, struct IonBoxConstants *consts, // <-------- HERE +void calculate_fcoll_grid(IonizedBox *box, IonizedBox *previous_ionize_box, struct FilteredGrids *fg_struct, struct IonBoxConstants *consts, struct RadiusSpec *rspec){ double f_coll_total,f_coll_MINI_total; //TODO: make proper error tracking through the parallel region @@ -763,7 +763,7 @@ void calculate_fcoll_grid(IonizedBox *box, IonizedBox *previous_ionize_box, stru if (previous_ionize_box->mean_f_coll_MINI * consts->ion_eff_factor_mini_gl + previous_ionize_box->mean_f_coll * consts->ion_eff_factor_gl > 1e-4){ prev_dens = *((float *)fg_struct->prev_deltax_filtered + HII_R_FFT_INDEX(x,y,z)); - prev_Splined_Fcoll = EvaluateNion_Conditional(prev_dens,log10_Mturnover,consts->prev_growth_factor, // <-------- HERE + prev_Splined_Fcoll = EvaluateNion_Conditional(prev_dens,log10_Mturnover,consts->prev_growth_factor, consts->M_min,rspec->M_max_R,rspec->M_max_R, rspec->sigma_maxmass,consts->Mlim_Fstar,consts->Mlim_Fesc,true); prev_Splined_Fcoll_MINI = EvaluateNion_Conditional_MINI(prev_dens,log10_Mturnover_MINI,consts->prev_growth_factor,consts->M_min, @@ -775,7 +775,7 @@ void calculate_fcoll_grid(IonizedBox *box, IonizedBox *previous_ionize_box, stru prev_Splined_Fcoll_MINI = 0.; } } - Splined_Fcoll = EvaluateNion_Conditional(curr_dens,log10_Mturnover,consts->growth_factor, // <-------- HERE + Splined_Fcoll = EvaluateNion_Conditional(curr_dens,log10_Mturnover,consts->growth_factor, consts->M_min,rspec->M_max_R,rspec->M_max_R, rspec->sigma_maxmass,consts->Mlim_Fstar,consts->Mlim_Fesc,false); } @@ -1346,7 +1346,6 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para struct RadiusSpec curr_radius; for(R_ct=n_radii;R_ct--;){ curr_radius = radii_spec[R_ct]; 
- LOG_DEBUG("ION loop: R_ct=%u.",R_ct); // TODO: Remove this //TODO: As far as I can tell, This was the previous behaviour with the while loop // So if the cell size is smaller than the minimum mass (rare) we still filter the last step @@ -1386,9 +1385,9 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para &numBlocks ); } else { - calculate_fcoll_grid(box, previous_ionize_box, grid_struct, &ionbox_constants, &curr_radius); // <-------- HERE + calculate_fcoll_grid(box, previous_ionize_box, grid_struct, &ionbox_constants, &curr_radius); } - // calculate_fcoll_grid(box, previous_ionize_box, grid_struct, &ionbox_constants, &curr_radius); // <-------- HERE + // calculate_fcoll_grid(box, previous_ionize_box, grid_struct, &ionbox_constants, &curr_radius); // To avoid ST_over_PS becoming nan when f_coll = 0, I set f_coll = FRACT_FLOAT_ERR. @@ -1415,14 +1414,12 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para } // If GPU & flags, call free_ionbox_gpu_data() if (flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { - LOG_DEBUG("ION free_ionbox_gpu_data about to be called."); free_ionbox_gpu_data( &d_deltax_filtered, &d_xe_filtered, &d_y_arr, &d_Fcoll ); - LOG_DEBUG("ION free_ionbox_gpu_data called."); } set_ionized_temperatures(box,perturbed_field,spin_temp,&ionbox_constants); From 1acbcfbbdf3a46afe616c2a58d40252da8d20d45 Mon Sep 17 00:00:00 2001 From: alserene Date: Fri, 27 Dec 2024 00:05:18 +1100 Subject: [PATCH 071/145] Update comment for accuracy. 
--- src/py21cmfast/src/interp_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py21cmfast/src/interp_tables.c b/src/py21cmfast/src/interp_tables.c index db608ca02..f1e39202f 100644 --- a/src/py21cmfast/src/interp_tables.c +++ b/src/py21cmfast/src/interp_tables.c @@ -1061,7 +1061,7 @@ RGTable1D_f* get_Nion_conditional_table1D(void) { return &Nion_conditional_table1D; } -// Accessor function for the GPU SpinTemp memory allocation function to access nbins. +// Accessor function for GPU memory allocation functions to access nbins. int get_nbins(void) { return NDELTA; } From 9dbb964d2661f31fdec4bb9a0eeb597922e32514 Mon Sep 17 00:00:00 2001 From: alserene Date: Tue, 31 Dec 2024 00:23:54 +1100 Subject: [PATCH 072/145] Wrap CUDA calls in error checking function. --- src/py21cmfast/src/SpinTemperatureBox.cu | 110 +++++------------------ 1 file changed, 22 insertions(+), 88 deletions(-) diff --git a/src/py21cmfast/src/SpinTemperatureBox.cu b/src/py21cmfast/src/SpinTemperatureBox.cu index 504dabb64..8de8d2eae 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.cu +++ b/src/py21cmfast/src/SpinTemperatureBox.cu @@ -32,6 +32,7 @@ #include "thermochem.h" #include "interpolation.h" +#include "cuda_utils.cuh" #include "SpinTemperatureBox.h" @@ -136,37 +137,19 @@ unsigned int init_sfrd_gpu_data( float **d_sfrd_grid, double **d_ave_sfrd_buf ) { - cudaError_t err = cudaGetLastError(); - - // Allocate device memory ------------------------------------------------------------------------------------------ - err = cudaMalloc((void**)d_y_arr, sizeof(float) * nbins); // already pointers to pointers (no & needed) - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMalloc((void**)d_dens_R_grid, sizeof(float) * num_pixels); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMalloc((void**)d_sfrd_grid, sizeof(float) * num_pixels); 
- if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } + // Allocate device memory + CALL_CUDA(cudaMalloc((void**)d_y_arr, sizeof(float) * nbins)); // already pointers to pointers (no & needed) + CALL_CUDA(cudaMalloc((void**)d_dens_R_grid, sizeof(float) * num_pixels)); + CALL_CUDA(cudaMalloc((void**)d_sfrd_grid, sizeof(float) * num_pixels)); LOG_INFO("SFRD_conditional_table.y_arr and density and sfrd grids allocated on device."); - // Initialise sfrd_grid to 0 (fill with byte=0) ---------------------------------------------------------------------- - err = cudaMemset(*d_sfrd_grid, 0, sizeof(float) * num_pixels); // dereference the pointer to a pointer (*) - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s: %p", cudaGetErrorString(err), d_sfrd_grid); - Throw(CUDAError); - } + // Initialise sfrd_grid to 0 (fill with byte=0) + CALL_CUDA(cudaMemset(*d_sfrd_grid, 0, sizeof(float) * num_pixels)); // dereference the pointer to a pointer (*) LOG_INFO("sfrd grid initialised to 0."); // Get max threads/block for device int maxThreadsPerBlock; - cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0); + CALL_CUDA(cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0)); // Set threads/block based on device max unsigned int threadsPerBlock; @@ -187,19 +170,11 @@ unsigned int init_sfrd_gpu_data( // Allocate memory for SFRD sum buffer and initialise to 0 only for initial filter step; // reuse memory for remaining filter steps. 
unsigned int numBlocks = ceil(num_pixels / (threadsPerBlock * 2)); - err = cudaMalloc((void**)d_ave_sfrd_buf, sizeof(double) * numBlocks); // already pointer to a pointer (no & needed) ...91m & 256 -> 177979 - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } + CALL_CUDA(cudaMalloc((void**)d_ave_sfrd_buf, sizeof(double) * numBlocks)); // already pointer to a pointer (no & needed) ...91m & 256 -> 177979 LOG_INFO("SFRD sum reduction buffer allocated on device."); // Initialise buffer to 0 (fill with byte=0) - err = cudaMemset(*d_ave_sfrd_buf, 0, sizeof(double) * numBlocks); // dereference the pointer to a pointer (*) - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } + CALL_CUDA(cudaMemset(*d_ave_sfrd_buf, 0, sizeof(double) * numBlocks)); // dereference the pointer to a pointer (*) LOG_INFO("SFRD sum reduction buffer initialised to 0."); return threadsPerBlock; @@ -218,22 +193,12 @@ double calculate_sfrd_from_grid_gpu( float *d_sfrd_grid, double *d_ave_sfrd_buf ) { - cudaError_t err = cudaGetLastError(); - // Get growth factor for current filter step double zpp_growth_R_ct = zpp_growth[R_ct]; - // Copy data from host to device ----------------------------------------------------------------------------------- - err = cudaMemcpy(d_y_arr, SFRD_conditional_table->y_arr, sizeof(float) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaMemcpy(d_dens_R_grid, dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } + // Copy data from host to device + CALL_CUDA(cudaMemcpy(d_y_arr, SFRD_conditional_table->y_arr, sizeof(float) * SFRD_conditional_table->n_bin, cudaMemcpyHostToDevice)); + CALL_CUDA(cudaMemcpy(d_dens_R_grid, 
dens_R_grid, sizeof(float) * num_pixels, cudaMemcpyHostToDevice)); LOG_INFO("SFRD_conditional_table.y_arr and density grid copied to device."); unsigned int numBlocks = ceil(num_pixels / (threadsPerBlock * 2)); @@ -260,32 +225,20 @@ double calculate_sfrd_from_grid_gpu( LOG_WARNING("Thread size invalid; defaulting to 256."); compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); } + // CALL_CUDA(cudaDeviceSynchronize()); // Only use during development + CALL_CUDA(cudaGetLastError()); LOG_INFO("SpinTemperatureBox compute-and-reduce kernel called."); - // Only use during development? - err = cudaDeviceSynchronize(); - CATCH_CUDA_ERROR(err); - - err = cudaGetLastError(); - if (err != cudaSuccess) { - LOG_ERROR("Kernel launch error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - - // Use thrust to reduce computed sums to one value - + // Use thrust to reduce computed sums to one value. 
// Wrap device pointer in a thrust::device_ptr thrust::device_ptr d_ave_sfrd_buf_ptr(d_ave_sfrd_buf); // Reduce final buffer sums to one value double ave_sfrd_buf = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + numBlocks, 0., thrust::plus()); + CALL_CUDA(cudaGetLastError()); LOG_INFO("SFRD sum reduced to single value by thrust::reduce operation."); // Copy results from device to host - err = cudaMemcpy(sfrd_grid, d_sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyDeviceToHost); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } + CALL_CUDA(cudaMemcpy(sfrd_grid, d_sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyDeviceToHost)); LOG_INFO("SFRD sum copied to host."); return ave_sfrd_buf; @@ -297,29 +250,10 @@ void free_sfrd_gpu_data( float **d_sfrd_grid, double **d_ave_sfrd_buf ) { - cudaError_t err = cudaGetLastError(); - // Need to dereference the pointers to pointers (*) - err = cudaFree(*d_y_arr); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaFree(*d_dens_R_grid); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaFree(*d_sfrd_grid); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - err = cudaFree(*d_ave_sfrd_buf); - if (err != cudaSuccess) { - LOG_ERROR("CUDA error: %s", cudaGetErrorString(err)); - Throw(CUDAError); - } - + CALL_CUDA(cudaFree(*d_y_arr)); + CALL_CUDA(cudaFree(*d_dens_R_grid)); + CALL_CUDA(cudaFree(*d_sfrd_grid)); + CALL_CUDA(cudaFree(*d_ave_sfrd_buf)); LOG_INFO("Device memory freed."); } From 4ed3ec83217ad95647a9c218cc9cfa6c630e6124 Mon Sep 17 00:00:00 2001 From: alserene Date: Tue, 31 Dec 2024 00:25:52 +1100 Subject: [PATCH 073/145] Remove redundant casts. 
--- src/py21cmfast/src/SpinTemperatureBox.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/py21cmfast/src/SpinTemperatureBox.cu b/src/py21cmfast/src/SpinTemperatureBox.cu index 8de8d2eae..33cc33902 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.cu +++ b/src/py21cmfast/src/SpinTemperatureBox.cu @@ -138,9 +138,9 @@ unsigned int init_sfrd_gpu_data( double **d_ave_sfrd_buf ) { // Allocate device memory - CALL_CUDA(cudaMalloc((void**)d_y_arr, sizeof(float) * nbins)); // already pointers to pointers (no & needed) - CALL_CUDA(cudaMalloc((void**)d_dens_R_grid, sizeof(float) * num_pixels)); - CALL_CUDA(cudaMalloc((void**)d_sfrd_grid, sizeof(float) * num_pixels)); + CALL_CUDA(cudaMalloc(d_y_arr, sizeof(float) * nbins)); // already pointers to pointers (no & needed) + CALL_CUDA(cudaMalloc(d_dens_R_grid, sizeof(float) * num_pixels)); + CALL_CUDA(cudaMalloc(d_sfrd_grid, sizeof(float) * num_pixels)); LOG_INFO("SFRD_conditional_table.y_arr and density and sfrd grids allocated on device."); // Initialise sfrd_grid to 0 (fill with byte=0) @@ -170,7 +170,7 @@ unsigned int init_sfrd_gpu_data( // Allocate memory for SFRD sum buffer and initialise to 0 only for initial filter step; // reuse memory for remaining filter steps. unsigned int numBlocks = ceil(num_pixels / (threadsPerBlock * 2)); - CALL_CUDA(cudaMalloc((void**)d_ave_sfrd_buf, sizeof(double) * numBlocks)); // already pointer to a pointer (no & needed) ...91m & 256 -> 177979 + CALL_CUDA(cudaMalloc(d_ave_sfrd_buf, sizeof(double) * numBlocks)); // already pointer to a pointer (no & needed) LOG_INFO("SFRD sum reduction buffer allocated on device."); // Initialise buffer to 0 (fill with byte=0) From 993f73bd54a65106bb4ec0babab091e140843ca8 Mon Sep 17 00:00:00 2001 From: alserene Date: Tue, 31 Dec 2024 00:57:27 +1100 Subject: [PATCH 074/145] Reorganise error checking and synchronisation after CUDA kernel and thrust calls. 
--- src/py21cmfast/src/SpinTemperatureBox.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/py21cmfast/src/SpinTemperatureBox.cu b/src/py21cmfast/src/SpinTemperatureBox.cu index 33cc33902..c0b9dd49f 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.cu +++ b/src/py21cmfast/src/SpinTemperatureBox.cu @@ -225,8 +225,8 @@ double calculate_sfrd_from_grid_gpu( LOG_WARNING("Thread size invalid; defaulting to 256."); compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(SFRD_conditional_table->x_min, SFRD_conditional_table->x_width, d_y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels); } - // CALL_CUDA(cudaDeviceSynchronize()); // Only use during development CALL_CUDA(cudaGetLastError()); + // CALL_CUDA(cudaDeviceSynchronize()); // Only use during development LOG_INFO("SpinTemperatureBox compute-and-reduce kernel called."); // Use thrust to reduce computed sums to one value. @@ -235,6 +235,7 @@ double calculate_sfrd_from_grid_gpu( // Reduce final buffer sums to one value double ave_sfrd_buf = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + numBlocks, 0., thrust::plus()); CALL_CUDA(cudaGetLastError()); + // CALL_CUDA(cudaDeviceSynchronize()); // Only use during development LOG_INFO("SFRD sum reduced to single value by thrust::reduce operation."); // Copy results from device to host From a0cbffe9b1dcc305f08ff7de8fd3f62cf93945da Mon Sep 17 00:00:00 2001 From: JHu Date: Sun, 9 Feb 2025 21:01:38 +1100 Subject: [PATCH 075/145] add device rng functions --- build_cffi.py | 2 +- src/py21cmfast/src/device_rng.cu | 80 +++++++++++++++++++++++++++++++ src/py21cmfast/src/device_rng.cuh | 24 ++++++++++ 3 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 src/py21cmfast/src/device_rng.cu create mode 100644 src/py21cmfast/src/device_rng.cuh diff --git a/build_cffi.py b/build_cffi.py index d9fccc1ea..788f3f266 100755 --- a/build_cffi.py +++ b/build_cffi.py @@ -29,7 +29,7 @@ # compiled cuda code 
extra_objects = [os.path.join(CLOC, "hello_world.o"), os.path.join(CLOC, "filtering_cuda.o"), os.path.join(CLOC, "Stochasticity_cuda.o") - , os.path.join(CLOC, "HaloField_cuda.o"), os.path.join(CLOC, "combined_cuda.o")] + , os.path.join(CLOC, "HaloField_cuda.o"), os.path.join(CLOC, "combined_cuda.o"), os.path.join(CLOC, "device_rng_cuda.o")] # os.path.join(CLOC, "interp_tables_cuda.o")] extra_link_args = ["-lcudart", "-lcudadevrt"] diff --git a/src/py21cmfast/src/device_rng.cu b/src/py21cmfast/src/device_rng.cu new file mode 100644 index 000000000..107b96dfe --- /dev/null +++ b/src/py21cmfast/src/device_rng.cu @@ -0,0 +1,80 @@ +#include +#include +#include +#include + +#include "cuda_utils.cuh" +#include "device_rng.cuh" + +__device__ curandState *d_randStates = nullptr; +__device__ int d_numStates = 0; + +// initiate random states +// use the same random seed, different sub-sequence, and with offset of 0 +__global__ void initRandStates(unsigned long long int random_seed, int totalStates) +{ + // get thread idx + int ind = blockIdx.x * blockDim.x + threadIdx.x; + + if (ind < totalStates){ + curand_init(random_seed, ind, 0, &d_randStates[ind]); + + // todo: add the following block to debug + if (ind < 2) + { + printf("temp check rng init.\n"); + printf("Thread %d: d = %u, v0 = %u, boxmuller_flag = %d, boxmuller_extra = %f\n", + ind, d_randStates[ind].d, d_randStates[ind].v[0], + d_randStates[ind].boxmuller_flag, d_randStates[ind].boxmuller_extra); + } + } +} + +// Function to initialize RNG states. 
+void init_rand_states(unsigned long long int seed, int numStates) +{ + CALL_CUDA(cudaMemcpyToSymbol(d_numStates, &numStates, sizeof(int), 0, cudaMemcpyHostToDevice)); + + // todo: add the following block to debug + curandState *checkPtr0 = nullptr; + CALL_CUDA(cudaMemcpyFromSymbol(&checkPtr0, d_randStates, sizeof(checkPtr0), 0, cudaMemcpyDeviceToHost)); + printf("init device pointer = %p\n", checkPtr0); + + curandState *tmpPtr = nullptr; + CALL_CUDA(cudaMalloc((void **)&tmpPtr, numStates * sizeof(curandState))); + CALL_CUDA(cudaMemcpyToSymbol(d_randStates, &tmpPtr, sizeof(tmpPtr), 0, cudaMemcpyHostToDevice)); + tmpPtr = nullptr; + + // todo: add the following block to debug (verify device pointer has been updated successfully) + curandState *checkPtr = nullptr; + CALL_CUDA(cudaMemcpyFromSymbol(&checkPtr, d_randStates, sizeof(checkPtr), 0, cudaMemcpyDeviceToHost)); + printf("updated device pointer = %p\n", checkPtr); + + // define kernel grids + int threadsPerBlock = 256; + int blocks = (numStates + threadsPerBlock - 1) / threadsPerBlock; + + // launch kernel function + initRandStates<<>>(seed, numStates); + CALL_CUDA(cudaGetLastError()); + cudaDeviceSynchronize(); +} + +void free_rand_states() +{ + // copy device pointer/variable to the host + curandState *h_randStates = nullptr; + int h_numStates = 0; + CALL_CUDA(cudaMemcpyFromSymbol(&h_randStates, d_randStates, sizeof(d_randStates), 0, cudaMemcpyDeviceToHost)); + CALL_CUDA(cudaMemcpyFromSymbol(&h_numStates, d_numStates, sizeof(int), 0, cudaMemcpyDeviceToHost)); + if (h_randStates){ + CALL_CUDA(cudaFree(h_randStates)); + h_randStates = nullptr; + CALL_CUDA(cudaMemcpyToSymbol(d_randStates, &h_randStates, sizeof(h_randStates), 0, cudaMemcpyHostToDevice)); + } + + if (h_numStates != 0){ + h_numStates = 0; + CALL_CUDA(cudaMemcpyToSymbol(d_numStates, &h_numStates, sizeof(int), 0, cudaMemcpyHostToDevice)); + } +} diff --git a/src/py21cmfast/src/device_rng.cuh b/src/py21cmfast/src/device_rng.cuh new file mode 100644 
index 000000000..2f94321ab --- /dev/null +++ b/src/py21cmfast/src/device_rng.cuh @@ -0,0 +1,24 @@ +#ifndef _DEVICE_RNG_CUH +#define _DEVICE_RNG_CUH + +#ifdef __CUDACC__ +#include +// Declare the device variables as extern so that they can be shared across CUDA files. +extern __device__ curandState *d_randStates; +extern __device__ int d_numStates; +#endif + + +#ifdef __cplusplus +extern "C" +{ +#endif + // Function prototypes. + void init_rand_states(unsigned long long int seed, int numStates); + void free_rand_states(); + +#ifdef __cplusplus +} +#endif + +#endif From ae0bdc564b31406b34f59eb99027d21b7950b533 Mon Sep 17 00:00:00 2001 From: JHu Date: Sun, 9 Feb 2025 21:05:28 +1100 Subject: [PATCH 076/145] tmp: test for cuda activation and init rand states (need to move these functions to a different scripts later) --- src/py21cmfast/src/InitialConditions.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/py21cmfast/src/InitialConditions.c b/src/py21cmfast/src/InitialConditions.c index 046d2f671..7365c55a6 100644 --- a/src/py21cmfast/src/InitialConditions.c +++ b/src/py21cmfast/src/InitialConditions.c @@ -22,6 +22,9 @@ #include "InitialConditions.h" +#include "hello_world.h" +#include "device_rng.cuh" + void seed_rng_threads(gsl_rng * rng_arr[], unsigned long long int seed){ // setting tbe random seeds gsl_rng * rseed = gsl_rng_alloc(gsl_rng_mt19937); // An RNG for generating seeds for multithreading @@ -152,6 +155,10 @@ int ComputeInitialConditions( // Date: 9/29/06 int status; + printf("Start computing initial conditions\n"); + call_cuda(); + init_rand_states(random_seed, 10000000); + printf("finish init rand states \n"); Try{ // This Try wraps the entire function so we don't indent. 
From 7bfec65137d33b5be36f5bb8795da2f5ab3061e3 Mon Sep 17 00:00:00 2001 From: JHu Date: Sun, 9 Feb 2025 21:45:46 +1100 Subject: [PATCH 077/145] resolve linking issues caused by function name mangling --- src/py21cmfast/src/InputParameters.h | 12 +++++- src/py21cmfast/src/Stochasticity.h | 57 ++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/src/py21cmfast/src/InputParameters.h b/src/py21cmfast/src/InputParameters.h index b4c104197..dfca73071 100644 --- a/src/py21cmfast/src/InputParameters.h +++ b/src/py21cmfast/src/InputParameters.h @@ -6,7 +6,15 @@ // Since it is unguarded, make sure to ONLY include this file from here #include "_inputparams_wrapper.h" -void Broadcast_struct_global_all(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options); -void Broadcast_struct_global_noastro(UserParams *user_params, CosmoParams *cosmo_params); +#ifdef __cplusplus +extern "C" +{ +#endif + void Broadcast_struct_global_all(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options); + void Broadcast_struct_global_noastro(UserParams *user_params, CosmoParams *cosmo_params); + +#ifdef __cplusplus +} +#endif #endif diff --git a/src/py21cmfast/src/Stochasticity.h b/src/py21cmfast/src/Stochasticity.h index be76ee7af..2181adeca 100644 --- a/src/py21cmfast/src/Stochasticity.h +++ b/src/py21cmfast/src/Stochasticity.h @@ -4,19 +4,54 @@ #include "InputParameters.h" #include "OutputStructs.h" -int stochastic_halofield(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options - , unsigned long long int seed, float redshift_desc, float redshift, float *dens_field, float *halo_overlap_box, - HaloField *halos_desc, HaloField *halos); +struct HaloSamplingConstants +{ + // calculated per redshift + int from_catalog; // flag for first box or updating halos + double corr_sfr; + double corr_star; + double corr_xray; 
-int single_test_sample(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, - unsigned long long int seed, int n_condition, float *conditions, int *cond_crd, double z_out, double z_in, - int *out_n_tot, int *out_n_cell, double *out_n_exp, - double *out_m_cell, double *out_m_exp, float *out_halo_masses, int *out_halo_coords); + double z_in; + double z_out; + double growth_in; + double growth_out; + double M_min; + double lnM_min; + double M_max_tables; + double lnM_max_tb; + double sigma_min; -//This function, designed to be used in the wrapper to estimate Halo catalogue size, takes the parameters and returns average number of halos within the box -double expected_nhalo(double redshift, UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions * flag_options); + // per-condition/redshift depending on from_catalog or not + double delta; + double M_cond; + double lnM_cond; + double sigma_cond; -//used in HaloField.c to assign rng to DexM halos -int add_properties_cat(unsigned long long int seed, float redshift, HaloField *halos); + // calculated per condition + double cond_val; // This is the table x value (density for grids, log mass for progenitors) + double expected_N; + double expected_M; +}; +#ifdef __cplusplus +extern "C" +{ +#endif + int stochastic_halofield(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, unsigned long long int seed, float redshift_desc, float redshift, float *dens_field, float *halo_overlap_box, + HaloField *halos_desc, HaloField *halos); + + int single_test_sample(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, + unsigned long long int seed, int n_condition, float *conditions, int *cond_crd, double z_out, double z_in, + int *out_n_tot, int *out_n_cell, double *out_n_exp, + double *out_m_cell, double *out_m_exp, float *out_halo_masses, int 
*out_halo_coords); + + // This function, designed to be used in the wrapper to estimate Halo catalogue size, takes the parameters and returns average number of halos within the box + double expected_nhalo(double redshift, UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options); + + // used in HaloField.c to assign rng to DexM halos + int add_properties_cat(unsigned long long int seed, float redshift, HaloField *halos); +#ifdef __cplusplus +} +#endif #endif From 7cafd55e75225dc8f123a1560a2bca46122dffea Mon Sep 17 00:00:00 2001 From: JHu Date: Sun, 9 Feb 2025 21:58:16 +1100 Subject: [PATCH 078/145] add device version of functions in hmf.c --- src/py21cmfast/src/hmf.cu | 21 +++++++++++++++++++++ src/py21cmfast/src/hmf.cuh | 24 ++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 src/py21cmfast/src/hmf.cu create mode 100644 src/py21cmfast/src/hmf.cuh diff --git a/src/py21cmfast/src/hmf.cu b/src/py21cmfast/src/hmf.cu new file mode 100644 index 000000000..379a90a62 --- /dev/null +++ b/src/py21cmfast/src/hmf.cu @@ -0,0 +1,21 @@ +#include +#include + +#include "Constants.h" +#include "hmf.cuh" + +__device__ double sheth_delc_fixed(double del, double sig) +{ + return sqrt(JENKINS_a) * del * (1. 
+ JENKINS_b * pow(sig * sig / (JENKINS_a * del * del), JENKINS_c)); +} + +// Get the relevant excursion set barrier density given the user-specified HMF +__device__ double get_delta_crit(int HMF, double sigma, double growthf) +{ + if (HMF == 4) + return DELTAC_DELOS; + if (HMF == 1) + return sheth_delc_fixed(Deltac / growthf, sigma) * growthf; + + return Deltac; +} diff --git a/src/py21cmfast/src/hmf.cuh b/src/py21cmfast/src/hmf.cuh new file mode 100644 index 000000000..d942de874 --- /dev/null +++ b/src/py21cmfast/src/hmf.cuh @@ -0,0 +1,24 @@ +#include + +#ifndef _HMF_CUH +#define _HMF_CUH + +// define macros +#ifndef JENKINS_a +#define JENKINS_a (0.73) // Jenkins+01, SMT has 0.707 +#endif + +#ifndef JENKINS_b +#define JENKINS_b (0.34) // Jenkins+01 fit from Barkana+01, SMT has 0.5 +#endif + +#ifndef JENKINS_c +#define JENKINS_c (0.81) // Jenkins+01 from from Barkana+01, SMT has 0.6 +#endif + +// #ifdef __CUDA_ARCH__ +__device__ double sheth_delc_fixed(double del, double sig); +__device__ double get_delta_crit(int HMF, double sigma, double growthf); +// #endif + +#endif \ No newline at end of file From f65b5660717053440aefdbccf644d74824f325df Mon Sep 17 00:00:00 2001 From: JHu Date: Mon, 10 Feb 2025 18:42:39 +1100 Subject: [PATCH 079/145] WIP: add cuda version of sampling progenitors --- src/py21cmfast/src/Stochasticity.c | 359 +++++++++----- src/py21cmfast/src/Stochasticity.cu | 680 +++++++++++++++++++-------- src/py21cmfast/src/Stochasticity.cuh | 5 +- 3 files changed, 733 insertions(+), 311 deletions(-) diff --git a/src/py21cmfast/src/Stochasticity.c b/src/py21cmfast/src/Stochasticity.c index 301519655..8f2aed0a6 100644 --- a/src/py21cmfast/src/Stochasticity.c +++ b/src/py21cmfast/src/Stochasticity.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "cexcept.h" #include "exceptions.h" #include "logger.h" @@ -14,11 +15,18 @@ #include "InputParameters.h" #include "OutputStructs.h" #include "interp_tables.h" +#include "interpolation.h" #include "hmf.h" 
#include "cosmology.h" #include "InitialConditions.h" #include "Stochasticity.h" + +#include "Stochasticity.cuh" +#include "interp_tables.cuh" +#include "HaloField.cuh" + +#include //buffer size (per cell of arbitrary size) in the sampling function #define MAX_HALO_CELL (int)1e5 @@ -26,34 +34,34 @@ //Note: ideally I would split this into constants set per snapshot and // constants set per condition, however some variables (delta or Mass) // can be set with differing frequencies depending on the condition type -struct HaloSamplingConstants{ - //calculated per redshift - int from_catalog; //flag for first box or updating halos - double corr_sfr; - double corr_star; - double corr_xray; - - double z_in; - double z_out; - double growth_in; - double growth_out; - double M_min; - double lnM_min; - double M_max_tables; - double lnM_max_tb; - double sigma_min; - - //per-condition/redshift depending on from_catalog or not - double delta; - double M_cond; - double lnM_cond; - double sigma_cond; - - //calculated per condition - double cond_val; //This is the table x value (density for grids, log mass for progenitors) - double expected_N; - double expected_M; -}; +// struct HaloSamplingConstants{ +// //calculated per redshift +// int from_catalog; //flag for first box or updating halos +// double corr_sfr; +// double corr_star; +// double corr_xray; + +// double z_in; +// double z_out; +// double growth_in; +// double growth_out; +// double M_min; +// double lnM_min; +// double M_max_tables; +// double lnM_max_tb; +// double sigma_min; + +// //per-condition/redshift depending on from_catalog or not +// double delta; +// double M_cond; +// double lnM_cond; +// double sigma_cond; + +// //calculated per condition +// double cond_val; //This is the table x value (density for grids, log mass for progenitors) +// double expected_N; +// double expected_M; +// }; void print_hs_consts(struct HaloSamplingConstants * c){ LOG_DEBUG("Printing halo sampler constants...."); @@ -91,7 +99,7 @@ 
double expected_nhalo(double redshift, UserParams *user_params, CosmoParams *cos double sample_dndM_inverse(double condition, struct HaloSamplingConstants * hs_constants, gsl_rng * rng){ double p_in, result; p_in = gsl_rng_uniform(rng); - result = EvaluateNhaloInv(condition,p_in); + result = EvaluateNhaloInv(condition, p_in); result = fmin(1,fmax(0,result)); //clip in case of extrapolation result = result * hs_constants->M_cond; return result; @@ -167,6 +175,18 @@ void stoc_set_consts_z(struct HaloSamplingConstants *const_struct, double redshi return; } +double get_max_nhalo(struct HaloSamplingConstants *const_struct, float *halo_masses, int size) +{ + int idx_max = cblas_isamax(size, halo_masses, 1); + float mass_max = halo_masses[idx_max]; + double ln_mm = log(mass_max); + double sigma_cond = EvaluateSigma(ln_mm); + double delta = get_delta_crit(user_params_global->HMF, sigma_cond, const_struct->growth_in) / const_struct->growth_in * const_struct->growth_out; + int n_exp = EvaluateNhalo(ln_mm, const_struct->growth_out, const_struct->lnM_min, + const_struct->lnM_max_tb, mass_max, sigma_cond, delta); + double expected_N = n_exp * mass_max; + return expected_N; +} //set the constants which are calculated once per condition void stoc_set_consts_cond(struct HaloSamplingConstants *const_struct, double cond_val){ double m_exp,n_exp; @@ -640,6 +660,8 @@ int stoc_sample(struct HaloSamplingConstants * hs_constants, gsl_rng * rng, int //Expected mass takes into account potential dexm overlap M_out[0] = hs_constants->expected_M; + // tmp: 2025-01-22 + // printf("c code: meet sample condition 1 \n"); return 0; } @@ -824,13 +846,24 @@ int sample_halo_grids(gsl_rng **rng_arr, double redshift, float *dens_field, flo return 0; } +void print_current_time() +{ + time_t now = time(NULL); + printf("Current time: %s \n", ctime(&now)); +} + //NOTE: there's a lot of repeated code here and in build_halo_cats, find a way to merge -int sample_halo_progenitors(gsl_rng ** rng_arr, double 
z_in, double z_out, HaloField *halofield_in, - HaloField *halofield_out, struct HaloSamplingConstants *hs_constants){ +int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloField *halofield_in, + HaloField *halofield_out, struct HaloSamplingConstants *hs_constants, RGTable1D_f *sigma_table) +{ if(z_in >= z_out){ LOG_ERROR("halo progenitors must go backwards in time!!! z_in = %.1f, z_out = %.1f",z_in,z_out); Throw(ValueError); } + + // tmp + printf("The redshift z_in: %.1f; z_out: %.1f \n", z_in, z_out); + //cell size for smoothing / CMF calculation double Mmax_tb = hs_constants->M_max_tables; double Mmin = hs_constants->M_min; @@ -842,101 +875,166 @@ int sample_halo_progenitors(gsl_rng ** rng_arr, double z_in, double z_out, HaloF unsigned long long int arraysize_total = halofield_out->buffer_size; unsigned long long int arraysize_local = arraysize_total / user_params_global->N_THREADS; - + printf("The number of halo sampling: %llu \n", nhalo_in); LOG_DEBUG("Beginning stochastic halo sampling of progenitors on %llu halos",nhalo_in); LOG_DEBUG("z = %f, Mmin = %e, d = %.3e",z_out,Mmin,delta); LOG_DEBUG("Total Array Size %llu, array size per thread %llu (~%.3e GB total)",arraysize_total,arraysize_local,6.*arraysize_total*sizeof(int)/1e9); double corr_arr[3] = {hs_constants->corr_star,hs_constants->corr_sfr,hs_constants->corr_xray}; -#pragma omp parallel num_threads(user_params_global->N_THREADS) - { - float prog_buf[MAX_HALO_CELL]; - int n_prog; - double M_prog; - - double propbuf_in[3]; - double propbuf_out[3]; - - int threadnum = omp_get_thread_num(); - double M2; - int jj; - unsigned long long int ii; - unsigned long long int count=0; - unsigned long long int istart = threadnum * arraysize_local; - - //we need a private version - //also the naming convention should be better between structs/struct pointers - struct HaloSamplingConstants hs_constants_priv; - hs_constants_priv = *hs_constants; - -#pragma omp for - for(ii=0;iihalo_masses[ii]; - 
if(M2 < Mmin || M2 > Mmax_tb){ - LOG_ERROR("Input Mass = %.2e at %llu of %llu, something went wrong in the input catalogue",M2,ii,nhalo_in); - Throw(ValueError); - } - //set condition-dependent variables for sampling - stoc_set_consts_cond(&hs_constants_priv,M2); - - //Sample the CMF set by the descendant - stoc_sample(&hs_constants_priv,rng_arr[threadnum],&n_prog,prog_buf); - - propbuf_in[0] = halofield_in->star_rng[ii]; - propbuf_in[1] = halofield_in->sfr_rng[ii]; - propbuf_in[2] = halofield_in->xray_rng[ii]; - - //place progenitors in local list - M_prog = 0; - for(jj=0;jjSAMPLER_MIN_MASS) continue; - - if(count >= arraysize_local){ - LOG_ERROR("More than %llu halos (expected %.1e) with buffer size factor %.1f", - arraysize_local,arraysize_local/user_params_global->MAXHALO_FACTOR,user_params_global->MAXHALO_FACTOR); - LOG_ERROR("If you expected to have an above average halo number try raising user_params_global->MAXHALO_FACTOR"); - Throw(ValueError); - } - - set_prop_rng(rng_arr[threadnum], true, corr_arr, propbuf_in, propbuf_out); - - halofield_out->halo_masses[istart + count] = prog_buf[jj]; - halofield_out->halo_coords[3*(istart + count) + 0] = halofield_in->halo_coords[3*ii+0]; - halofield_out->halo_coords[3*(istart + count) + 1] = halofield_in->halo_coords[3*ii+1]; - halofield_out->halo_coords[3*(istart + count) + 2] = halofield_in->halo_coords[3*ii+2]; - - halofield_out->star_rng[istart + count] = propbuf_out[0]; - halofield_out->sfr_rng[istart + count] = propbuf_out[1]; - halofield_out->xray_rng[istart + count] = propbuf_out[2]; - count++; - - if(ii==0){ - M_prog += prog_buf[jj]; - - LOG_ULTRA_DEBUG("First Halo Prog %d: Mass %.2e Stellar %.2e SFR %.2e XRAY %.2e e_d %.3f", - jj,prog_buf[jj],propbuf_out[0],propbuf_out[1],propbuf_out[2], - Deltac*hs_constants->growth_out/hs_constants->growth_in); - } - } - if(ii==0){ - LOG_ULTRA_DEBUG(" HMF %d delta %.3f delta_coll %.3f delta_desc %.3f adjusted %.3f",user_params_global->HMF, - hs_constants_priv.delta, - 
get_delta_crit(user_params_global->HMF,hs_constants_priv.sigma_cond,hs_constants->growth_out), - get_delta_crit(user_params_global->HMF,hs_constants_priv.sigma_cond,hs_constants->growth_in), - get_delta_crit(user_params_global->HMF,hs_constants_priv.sigma_cond,hs_constants->growth_in) - *hs_constants->growth_out/hs_constants->growth_in); - print_hs_consts(&hs_constants_priv); - LOG_SUPER_DEBUG("First Halo: Mass %.2f | N %d (exp. %.2e) | Total M %.2e (exp. %.2e)", - M2,n_prog,hs_constants_priv.expected_N,M_prog,hs_constants_priv.expected_M); - } - } - istart_threads[threadnum] = istart; - nhalo_threads[threadnum] = count; - } - condense_sparse_halolist(halofield_out, istart_threads, nhalo_threads); + // get parameters needed for sigma calculation + double x_min = sigma_table->x_min; + double x_width = sigma_table->x_width; + int sigma_bin = sigma_table->n_bin; + float *sigma_y_arr = sigma_table->y_arr; + + // Create a copy of hs_constants for passing to cuda + struct HaloSamplingConstants d_hs_constants; + d_hs_constants = *hs_constants; + + // get in halo data + float *halo_m = halofield_in->halo_masses; + float *halo_star_rng = halofield_in->star_rng; + float *halo_sfr_rng = halofield_in->sfr_rng; + float *halo_xray_rng = halofield_in->xray_rng; + int *halo_c = halofield_in->halo_coords; + + // call cuda function here + printf("Start cuda calculation for progenitors. "); + print_current_time(); + updateHaloOut(halo_m, halo_star_rng, halo_sfr_rng, + halo_xray_rng,halo_c,nhalo_in, sigma_y_arr, + sigma_bin, x_min, x_width, d_hs_constants, + arraysize_total, halofield_out); + printf("End cuda calculation for progenitors. 
"); + print_current_time(); + + // 2025-01-23 tmp: original processing in C only (start) +// // get max halo mass +// double max_halo_m = get_max_nhalo(hs_constants, halofield_in->halo_masses, nhalo_in); +// printf("The evaluated N halo is : %f \n", max_halo_m); + +// // check n_prog > 4 +// int nprog_check = 0; + +// // // tiger tmp: debug (start) +// // double res1, res2, res3, res4; +// // res1 = EvaluateNhaloInv(18.694414138793945, 0.0046723012881037529); +// // printf("tmp res1: %.17f \n", res1); +// // res2 = EvaluateNhaloInv(20.084152221679688, 0.32153863360286256); +// // printf("tmp res2: %.17f \n", res2); +// // res3 = EvaluateNhaloInv(26.806314468383789, 0.8698794976081996); +// // printf("tmp res3: %.17f \n", res3); +// // res4 = EvaluateNhaloInv(19.00053596496582, 0.83130413049947305); +// // printf("tmp res4: %.17f \n", res4); +// // // tiger tmp: debug (end) + +// #pragma omp parallel num_threads(user_params_global->N_THREADS) +// { +// float prog_buf[MAX_HALO_CELL]= {0}; +// int n_prog; +// double M_prog; + +// double propbuf_in[3]; +// double propbuf_out[3]; + +// int threadnum = omp_get_thread_num(); +// double M2; +// int jj; +// unsigned long long int ii; +// unsigned long long int count=0; +// unsigned long long int istart = threadnum * arraysize_local; + +// //we need a private version +// //also the naming convention should be better between structs/struct pointers +// struct HaloSamplingConstants hs_constants_priv; +// hs_constants_priv = *hs_constants; + +// #pragma omp for +// for(ii=0;iihalo_masses[ii]; +// if(M2 < Mmin || M2 > Mmax_tb){ +// printf("got an outlier."); +// LOG_ERROR("Input Mass = %.2e at %llu of %llu, something went wrong in the input catalogue",M2,ii,nhalo_in); +// Throw(ValueError); +// } +// //set condition-dependent variables for sampling +// stoc_set_consts_cond(&hs_constants_priv,M2); +// // tiger tmp debug (start) +// if (ii == 160 || ii == 680 || ii == 10792){ +// printf("temp check.\n"); +// } +// // tiger tmp 
dubug (end) + +// //Sample the CMF set by the descendant +// stoc_sample(&hs_constants_priv,rng_arr[threadnum],&n_prog,prog_buf); + +// if (n_prog >=100){ +// printf("The number of progenitors at z %.1f and halo %llu: %d \n", z_in, ii, n_prog); +// } + +// if (n_prog == 2){ +// nprog_check += 1; +// } + +// propbuf_in[0] = halofield_in->star_rng[ii]; +// propbuf_in[1] = halofield_in->sfr_rng[ii]; +// propbuf_in[2] = halofield_in->xray_rng[ii]; + +// //place progenitors in local list +// M_prog = 0; +// for(jj=0;jjSAMPLER_MIN_MASS) continue; + +// if(count >= arraysize_local){ +// LOG_ERROR("More than %llu halos (expected %.1e) with buffer size factor %.1f", +// arraysize_local,arraysize_local/user_params_global->MAXHALO_FACTOR,user_params_global->MAXHALO_FACTOR); +// LOG_ERROR("If you expected to have an above average halo number try raising user_params_global->MAXHALO_FACTOR"); +// Throw(ValueError); +// } + +// set_prop_rng(rng_arr[threadnum], true, corr_arr, propbuf_in, propbuf_out); + +// halofield_out->halo_masses[istart + count] = prog_buf[jj]; +// halofield_out->halo_coords[3*(istart + count) + 0] = halofield_in->halo_coords[3*ii+0]; +// halofield_out->halo_coords[3*(istart + count) + 1] = halofield_in->halo_coords[3*ii+1]; +// halofield_out->halo_coords[3*(istart + count) + 2] = halofield_in->halo_coords[3*ii+2]; + +// halofield_out->star_rng[istart + count] = propbuf_out[0]; +// halofield_out->sfr_rng[istart + count] = propbuf_out[1]; +// halofield_out->xray_rng[istart + count] = propbuf_out[2]; +// // printf("prop out: %f, %f, %f \n", propbuf_out[0], propbuf_out[1], propbuf_out[2]); +// count++; + +// if(ii==0){ +// M_prog += prog_buf[jj]; + +// LOG_ULTRA_DEBUG("First Halo Prog %d: Mass %.2e Stellar %.2e SFR %.2e XRAY %.2e e_d %.3f", +// jj,prog_buf[jj],propbuf_out[0],propbuf_out[1],propbuf_out[2], +// Deltac*hs_constants->growth_out/hs_constants->growth_in); +// } +// } +// if(ii==0){ +// LOG_ULTRA_DEBUG(" HMF %d delta %.3f delta_coll %.3f delta_desc 
%.3f adjusted %.3f",user_params_global->HMF, +// hs_constants_priv.delta, +// get_delta_crit(user_params_global->HMF,hs_constants_priv.sigma_cond,hs_constants->growth_out), +// get_delta_crit(user_params_global->HMF,hs_constants_priv.sigma_cond,hs_constants->growth_in), +// get_delta_crit(user_params_global->HMF,hs_constants_priv.sigma_cond,hs_constants->growth_in) +// *hs_constants->growth_out/hs_constants->growth_in); + // print_hs_consts(&hs_constants_priv); + // LOG_SUPER_DEBUG("First Halo: Mass %.2f | N %d (exp. %.2e) | Total M %.2e (exp. %.2e)", + // M2,n_prog,hs_constants_priv.expected_N,M_prog,hs_constants_priv.expected_M); + // } + // } + // istart_threads[threadnum] = istart; + // nhalo_threads[threadnum] = count; + // } + // printf("The number of halos with nprog == 2 is: %llu\n", nprog_check); + // condense_sparse_halolist(halofield_out, istart_threads, nhalo_threads); + +// 2025-01-23 tmp: original processing in C only (end) return 0; } @@ -958,15 +1056,28 @@ int stochastic_halofield(UserParams *user_params, CosmoParams *cosmo_params, struct HaloSamplingConstants hs_constants; stoc_set_consts_z(&hs_constants,redshift,redshift_desc); - //Fill them - //NOTE:Halos prev in the first box corresponds to the large DexM halos - if(redshift_desc < 0.){ + // tmp: confirm we could access sigma table + RGTable1D *nhalo_table = GetNhaloTable(); + RGTable1D *mcoll_table = GetMcollTable(); + RGTable2D *nhalo_inv_table = GetNhaloInvTable(); + RGTable1D_f *sigma_table = GetSigmaInterpTable(); + + // copy relevant tables to the device + copyTablesToDevice(*nhalo_table, *mcoll_table, *nhalo_inv_table); + + // copy global variables to the device + updateGlobalParams(user_params_global, cosmo_params_global, astro_params_global); + + // Fill them + // NOTE:Halos prev in the first box corresponds to the large DexM halos + if (redshift_desc < 0.) 
+ { LOG_DEBUG("building first halo field at z=%.1f", redshift); sample_halo_grids(rng_stoc,redshift,dens_field,halo_overlap_box,halos_desc,halos,&hs_constants); } else{ LOG_DEBUG("Calculating halo progenitors from z=%.1f to z=%.1f | %llu", redshift_desc,redshift,halos_desc->n_halos); - sample_halo_progenitors(rng_stoc,redshift_desc,redshift,halos_desc,halos,&hs_constants); + sample_halo_progenitors(rng_stoc,redshift_desc,redshift,halos_desc,halos,&hs_constants, sigma_table); } LOG_DEBUG("Found %llu Halos", halos->n_halos); diff --git a/src/py21cmfast/src/Stochasticity.cu b/src/py21cmfast/src/Stochasticity.cu index 929f02cca..27b105d62 100644 --- a/src/py21cmfast/src/Stochasticity.cu +++ b/src/py21cmfast/src/Stochasticity.cu @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include "Constants.h" #include "interpolation_types.h" @@ -22,6 +24,7 @@ #include "cuda_utils.cuh" #include "Stochasticity.cuh" #include "DeviceConstants.cuh" +#include "device_rng.cuh" #include "hmf.cu" #include "interp_tables.cu" @@ -93,25 +96,80 @@ void condense_device_vector() std::cout << std::endl; } -int condenseDeviceArray(float *d_array, int original_size, float mask_value) +// int condenseDeviceArray(float *d_array, int original_size, float mask_value) +// { +// // Wrap the raw device pointer into a thrust device pointer +// thrust::device_ptr d_array_ptr(d_array); + +// // Remove elements with mask value +// // i.e.move elements not equal to mask value to the beginning of the array without changing order +// auto new_end = thrust::remove(d_array_ptr, d_array_ptr + original_size, mask_value); + +// // Calculate the number of valid elements +// int valid_size = new_end - d_array_ptr; + +// // Fill the remaining space with mask value +// thrust::fill(new_end, d_array_ptr + original_size, mask_value); + +// // Print results (on host side) +// // std::cout << "Valid elements count: " << valid_size << "\n"; +// return valid_size; +// } + +template +int condenseDeviceArray(T 
*d_array, int original_size, T mask_value) { // Wrap the raw device pointer into a thrust device pointer - thrust::device_ptr d_array_ptr(d_array); + thrust::device_ptr d_array_ptr(d_array); - // Remove elements with value 0 - // thrust::device_vector::iterator new_end = thrust::remove(d_array_ptr, d_array_ptr + original_size, 0); - // thrust::device_ptr new_end = thrust::remove(d_array_ptr, d_array_ptr + original_size, 0); + // Remove elements with mask value auto new_end = thrust::remove(d_array_ptr, d_array_ptr + original_size, mask_value); // Calculate the number of valid elements int valid_size = new_end - d_array_ptr; + + // Fill the remaining space with mask value thrust::fill(new_end, d_array_ptr + original_size, mask_value); - // Print results (on host side) - // std::cout << "Valid elements count: " << valid_size << "\n"; return valid_size; } +void testCondenseDeviceArray() +{ + // Input data + float h_array[] = {1.0f, 0.0f, 2.0f, 3.0f, 0.0f, 4.0f}; + float mask_value = 0.0f; + int original_size = 6; + + // Expected outputs + float expected_array[] = {1.0f, 2.0f, 3.0f, 4.0f, 0.0f, 0.0f}; + int expected_valid_size = 4; + + // Allocate and copy to device + float *d_array; + cudaMalloc(&d_array, original_size * sizeof(float)); + cudaMemcpy(d_array, h_array, original_size * sizeof(float), cudaMemcpyHostToDevice); + + // Call the function from Stochasticity.cu + int valid_size = condenseDeviceArray(d_array, original_size, mask_value); + + // Copy the results back to the host + float h_result[original_size]; + cudaMemcpy(h_result, d_array, original_size * sizeof(float), cudaMemcpyDeviceToHost); + + // Validate the results + assert(valid_size == expected_valid_size); + for (int i = 0; i < original_size; ++i) + { + assert(h_result[i] == expected_array[i]); + } + + std::cout << "Test passed: condenseDeviceArray\n"; + + // Free device memory + cudaFree(d_array); +} + int filterWithMask(float *d_data, int *d_mask, int original_size) { // Wrap the raw pointers into 
thrust device pointers @@ -130,16 +188,90 @@ int filterWithMask(float *d_data, int *d_mask, int original_size) return valid_size; } +void testFilterWithMask() +{ + // Input arrays + float h_data[] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f}; // Input data + int h_mask[] = {1, 0, 1, 0, 1}; // Mask array + int original_size = 5; + + // Expected outputs + float expected_data[] = {1.1f, 3.3f, 5.5f}; // Expected filtered data + int expected_size = 3; // Number of valid elements + + // Allocate device memory + float *d_data; + int *d_mask; + cudaMalloc(&d_data, original_size * sizeof(float)); + cudaMalloc(&d_mask, original_size * sizeof(int)); + + // Copy data to device + cudaMemcpy(d_data, h_data, original_size * sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(d_mask, h_mask, original_size * sizeof(int), cudaMemcpyHostToDevice); + + // Call the function + int valid_size = filterWithMask(d_data, d_mask, original_size); + + // Copy the filtered data back to host + float h_result[original_size]; + cudaMemcpy(h_result, d_data, original_size * sizeof(float), cudaMemcpyDeviceToHost); + + // Validate the size of the filtered array + assert(valid_size == expected_size); + + // Validate the filtered elements + for (int i = 0; i < valid_size; ++i) + { + assert(h_result[i] == expected_data[i]); + } + + // Print success message + std::cout << "Test passed: filterWithMask\n"; + + // Free device memory + cudaFree(d_data); + cudaFree(d_mask); +} + +void countElements(const int *array, int size, const std::vector &values_to_count) +{ + // Initialize a frequency array to count occurrences + int count[values_to_count.size()] = {0}; + + // Iterate through the input array + for (int i = 0; i < size; ++i) + { + // Find the index of the value in values_to_count + for (size_t j = 0; j < values_to_count.size(); ++j) + { + if (array[i] == values_to_count[j]) + { + count[j]++; + break; + } + } + } + + // Print the results + for (size_t i = 0; i < values_to_count.size(); ++i) + { + std::cout << "Value 
" << values_to_count[i] << ": " << count[i] << " occurrences\n"; + } +} + // decide the number of sparsity int getSparsity(int n_buffer, int n_halo){ - int power = floor(log2(n_buffer / n_halo)); - int sparsity = 1 << power; - return sparsity; + if (n_halo > 0){ + int power = floor(log2(n_buffer / n_halo)); + int sparsity = 1 << power; + return sparsity; + } + } // initialize device array with given value -void initializeArray(float *d_array, int n_elements, float value){ - thrust::device_ptr d_array_ptr(d_array); +void initializeArray(int *d_array, int n_elements, int value){ + thrust::device_ptr d_array_ptr(d_array); thrust::fill(d_array_ptr, d_array_ptr + n_elements, value); } // more members of deviceprop can be found in cura_runtime_api documentation @@ -171,11 +303,12 @@ struct GridLayout{ int n_threads; int n_blocks; }; -// calculate workload for the second iteration +// calculate workload +// todo: add more checks on sparsity GridLayout getWorkload(int sparsity, unsigned long long int n_halos){ GridLayout res; int n_threads, n_blocks; - if (sparsity == 4){ + if (sparsity != 0 && 256 % sparsity == 0){ n_threads = 256; } else { @@ -238,6 +371,7 @@ __device__ double sample_dndM_inverse(double condition, struct HaloSamplingConst { double p_in, result; p_in = curand_uniform_double(state); + // printf("curand uniform random number: %f\n", p_in); result = EvaluateNhaloInv(condition, p_in); result = fmin(1.0, fmax(0.0, result)); // clip in case of extrapolation result = result * hs_constants->M_cond; @@ -249,16 +383,16 @@ __device__ double remove_random_halo(curandState *state, int n_halo, int *idx, f int random_idx; do { random_idx = (int)(curand_uniform(state) * n_halo); - } while (M_out[random_idx] == -1.f); + } while (M_out[random_idx] == 0.0f); last_M_del = M_out[random_idx]; *M_prog -= last_M_del; - M_out[random_idx] = -1.f; // -1 mass halos are skipped and not counted + M_out[random_idx] = 0.0f; // -1 mass halos are skipped and not counted *idx = random_idx; 
return last_M_del; } -__device__ void fix_mass_sample(curandState *state, double exp_M, float *M_prog, float *M_out, int write_limit){ +__device__ void fix_mass_sample(curandState *state, double exp_M, float *M_prog, float *M_out, int write_limit, int *n_prog){ // Keep the last halo if it brings us closer to the expected mass // This is done by addition or subtraction over the limit to balance // the bias of the last halo being larger @@ -272,7 +406,8 @@ __device__ void fix_mass_sample(curandState *state, double exp_M, float *M_prog, { // *M_tot_pt -= M_out[*n_halo_pt - 1]; // here we remove by setting the counter one lower so it isn't read - M_out[write_limit] = -1.f; + M_out[write_limit] = 0.0f; + (*n_prog)--; } } else @@ -333,7 +468,7 @@ __device__ int stoc_mass_sample(struct HaloSamplingConstants *hs_constants, cura return 0; } -__device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandState *state, float *M_out){ +__device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandState *state, float *M_out, int *sampleCondition){ // TODO: really examine the case for number/mass sampling // The poisson sample fails spectacularly for high delta (from_catalogs or dense cells) // and excludes the correlation between number and mass (e.g many small halos or few large ones) @@ -348,6 +483,7 @@ __device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandSta if (hs_constants->delta <= DELTA_MIN || hs_constants->expected_M < d_user_params.SAMPLER_MIN_MASS) { // *n_halo_out = 0; + *sampleCondition = 0; return 0; } // if delta is above critical, form one big halo @@ -355,7 +491,8 @@ __device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandSta // *n_halo_out = 1; // Expected mass takes into account potential dexm overlap - M_out[0] = hs_constants->expected_M; + *M_out = hs_constants->expected_M; + *sampleCondition = 1; return 0; } @@ -425,29 +562,28 @@ __device__ int stoc_sample(struct HaloSamplingConstants 
*hs_constants, curandSta // LOG_SUPER_DEBUG("Set %llu elements beyond %llu to zero", halofield->buffer_size - count_total, count_total); // } -// todo: implement set_prop_rng -// __device__ void set_prop_rng(gsl_rng *rng, bool from_catalog, double *interp, double *input, double *output) -// { -// double rng_star, rng_sfr, rng_xray; +__device__ void set_prop_rng(curandState *state, bool from_catalog, double *interp, float *input, float *output) +{ + float rng_star, rng_sfr, rng_xray; -// // Correlate properties by interpolating between the sampled and descendant gaussians -// rng_star = astro_params_global->SIGMA_STAR > 0. ? gsl_ran_ugaussian(rng) : 0.; -// rng_sfr = astro_params_global->SIGMA_SFR_LIM > 0. ? gsl_ran_ugaussian(rng) : 0.; -// rng_xray = astro_params_global->SIGMA_LX > 0. ? gsl_ran_ugaussian(rng) : 0.; + // Correlate properties by interpolating between the sampled and descendant gaussians + rng_star = d_astro_params.SIGMA_STAR > 0. ? curand_normal(state) : 0.; + rng_sfr = d_astro_params.SIGMA_SFR_LIM > 0. ? curand_normal(state) : 0.; + rng_xray = d_astro_params.SIGMA_LX > 0. ? 
curand_normal(state) : 0.; -// if (from_catalog) -// { -// // this transforms the sample to one from the multivariate Gaussian, conditioned on the first sample -// rng_star = sqrt(1 - interp[0] * interp[0]) * rng_star + interp[0] * input[0]; -// rng_sfr = sqrt(1 - interp[1] * interp[1]) * rng_sfr + interp[1] * input[1]; -// rng_xray = sqrt(1 - interp[2] * interp[2]) * rng_xray + interp[2] * input[2]; -// } + if (from_catalog) + { + // this transforms the sample to one from the multivariate Gaussian, conditioned on the first sample + rng_star = sqrt(1 - interp[0] * interp[0]) * rng_star + interp[0] * input[0]; + rng_sfr = sqrt(1 - interp[1] * interp[1]) * rng_sfr + interp[1] * input[1]; + rng_xray = sqrt(1 - interp[2] * interp[2]) * rng_xray + interp[2] * input[2]; + } -// output[0] = rng_star; -// output[1] = rng_sfr; -// output[2] = rng_xray; -// return; -// } + output[0] = rng_star; + output[1] = rng_sfr; + output[2] = rng_xray; + return; +} // kernel function __global__ void setup_random_states(curandState *d_states, unsigned long long int random_seed){ @@ -456,36 +592,70 @@ __global__ void setup_random_states(curandState *d_states, unsigned long long in curand_init(random_seed, ind, 0, &d_states[ind]); } -__global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, double x_min, double x_width, +__global__ void update_halo_constants(float *d_halo_masses, float *d_star_rng_in, float *d_sfr_rng_in, float *d_xray_rng_in, + int *d_halo_coords_in, float *d_y_arr, double x_min, double x_width, unsigned long long int n_halos, int n_bin, struct HaloSamplingConstants d_hs_constants, int HMF, curandState *d_states, - float *d_halo_masses_out, float *star_rng_out, - float *sfr_rng_out, float *xray_rng_out, float *halo_coords_out, int *d_sum_check, - int *d_further_process, int *d_nprog_predict, int sparsity, unsigned long long int write_offset, double *expected_mass) + float *d_halo_masses_out, float *d_star_rng_out, + float *d_sfr_rng_out, float 
*d_xray_rng_out, int *d_halo_coords_out, int *d_sum_check, + int *d_further_process, int *d_nprog_predict, int sparsity, unsigned long long int write_offset, + double *expected_mass, int *d_n_prog, int offset_shared) { // Define shared memory for block-level reduction - // extern __shared__ float shared_mass[]; - __shared__ float shared_mass[256]; - - // get thread idx + extern __shared__ float shared_memory[]; + // __shared__ float shared_mass[256]; + + // partition shared memory + float *shared_mass = shared_memory; + float *shared_prop_rng = shared_memory + offset_shared; + + // get local thread idx int tid = threadIdx.x; + + // initialize shared_mass + shared_mass[tid] = 0.0f; + + // initialize shared_prop_rng + for (int i=0;i<3;i++){ + shared_prop_rng[tid+i*offset_shared] = 0.0f; + } + + + // get global thread idx int ind = blockIdx.x * blockDim.x + threadIdx.x; - if (ind >= n_halos) + + // get halo idx + int hid = ind / sparsity; + if (hid >= n_halos) { + // printf("Out of halo range.\n"); return; } - // determine which halo mass to access - int hid = ind / sparsity; + // get halo mass float M = d_halo_masses[hid]; - // idx of d_halo_masses_out - int out_id = write_offset + ind; + // get stoc properties from in halo + float prop_in[3] = {d_star_rng_in[hid], d_sfr_rng_in[hid], d_xray_rng_in[hid]}; + + // get correction + double corr_arr[3] = {d_hs_constants.corr_star, d_hs_constants.corr_sfr, d_hs_constants.corr_xray}; - // int n_prog = 0; // the value will be updated after calling stoc_sample + // get coordinate + int coords_in[3] = {d_halo_coords_in[hid*3], d_halo_coords_in[hid*3+1], d_halo_coords_in[hid*3+2]}; + + // idx of d_halo_masses_out and other halo field arrays + int out_id = write_offset + ind; // set condition-dependent variables for sampling stoc_set_consts_cond(&d_hs_constants, M, HMF, x_min, x_width, d_y_arr, n_bin, &expected_mass[hid]); + // if (hid == 1){ + // printf("check here. \n"); + // } + + // if (hid == 2){ + // printf("check here. 
\n"); + // } // tmp: just to verify the tables have been copied correctly if (ind == 0) @@ -497,95 +667,166 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_y_arr, doub printf("test params: %f \n", d_test_params); printf("A_VCB: %f \n", d_astro_params.A_VCB); printf("SIGMA_8: %f \n", d_cosmo_params.SIGMA_8); + // printf("number of rng states: %d\n", g_numRNGStates); + // // tiger tmp: debug (start) + // double res1, res2, res3, res4; + // res1 = EvaluateNhaloInv(18.694414138793945, 0.0046723012881037529); + // printf("tmp res1 on gpu: %.17f \n", res1); + // res2 = EvaluateNhaloInv(20.084152221679688, 0.32153863360286256); + // printf("tmp res2 on gpu: %.17f \n", res2); + // res3 = EvaluateNhaloInv(26.806314468383789, 0.8698794976081996); + // printf("tmp res3 on gpu: %.17f \n", res3); + // res4 = EvaluateNhaloInv(19.00053596496582, 0.83130413049947305); + // printf("tmp res4 on gpu: %.17f \n", res4); + // // tiger tmp: debug (end) } + // if (ind < 10000){ + // curandState ls_tmp = d_rngStates[ind]; + // } // todo: each thread across different blocks has unique random state // curand_init(seed, threadIdx.x, 0, &d_states[threadIdx.x]); // curandState local_state = d_states[threadIdx.x]; curandState local_state = d_states[ind]; + // if (blockIdx.x > 100000){ + // // printf("check here. 
\n"); + // } // tmp: for validation only // sample_dndM_inverse(0.38, &d_hs_constants, &local_state); // int tmp1 = 20; // double tmp2 = 681273355217.0; - // float tmp3 = 101976856.0; + // float tmp3 = 101976856.0; // remove_random_halo(&local_state, 59, &tmp1, &tmp2, &tmp3); - stoc_sample(&d_hs_constants, &local_state, &shared_mass[tid]); - d_states[ind] = local_state; + + // check sample condition + // condition 0: no sampling; condition 1: use expected_M; condition 2: sampling + int sampleCondition = 2; + stoc_sample(&d_hs_constants, &local_state, &shared_mass[tid], &sampleCondition); + + // get stochastic halo properties + set_prop_rng(&local_state, true, corr_arr, prop_in, &shared_prop_rng[tid*3]); + + __syncthreads(); - // printf("the first element of shared mass: %f \n", shared_mass[0]); - // passing value to arrays in global memory is done by one thread per group if (tid % sparsity == 0){ - float Mprog = 0.0; - int write_limit = 0; - int meetCondition = 0; + if (sampleCondition == 0){ + d_n_prog[hid] = 0; + } + if (sampleCondition == 1){ + if(shared_mass[tid] >= d_user_params.SAMPLER_MIN_MASS){ + d_halo_masses_out[out_id] = shared_mass[tid]; + d_n_prog[hid] = 1; + d_star_rng_out[out_id] = shared_prop_rng[3 * tid]; + d_sfr_rng_out[out_id] = shared_prop_rng[3 * tid + 1]; + d_xray_rng_out[out_id] = shared_prop_rng[3 * tid + 2]; + d_halo_coords_out[out_id*3] = coords_in[0]; + d_halo_coords_out[out_id*3+1] = coords_in[1]; + d_halo_coords_out[out_id*3+2] = coords_in[2]; - for (int i = 0; i < sparsity; ++i){ - Mprog += shared_mass[tid + i]; - if (Mprog >= d_hs_constants.expected_M) - { - write_limit = i; - meetCondition = 1; - break; } + } + if (sampleCondition == 2){ + float Mprog = 0.0; + int write_limit = 0; + int meetCondition = 0; + + for (int i = 0; i < sparsity; ++i){ + Mprog += shared_mass[tid + i]; + if (Mprog >= d_hs_constants.expected_M) + { + write_limit = i; + meetCondition = 1; + break; + } + } - // d_halo_masses_out[out_id+i] = shared_mass[tid+i]; + 
if (meetCondition){ + // correct the mass samples + int n_prog = write_limit +1; + + fix_mass_sample(&local_state, d_hs_constants.expected_M, &Mprog, &shared_mass[tid], write_limit, &n_prog); + + // record number of progenitors + d_n_prog[hid] = min(100,n_prog); + + for (int i = 0; i < write_limit + 1; ++i) + { + if(shared_mass[tid + i] < d_user_params.SAMPLER_MIN_MASS) continue; + // write the final mass sample to array in global memory + d_halo_masses_out[out_id + i] = shared_mass[tid + i]; + d_star_rng_out[out_id + i] = shared_prop_rng[3*(tid +i)]; + d_sfr_rng_out[out_id + i] = shared_prop_rng[3*(tid+i) + 1]; + d_xray_rng_out[out_id + i] = shared_prop_rng[3*(tid+i) + 2]; + d_halo_coords_out[(out_id+i) * 3] = coords_in[0]; + d_halo_coords_out[(out_id+i) * 3 + 1] = coords_in[1]; + d_halo_coords_out[(out_id+i) * 3 + 2] = coords_in[2]; + } } - if (meetCondition){ - // correct the mass samples - fix_mass_sample(&local_state, d_hs_constants.expected_M, &Mprog, &shared_mass[tid], write_limit); + else{ + d_further_process[hid] = 1; + d_nprog_predict[hid] = ceil(d_hs_constants.expected_M * sparsity / Mprog); - for (int i = 0; i < write_limit; ++i) - { - - // write the final mass sample to array in global memory - d_halo_masses_out[out_id + i] = shared_mass[tid + i]; } } - else{ - d_further_process[hid] = 1; - d_nprog_predict[hid] = ceil(d_hs_constants.expected_M * sparsity / Mprog); - - } } - // Perform reduction within the block - // for (int stride = blockDim.x / 2; stride > 0; stride /= 2) - // { - // if (tid < stride) - // { - // shared_check[tid] += shared_check[tid + stride]; - // } - // __syncthreads(); // Ensure all threads have completed each stage of reduction - // } - - // Write the result from each block to the global sum - // if (tid == 0) - // { - // atomicAdd(d_sum_check, shared_check[0]); - // } - - // Sample the CMF set by the descendant - // stoc_sample(&hs_constants, &local_state, &n_prog, prog_buf); - - // double sigma = EvaluateSigma(log(M), x_min, 
x_width, d_y_arr, n_bin); - // double delta = get_delta_crit(HMF, sigma, d_hs_constants.growth_in)\ + // Perform reduction within the block + // for (int stride = blockDim.x / 2; stride > 0; stride /= 2) + // { + // if (tid < stride) + // { + // shared_check[tid] += shared_check[tid + stride]; + // } + // __syncthreads(); // Ensure all threads have completed each stage of reduction + // } + + // Write the result from each block to the global sum + // if (tid == 0) + // { + // atomicAdd(d_sum_check, shared_check[0]); + // } + + // Sample the CMF set by the descendant + // stoc_sample(&hs_constants, &local_state, &n_prog, prog_buf); + + // double sigma = EvaluateSigma(log(M), x_min, x_width, d_y_arr, n_bin); + // double delta = get_delta_crit(HMF, sigma, d_hs_constants.growth_in)\ // / d_hs_constants.growth_in * d_hs_constants.growth_out; + d_states[ind] = local_state; return; } // function to launch kernel grids -int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_arr, int n_bin_y, double x_min, double x_width, - struct HaloSamplingConstants hs_constants, unsigned long long int n_buffer) +int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xray_rng, int *halo_coords, + unsigned long long int n_halos, float *y_arr, int n_bin_y, double x_min, double x_width, + struct HaloSamplingConstants hs_constants, unsigned long long int n_buffer, HaloField *halofield_out) { - // allocate memory and copy halo_masses to the device + // allocate memory and copy halo data to the device (halo in) size_t size_halo = sizeof(float) * n_halos; float *d_halo_masses; CALL_CUDA(cudaMalloc(&d_halo_masses, size_halo)); CALL_CUDA(cudaMemcpy(d_halo_masses, halo_masses, size_halo, cudaMemcpyHostToDevice)); + float *d_star_rng; + CALL_CUDA(cudaMalloc(&d_star_rng, size_halo)); + CALL_CUDA(cudaMemcpy(d_star_rng, star_rng, size_halo, cudaMemcpyHostToDevice)); + + float *d_sfr_rng; + CALL_CUDA(cudaMalloc(&d_sfr_rng, size_halo)); + 
CALL_CUDA(cudaMemcpy(d_sfr_rng, sfr_rng, size_halo, cudaMemcpyHostToDevice)); + + float *d_xray_rng; + CALL_CUDA(cudaMalloc(&d_xray_rng, size_halo)); + CALL_CUDA(cudaMemcpy(d_xray_rng, xray_rng, size_halo, cudaMemcpyHostToDevice)); + + int *d_halo_coords; + size_t size_halo_coords = 3 * sizeof(int) * n_halos; + CALL_CUDA(cudaMalloc(&d_halo_coords, size_halo_coords)); + CALL_CUDA(cudaMemcpy(d_halo_coords, halo_coords, size_halo_coords, cudaMemcpyHostToDevice)); + // allocate memory and copy y_arr of sigma_table to the device size_t size_yarr = sizeof(float) * n_bin_y; float *d_y_arr; @@ -602,6 +843,11 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a CALL_CUDA(cudaMalloc(&d_further_process, sizeof(int)*n_halos)); CALL_CUDA(cudaMemset(d_further_process, 0, sizeof(int)*n_halos)); + // allocate memory to store number of progenitors per halo + int *d_n_prog; + CALL_CUDA(cudaMalloc(&d_n_prog, sizeof(int) * n_halos)); + initializeArray(d_n_prog, n_halos, 32); + // allocate memory to store estimated n_prog after the first kernel launch int *d_nprog_predict; CALL_CUDA(cudaMalloc(&d_nprog_predict, sizeof(int) * n_halos)); @@ -616,49 +862,48 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a // get parameters needed by the kernel int HMF = user_params_global->HMF; - // start with 4 threads work with one halo - int sparsity = 4; - - // define threads layout for starting - // int n_threads = 256; - // int n_blocks = (int)((n_halos*sparsity + 255) / 256); - // int total_threads = n_threads * n_blocks; - - GridLayout grids = getWorkload(sparsity, n_halos); - int total_threads = grids.n_threads * grids.n_blocks; + // set buffer size (hard-coded) + int scale = 5; + size_t d_n_buffer = n_halos * scale; + size_t buffer_size = sizeof(float) * d_n_buffer; // allocate memory for out halos (just allocate once at each call of this grid launch function) - - // size_t buffer_size = sizeof(float) * max(total_threads * 2, 
n_buffer) * 2; - size_t d_n_buffer = total_threads * 4 + 10; - size_t buffer_size = sizeof(float) * d_n_buffer; float *d_halo_masses_out; CALL_CUDA(cudaMalloc(&d_halo_masses_out, buffer_size)); - // CALL_CUDA(cudaMemset(d_halo_masses_out, 0, buffer_size)); - initializeArray(d_halo_masses_out, d_n_buffer, -1.0f); + CALL_CUDA(cudaMemset(d_halo_masses_out, 0, buffer_size)); + // initializeArray(d_halo_masses_out, d_n_buffer, -1.2f); - float *star_rng_out; - CALL_CUDA(cudaMalloc(&star_rng_out, buffer_size)); + float *d_star_rng_out; + CALL_CUDA(cudaMalloc(&d_star_rng_out, buffer_size)); + CALL_CUDA(cudaMemset(d_star_rng_out, 0, buffer_size)); - float *sfr_rng_out; - CALL_CUDA(cudaMalloc(&sfr_rng_out, buffer_size)); + float *d_sfr_rng_out; + CALL_CUDA(cudaMalloc(&d_sfr_rng_out, buffer_size)); + CALL_CUDA(cudaMemset(d_sfr_rng_out, 0, buffer_size)); - float *xray_rng_out; - CALL_CUDA(cudaMalloc(&xray_rng_out, buffer_size)); + float *d_xray_rng_out; + CALL_CUDA(cudaMalloc(&d_xray_rng_out, buffer_size)); + CALL_CUDA(cudaMemset(d_xray_rng_out, 0, buffer_size)); - float *halo_coords_out; - CALL_CUDA(cudaMalloc(&halo_coords_out, buffer_size * 3)); + int *d_halo_coords_out; + CALL_CUDA(cudaMalloc(&d_halo_coords_out, sizeof(int) * d_n_buffer * 3)); + initializeArray(d_halo_coords_out, d_n_buffer * 3, -1000); + // setup RNG (todo: set it only once for iteration over different redshift) + GridLayout grids_rng = getWorkload(1, d_n_buffer); + int total_threads_rng = grids_rng.n_threads * grids_rng.n_blocks; // Allocate memory for RNG states curandState *d_states; - CALL_CUDA(cudaMalloc((void **)&d_states, total_threads * sizeof(curandState))); + CALL_CUDA(cudaMalloc((void **)&d_states, total_threads_rng * sizeof(curandState))); // setup random states - setup_random_states<<>>(d_states, 1234ULL); - + setup_random_states<<>>(d_states, 1234ULL); // Check kernel launch errors CALL_CUDA(cudaGetLastError()); + // CALL_CUDA(cudaDeviceSynchronize()); + free_rand_states(); + // initiate 
n_halo check unsigned long long int n_halo_check = n_halos; @@ -666,7 +911,10 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a unsigned long long int write_offset = 0; // initialize n filter halo - unsigned long long int n_filter_halo = n_halos; + unsigned long long int n_halos_tbp = n_halos; + + // initialize number of progenitors processed + unsigned long long int n_processed_prog; getDeviceProperties(); @@ -676,89 +924,151 @@ int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_a printf("Kernel Registers per Thread: %d\n", attr.numRegs); printf("Kernel Max Threads per Block: %d\n", attr.maxThreadsPerBlock); + // start with 4 threads work with one halo + int sparsity = 4; + + // Check if sparsity is smaller than scale + if (sparsity >= scale) + { + throw std::runtime_error("'sparsity' must be smaller than 'scale'."); + } + + // initial kernel grid + GridLayout grids = getWorkload(sparsity, n_halos); + // launch kernel grid - while (n_filter_halo > 0){ - size_t shared_size = grids.n_threads*sizeof(float); - update_halo_constants<<>>(d_halo_masses, d_y_arr, x_min, x_width, n_halos, n_bin_y, hs_constants, HMF, d_states, d_halo_masses_out, star_rng_out, - sfr_rng_out, xray_rng_out, halo_coords_out, d_sum_check, d_further_process, d_nprog_predict, sparsity, write_offset, d_expected_mass); + while (n_halos_tbp > 0){ + size_t shared_size = grids.n_threads * sizeof(float) * 4; + int offset_shared = grids.n_threads; + printf("start launching kernel function.\n"); + update_halo_constants<<>>(d_halo_masses, d_star_rng, d_sfr_rng, d_xray_rng, d_halo_coords, + d_y_arr, x_min, x_width, n_halos_tbp, n_bin_y, hs_constants, HMF, d_states, d_halo_masses_out, d_star_rng_out, + d_sfr_rng_out, d_xray_rng_out, d_halo_coords_out, d_sum_check, d_further_process, d_nprog_predict, sparsity, write_offset, d_expected_mass, + d_n_prog, offset_shared); // Check kernel launch errors CALL_CUDA(cudaGetLastError()); - 
CALL_CUDA(cudaDeviceSynchronize()); + // CALL_CUDA(cudaDeviceSynchronize()); // filter device halo masses in-place - n_filter_halo = filterWithMask(d_halo_masses, d_further_process, n_halos); - printf("The number of halos for further processing: %d \n", n_filter_halo); - - // condense out halo mass array - unsigned long long int n_processed_prog = condenseDeviceArray(d_halo_masses_out, d_n_buffer, -1.0f); - printf("The number of progenitors written in out halo field so far: %d \n", n_processed_prog); - - // tmp: the following is just needed for debugging purpose - float *h_filter_halos; - CALL_CUDA(cudaHostAlloc((void **)&h_filter_halos, sizeof(float) * n_filter_halo, cudaHostAllocDefault)); - CALL_CUDA(cudaMemcpy(h_filter_halos, d_halo_masses, sizeof(float) * n_filter_halo, cudaMemcpyDeviceToHost)); + n_halos_tbp = filterWithMask(d_halo_masses, d_further_process, n_halos_tbp); + printf("The number of halos for further processing: %d \n", n_halos_tbp); - int *h_nprog_predict; - CALL_CUDA(cudaHostAlloc((void **)&h_nprog_predict, sizeof(int) * n_halos, cudaHostAllocDefault)); - CALL_CUDA(cudaMemcpy(h_nprog_predict, d_nprog_predict, sizeof(int) * n_halos, cudaMemcpyDeviceToHost)); + // // tmp 2025-01-19: check d_halo_masses_out writing out + // float *h_halo_masses_out_check; + // CALL_CUDA(cudaHostAlloc((void **)&h_halo_masses_out_check, buffer_size, cudaHostAllocDefault)); + // CALL_CUDA(cudaMemcpy(h_halo_masses_out_check, d_halo_masses_out, buffer_size, cudaMemcpyDeviceToHost)); - // update sparsity value - unsigned long long int available_n_buffer = d_n_buffer - n_processed_prog; - sparsity = getSparsity(available_n_buffer, n_filter_halo); + // number of progenitors per halo + int *h_n_prog; + CALL_CUDA(cudaHostAlloc((void **)&h_n_prog, sizeof(int)*n_halos, cudaHostAllocDefault)); + CALL_CUDA(cudaMemcpy(h_n_prog, d_n_prog, sizeof(int)*n_halos, cudaMemcpyDeviceToHost)); - // check max threadblock size - int device; - CALL_CUDA(cudaGetDevice(&device)); - 
cudaDeviceProp deviceProp; - CALL_CUDA(cudaGetDeviceProperties(&deviceProp, device)); - int max_threads_pb = deviceProp.maxThreadsPerBlock; + // Values to count + std::vector values_to_count = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 100,32}; - // sparsity should not exceed the max threads per block - sparsity = std::min(sparsity, 512); + // Count and display occurrences + countElements(h_n_prog, n_halos, values_to_count); - // reset grids layout - grids = getWorkload(sparsity, n_filter_halo); + // condense halo mass array on the device + n_processed_prog = condenseDeviceArray(d_halo_masses_out, d_n_buffer, 0.0f); + printf("The number of progenitors written in out halo field so far: %d \n", n_processed_prog); + + // condense other halo field arrays on the device + // unsigned long long int n_processed_star_rng = condenseDeviceArray(d_star_rng_out, d_n_buffer, 0.0f); + // printf("The number of star prop rng written in out halo field so far: %d \n", n_processed_star_rng); - // update write offset - write_offset = n_processed_prog; + // unsigned long long int n_processed_sfr_rng = condenseDeviceArray(d_sfr_rng_out, d_n_buffer, 0.0f); + // printf("The number of sfr prop rng written in out halo field so far: %d \n", n_processed_sfr_rng); - // reset mask array - CALL_CUDA(cudaMemset(d_further_process, 0, sizeof(int) * n_halos)); + // unsigned long long int n_processed_xray_rng = condenseDeviceArray(d_xray_rng_out, d_n_buffer, 0.0f); + // printf("The number of xray prop rng written in out halo field so far: %d \n", n_processed_xray_rng); - // copy data from device to host - int h_sum_check; - CALL_CUDA(cudaMemcpy(&h_sum_check, d_sum_check, sizeof(int), cudaMemcpyDeviceToHost)); + // unsigned long long int n_processed_coords = condenseDeviceArray(d_halo_coords_out, d_n_buffer*3, -1000); + // printf("The number of halo coords written in out halo field so far: %d \n", n_processed_coords); + // tmp: the following is just needed for debugging purpose + // float *h_filter_halos; + // 
CALL_CUDA(cudaHostAlloc((void **)&h_filter_halos, sizeof(float) * n_halos_tbp, cudaHostAllocDefault)); + // CALL_CUDA(cudaMemcpy(h_filter_halos, d_halo_masses, sizeof(float) * n_halos_tbp, cudaMemcpyDeviceToHost)); + + // int *h_nprog_predict; + // CALL_CUDA(cudaHostAlloc((void **)&h_nprog_predict, sizeof(int) * n_halos, cudaHostAllocDefault)); + // CALL_CUDA(cudaMemcpy(h_nprog_predict, d_nprog_predict, sizeof(int) * n_halos, cudaMemcpyDeviceToHost)); + + if (n_halos_tbp > 0){ + // update sparsity value + unsigned long long int available_n_buffer = d_n_buffer - n_processed_prog; + sparsity = getSparsity(available_n_buffer, n_halos_tbp); + + // check max threadblock size + int device; + CALL_CUDA(cudaGetDevice(&device)); + cudaDeviceProp deviceProp; + CALL_CUDA(cudaGetDeviceProperties(&deviceProp, device)); + int max_threads_pb = deviceProp.maxThreadsPerBlock; + + // sparsity should not exceed the max threads per block + // sparsity = 256; + sparsity = std::min(sparsity, 512); + + // reset grids layout + grids = getWorkload(sparsity, n_halos_tbp); + + // update write offset + write_offset = n_processed_prog; + + // reset mask array + CALL_CUDA(cudaMemset(d_further_process, 0, sizeof(int) * n_halos)); + + // copy data from device to host + int h_sum_check; + CALL_CUDA(cudaMemcpy(&h_sum_check, d_sum_check, sizeof(int), cudaMemcpyDeviceToHost)); + } // tmp: for debug only - CALL_CUDA(cudaFreeHost(h_filter_halos)); + // CALL_CUDA(cudaFreeHost(h_filter_halos)); // CALL_CUDA(cudaFreeHost(h_sum_check)); } - // tmp: for debugging purpose; out halo need to copy back to host after all halos being processed - float *h_halo_masses_out; - CALL_CUDA(cudaHostAlloc((void **)&h_halo_masses_out, buffer_size, cudaHostAllocDefault)); - CALL_CUDA(cudaMemcpy(h_halo_masses_out, d_halo_masses_out, buffer_size, cudaMemcpyDeviceToHost)); + // write data back to the host + halofield_out->n_halos = n_processed_prog; + size_t out_size = sizeof(float) * n_processed_prog; + + // float 
*h_halo_masses_out; + // CALL_CUDA(cudaHostAlloc((void **)&h_halo_masses_out, out_size, cudaHostAllocDefault)); + CALL_CUDA(cudaMemcpy(halofield_out->halo_masses, d_halo_masses_out, out_size, cudaMemcpyDeviceToHost)); - CALL_CUDA(cudaFreeHost(h_halo_masses_out)); - // } + + CALL_CUDA(cudaMemcpy(halofield_out->star_rng, d_star_rng_out, out_size, cudaMemcpyDeviceToHost)); + CALL_CUDA(cudaMemcpy(halofield_out->sfr_rng, d_sfr_rng_out, out_size, cudaMemcpyDeviceToHost)); + CALL_CUDA(cudaMemcpy(halofield_out->xray_rng, d_xray_rng_out, out_size, cudaMemcpyDeviceToHost)); + size_t out_coords_size = sizeof(int) * n_processed_prog * 3; + CALL_CUDA(cudaMemcpy(halofield_out->halo_coords, d_halo_coords_out, out_coords_size, cudaMemcpyDeviceToHost)); + + // Free device memory CALL_CUDA(cudaFree(d_halo_masses)); CALL_CUDA(cudaFree(d_y_arr)); CALL_CUDA(cudaFree(d_states)); CALL_CUDA(cudaFree(d_halo_masses_out)); - CALL_CUDA(cudaFree(star_rng_out)); - CALL_CUDA(cudaFree(sfr_rng_out)); - CALL_CUDA(cudaFree(xray_rng_out)); - CALL_CUDA(cudaFree(halo_coords_out)); + CALL_CUDA(cudaFree(d_star_rng_out)); + CALL_CUDA(cudaFree(d_sfr_rng_out)); + CALL_CUDA(cudaFree(d_xray_rng_out)); + CALL_CUDA(cudaFree(d_halo_coords_out)); CALL_CUDA(cudaFree(d_further_process)); validate_thrust(); condense_device_vector(); + testCondenseDeviceArray(); + + testFilterWithMask(); + + + return 0; } diff --git a/src/py21cmfast/src/Stochasticity.cuh b/src/py21cmfast/src/Stochasticity.cuh index 887aa384d..62a4065f1 100644 --- a/src/py21cmfast/src/Stochasticity.cuh +++ b/src/py21cmfast/src/Stochasticity.cuh @@ -5,8 +5,9 @@ extern "C" { #endif - int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_arr, int n_bin_y, double x_min, double x_width, - struct HaloSamplingConstants hs_constants, unsigned long long int n_buffer); + int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xray_rng, int *halo_coords, + unsigned long long int n_halos, float *y_arr, int n_bin_y, 
double x_min, double x_width, + struct HaloSamplingConstants hs_constants, unsigned long long int n_buffer, HaloField *halofield_out); #ifdef __cplusplus } #endif From 1b047af45c353d5ede4f3848850bd9b645fb22e4 Mon Sep 17 00:00:00 2001 From: JHu Date: Mon, 10 Feb 2025 18:44:21 +1100 Subject: [PATCH 080/145] tmp profilings --- src/py21cmfast/lightcones.py | 2 +- src/py21cmfast/outputs.py | 1 + src/py21cmfast/src/HaloField.c | 5 +++++ src/py21cmfast/wrapper.py | 41 ++++++++++++++++++++++------------ 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/src/py21cmfast/lightcones.py b/src/py21cmfast/lightcones.py index 3743f1c8c..7fb14cf3f 100644 --- a/src/py21cmfast/lightcones.py +++ b/src/py21cmfast/lightcones.py @@ -114,7 +114,7 @@ def with_equal_cdist_slices( d_at_redshift = cosmo.comoving_distance(min_redshift).to_value(Mpc) dmax = cosmo.comoving_distance(max_redshift).to_value(Mpc) res = resolution.to_value(Mpc) - + # breakpoint() lc_distances = np.arange(d_at_redshift, dmax + res, res) # if np.isclose(lc_distances.max() + res, dmax): # lc_distances = np.append(lc_distances, dmax) diff --git a/src/py21cmfast/outputs.py b/src/py21cmfast/outputs.py index 5fca83678..9a1b368c1 100644 --- a/src/py21cmfast/outputs.py +++ b/src/py21cmfast/outputs.py @@ -373,6 +373,7 @@ def compute( self, *, halos_desc, ics: InitialConditions, random_seed: int, hooks: dict ): """Compute the function.""" + # breakpoint() return self._compute( self.desc_redshift, self.redshift, diff --git a/src/py21cmfast/src/HaloField.c b/src/py21cmfast/src/HaloField.c index 02a0afa45..e40d2d442 100644 --- a/src/py21cmfast/src/HaloField.c +++ b/src/py21cmfast/src/HaloField.c @@ -30,6 +30,8 @@ #include "HaloField.h" +// #include "HaloField.cuh" + int check_halo(char * in_halo, UserParams *user_params, float R, int x, int y, int z, int check_type); void init_halo_coords(HaloField *halos, long long unsigned int n_halos); int pixel_in_halo(int grid_dim, int z_dim, int x, int x_index, int y, int 
y_index, int z, int z_index, float Rsq_curr_index ); @@ -42,6 +44,9 @@ int ComputeHaloField(float redshift_desc, float redshift, UserParams *user_param int status; + // update the global params to the device + // updateGlobalParams(user_params, cosmo_params, astro_params); + Try{ // This Try brackets the whole function, so we don't indent. //This happens if we are updating a halo field (no need to redo big halos) diff --git a/src/py21cmfast/wrapper.py b/src/py21cmfast/wrapper.py index b7eb51a1e..421f8d909 100644 --- a/src/py21cmfast/wrapper.py +++ b/src/py21cmfast/wrapper.py @@ -98,6 +98,8 @@ from scipy.interpolate import interp1d from typing import Any, Callable, Sequence +import time + from ._cfg import config from ._utils import OutputStruct, _check_compatible_inputs, _process_exitcode, asarray from .c_21cmfast import ffi, lib @@ -1004,7 +1006,7 @@ def perturb_field( # Run the C Code return fields.compute(ics=init_boxes, hooks=hooks) - +# @profile def determine_halo_list( *, redshift, @@ -1097,6 +1099,7 @@ def determine_halo_list( # precedence is as follows: If the descendant field is given, use that no matter what. 
# otherwise, if STOC_MINIMUM_Z == -1 *OR* we would jump over STOC_MINIMUM_Z, calculate this as the first box # if neither of the above are true, step back ZPRIME_STEP_FACTOR and calculate that box + # breakpoint() if not isinstance(halos_desc, HaloField) or not halos_desc.is_computed: # If a descendant field is not provided, we step back toward the minimum z if desc_z is None: @@ -1121,6 +1124,7 @@ def determine_halo_list( hbuffer_size = lib.expected_nhalo( redshift, user_params(), cosmo_params(), astro_params(), flag_options() ) + # breakpoint() hbuffer_size = int((hbuffer_size + 1) * user_params.MAXHALO_FACTOR) # set a minimum in case of fluctuation at high z hbuffer_size = int(max(hbuffer_size, 1e6)) @@ -1185,7 +1189,7 @@ def determine_halo_list( flag_options=flag_options, dummy=True, ) - + # breakpoint() # Run the C Code return fields.compute( ics=init_boxes, hooks=hooks, halos_desc=halos_desc, random_seed=random_seed @@ -3045,7 +3049,7 @@ def _get_required_redshifts_coeval(flag_options, redshift) -> list[float]: redshifts = np.sort(np.unique(redshifts))[::-1] return redshifts.tolist() - +# @profile def run_lightcone( *, redshift: float = None, @@ -3246,8 +3250,12 @@ def run_lightcone( redshift, global_params.ZPRIME_STEP_FACTOR, max_redshift ) ) - + print("max redshift: ", max_redshift) + print("number of z of iteration: ", scrollz.shape) + print("scrollz: ", scrollz) + # breakpoint() lcz = lightconer.lc_redshifts + if not np.all(min(scrollz) * 0.99 < lcz) and np.all(lcz < max(scrollz) * 1.01): # We have a 1% tolerance on the redshifts, because the lightcone redshifts are # computed via inverse fitting the comoving_distance. 
@@ -3342,12 +3350,13 @@ def run_lightcone( }, _globals=dict(global_params.items()), ) - + # breakpoint() + print(f"PerturbField calcuation start: {time.strftime('%H:%M:%S')}") if perturb is None: zz = scrollz else: zz = scrollz[:-1] - + # breakpoint() perturb_ = [] for z in zz: p = perturb_field(redshift=z, init_boxes=init_box, **iokw) @@ -3358,12 +3367,13 @@ def run_lightcone( pass perturb_.append(p) - + if perturb is not None: perturb_.append(perturb) perturb = perturb_ perturb_min = perturb[np.argmin(scrollz)] - + print(f"PerturbField calcuation finish: {time.strftime('%H:%M:%S')}") + # breakpoint() # Now that we've got all the perturb fields, we can purge init more. try: init_box.prepare_for_spin_temp( @@ -3390,7 +3400,7 @@ def run_lightcone( "Returning before the final redshift requires caching in order to " "continue the simulation later. Set write=True!" ) - + # breakpoint() # Iterate through redshift from top to bottom if lightcone.redshift != lightcone._current_redshift: logger.info( @@ -3421,7 +3431,7 @@ def run_lightcone( pf = None pf = None - + # breakpoint() # Now we can purge init_box further. 
try: init_box.prepare_for_halos(flag_options=flag_options, force=always_purge) @@ -3430,6 +3440,7 @@ def run_lightcone( # we explicitly pass the descendant halos here since we have a redshift list prior # this will generate the extra fields if STOC_MINIMUM_Z is given + print(f"HaloField calcuation start: {time.strftime('%H:%M:%S')}") pt_halos = [] if flag_options.USE_HALO_FIELD and not flag_options.FIXED_HALO_GRIDS: halos_desc = None @@ -3439,6 +3450,7 @@ def run_lightcone( halos_desc=halos_desc, **kw, ) + # breakpoint() halos_desc = halo_field pt_halos += [perturb_halo_list(redshift=z, halo_field=halo_field, **kw)] @@ -3450,7 +3462,8 @@ def run_lightcone( # reverse the halo lists to be in line with the redshift lists pt_halos = pt_halos[::-1] - + print(f"HaloField calcuation finish: {time.strftime('%H:%M:%S')}") + # breakpoint() # Now that we've got all the perturb fields, we can purge init more. try: init_box.prepare_for_spin_temp( @@ -3543,7 +3556,7 @@ def run_lightcone( spin_temp=st2, **iokw, ) - + # breakpoint() coeval = Coeval( redshift=z, initial_conditions=init_box, @@ -3584,7 +3597,7 @@ def run_lightcone( lightcone.global_quantities[quantity][iz] = np.mean( getattr(coeval, quantity) ) - + # breakpoint() # Get lightcone slices if prev_coeval is not None: for quantity, idx, this_lc in lightconer.make_lightcone_slices( @@ -3593,7 +3606,7 @@ def run_lightcone( if this_lc is not None: lightcone.lightcones[quantity][..., idx] = this_lc lc_index = idx - + # breakpoint() if lightcone_filename: lightcone.make_checkpoint( lightcone_filename, redshift=z, index=lc_index From 1b354b4702914ab25862cd212f4e5fb676b10a78 Mon Sep 17 00:00:00 2001 From: JHu Date: Mon, 10 Feb 2025 18:46:02 +1100 Subject: [PATCH 081/145] add cuda version of filtering (3d index) --- src/py21cmfast/src/filtering.c | 127 ++++++++++--------- src/py21cmfast/src/filtering.cu | 217 ++++++++++++++++++++++++++++++++ 2 files changed, 286 insertions(+), 58 deletions(-) create mode 100644 
src/py21cmfast/src/filtering.cu diff --git a/src/py21cmfast/src/filtering.c b/src/py21cmfast/src/filtering.c index 65c48912e..ff1f20719 100644 --- a/src/py21cmfast/src/filtering.c +++ b/src/py21cmfast/src/filtering.c @@ -17,6 +17,8 @@ #include "indexing.h" #include "dft.h" +#include "tiger_checks.h" + double real_tophat_filter(double kR){ //Second order taylor expansion around kR==0 if (kR < 1e-4) @@ -77,81 +79,90 @@ double spherical_shell_filter(double k, double R_outer, double R_inner){ } void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_param){ - int dimension, midpoint; //TODO: figure out why defining as ULL breaks this + int dimension, midpoint, total_elements; //TODO: figure out why defining as ULL breaks this switch(RES) { case 0: dimension = user_params_global->DIM; midpoint = MIDDLE; + total_elements = KSPACE_NUM_PIXELS; break; case 1: dimension = user_params_global->HII_DIM; midpoint = HII_MIDDLE; + total_elements = HII_KSPACE_NUM_PIXELS; break; default: LOG_ERROR("Resolution for filter functions must be 0(DIM) or 1(HII_DIM)"); Throw(ValueError); break; } + int dim_z = (int)(user_params_global->NON_CUBIC_FACTOR * midpoint) + 1; + // printf("RES: %d\n", RES); + // printf("The total number of elements: %d\n", total_elements); + // printf("dimension: %d\n", dimension); + // printf("z dimension: %d\n", dim_z); - //setup constants if needed - double R_const; - if(filter_type == 3){ - R_const = exp(-R/R_param); - } + checkComplextype(box, total_elements, dimension,dim_z, midpoint, RES, filter_type, R, R_param); + + // setup constants if needed + // double R_const; + // if(filter_type == 3){ + // R_const = exp(-R/R_param); + // } // loop through k-box - #pragma omp parallel num_threads(user_params_global->N_THREADS) - { - int n_x, n_z, n_y; - float k_x, k_y, k_z, k_mag_sq, kR; - unsigned long long grid_index; - #pragma omp for - for (n_x=0; n_xmidpoint) {k_x =(n_x-dimension) * DELTA_K;} - else {k_x = n_x * DELTA_K;} - - for (n_y=0; 
n_ymidpoint) {k_y =(n_y-dimension) * DELTA_K;} - else {k_y = n_y * DELTA_K;} - - for (n_z=0; n_z<=(int)(user_params_global->NON_CUBIC_FACTOR*midpoint); n_z++){ - k_z = n_z * DELTA_K_PARA; - k_mag_sq = k_x*k_x + k_y*k_y + k_z*k_z; - - grid_index = RES==1 ? HII_C_INDEX(n_x, n_y, n_z) : C_INDEX(n_x, n_y, n_z); - - if (filter_type == 0){ // real space top-hat - kR = sqrt(k_mag_sq)*R; - box[grid_index] *= real_tophat_filter(kR); - } - else if (filter_type == 1){ // k-space top hat - //NOTE: why was this commented???? - // This is actually (kR^2) but since we zero the value and find kR > 1 this is more computationally efficient - // kR = 0.17103765852*( k_x*k_x + k_y*k_y + k_z*k_z )*R*R; - kR = sqrt(k_mag_sq)*R; - box[grid_index] *= sharp_k_filter(kR); - } - else if (filter_type == 2){ // gaussian - // This is actually (kR^2) but since we zero the value and find kR > 1 this is more computationally efficient - kR = k_mag_sq*R*R; - box[grid_index] *= gaussian_filter(kR); - } - //The next two filters are not given by the HII_FILTER global, but used for specific grids - else if (filter_type == 3){ // exponentially decaying tophat, param == scale of decay (MFP) - //NOTE: This should be optimized, I havne't looked at it in a while - box[grid_index] *= exp_mfp_filter(sqrt(k_mag_sq),R,R_param,R_const); - } - else if (filter_type == 4){ //spherical shell, R_param == inner radius - box[grid_index] *= spherical_shell_filter(sqrt(k_mag_sq),R,R_param); - } - else{ - if ( (n_x==0) && (n_y==0) && (n_z==0) ) - LOG_WARNING("Filter type %i is undefined. 
Box is unfiltered.", filter_type); - } - } - } - } // end looping through k box - } + // #pragma omp parallel num_threads(user_params_global->N_THREADS) + // { + // int n_x, n_z, n_y; + // float k_x, k_y, k_z, k_mag_sq, kR; + // unsigned long long grid_index; + // #pragma omp for + // for (n_x=0; n_xmidpoint) {k_x =(n_x-dimension) * DELTA_K;} + // else {k_x = n_x * DELTA_K;} + + // for (n_y=0; n_ymidpoint) {k_y =(n_y-dimension) * DELTA_K;} + // else {k_y = n_y * DELTA_K;} + + // for (n_z=0; n_z<=(int)(user_params_global->NON_CUBIC_FACTOR*midpoint); n_z++){ + // k_z = n_z * DELTA_K_PARA; + // k_mag_sq = k_x*k_x + k_y*k_y + k_z*k_z; + + // grid_index = RES==1 ? HII_C_INDEX(n_x, n_y, n_z) : C_INDEX(n_x, n_y, n_z); + + // if (filter_type == 0){ // real space top-hat + // kR = sqrt(k_mag_sq)*R; + // box[grid_index] *= real_tophat_filter(kR); + // } + // else if (filter_type == 1){ // k-space top hat + // //NOTE: why was this commented???? + // // This is actually (kR^2) but since we zero the value and find kR > 1 this is more computationally efficient + // // kR = 0.17103765852*( k_x*k_x + k_y*k_y + k_z*k_z )*R*R; + // kR = sqrt(k_mag_sq)*R; + // box[grid_index] *= sharp_k_filter(kR); + // } + // else if (filter_type == 2){ // gaussian + // // This is actually (kR^2) but since we zero the value and find kR > 1 this is more computationally efficient + // kR = k_mag_sq*R*R; + // box[grid_index] *= gaussian_filter(kR); + // } + // //The next two filters are not given by the HII_FILTER global, but used for specific grids + // else if (filter_type == 3){ // exponentially decaying tophat, param == scale of decay (MFP) + // //NOTE: This should be optimized, I havne't looked at it in a while + // box[grid_index] *= exp_mfp_filter(sqrt(k_mag_sq),R,R_param,R_const); + // } + // else if (filter_type == 4){ //spherical shell, R_param == inner radius + // box[grid_index] *= spherical_shell_filter(sqrt(k_mag_sq),R,R_param); + // } + // else{ + // if ( (n_x==0) && (n_y==0) && (n_z==0) 
) + // LOG_WARNING("Filter type %i is undefined. Box is unfiltered.", filter_type); + // } + // } + // } + // } // end looping through k box + // } return; } diff --git a/src/py21cmfast/src/filtering.cu b/src/py21cmfast/src/filtering.cu new file mode 100644 index 000000000..d7b2aa0b4 --- /dev/null +++ b/src/py21cmfast/src/filtering.cu @@ -0,0 +1,217 @@ +#include +// #include +#include +#include +#include + +// #include "logger.h" + +#include "indexing.h" +#include "Constants.h" +#include "InputParameters.h" + +#include "tiger_checks.h" + +// device functions +__device__ double real_tophat_filter(double kR) +{ + // Second order taylor expansion around kR==0 + if (kR < 1e-4) + return 1 - kR * kR / 10; + return 3.0 * pow(kR, -3) * (sin(kR) - cos(kR) * kR); +} + +__device__ double sharp_k_filter(double kR) +{ + // equates integrated volume to the real space top-hat (9pi/2)^(-1/3) + if (kR * 0.413566994 > 1) + return 0.; + return 1; +} + +__device__ double gaussian_filter(double kR_squared) +{ + return exp(-0.643 * 0.643 * kR_squared / 2.); +} + +__device__ double exp_mfp_filter(double k, double R, double mfp, double exp_term) +{ + double f; + + double kR = k * R; + double ratio = mfp / R; + // Second order taylor expansion around kR==0 + // NOTE: the taylor coefficients could be stored and passed in + // but there aren't any super expensive operations here + // assuming the integer pow calls are optimized by the compiler + // test with the profiler + if (kR < 1e-4) + { + double ts_0 = 6 * pow(ratio, 3) - exp_term * (6 * pow(ratio, 3) + 6 * pow(ratio, 2) + 3 * ratio); + return ts_0 + (exp_term * (2 * pow(ratio, 2) + 0.5 * ratio) - 2 * ts_0 * pow(ratio, 2)) * kR * kR; + } + + // Davies & Furlanetto MFP-eps(r) window function + f = (kR * kR * pow(ratio, 2) + 2 * ratio + 1) * ratio * cos(kR); + f += (kR * kR * (pow(ratio, 2) - pow(ratio, 3)) + ratio + 1) * sin(kR) / kR; + f *= exp_term; + f -= 2 * pow(ratio, 2); + f *= -3 * ratio / pow(pow(kR * ratio, 2) + 1, 2); + return 
f; +} + +__device__ double spherical_shell_filter(double k, double R_outer, double R_inner) +{ + double kR_inner = k * R_inner; + double kR_outer = k * R_outer; + + // Second order taylor expansion around kR_outer==0 + if (kR_outer < 1e-4) + return 1. - kR_outer * kR_outer / 10 * + (pow(R_inner / R_outer, 5) - 1) / + (pow(R_inner / R_outer, 3) - 1); + + return 3.0 / (pow(kR_outer, 3) - pow(kR_inner, 3)) * (sin(kR_outer) - cos(kR_outer) * kR_outer - sin(kR_inner) + cos(kR_inner) * kR_inner); +} + +// kernel function +__global__ void printComplexArray(cufftComplex *box, int num_elements, int dim_x, int dim_y, int dim_z, int midpoint, int RES, int filter_type, float R, float R_param, double delta_k, double delta_k_para) +{ + // global x, y, z index + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int z = blockIdx.z * blockDim.z + threadIdx.z; + + // index dependent calculation + float k_x, k_y, k_z, k_mag_sq, kR; + if (z > midpoint) + { + k_x = (z - dim_z) * delta_k; + } + else + { + k_x = z * delta_k; + } + + if (y > midpoint) + { + k_y = (y - dim_y) * delta_k; + } + else + { + k_y = y * delta_k; + } + + k_z = x * delta_k_para; + k_mag_sq = k_x * k_x + k_y * k_y + k_z * k_z; + + // setup constants if needed + double R_const; + if (filter_type == 3) + { + R_const = exp(-R / R_param); + } + + // data index + if (x < dim_x && y < dim_y && z < dim_z) + { + unsigned long long idx = x + dim_x * y + dim_x * dim_y * z; + + if (filter_type == 0) + { // real space top-hat + kR = sqrt(k_mag_sq) * R; + box[idx].x *= real_tophat_filter(kR); + box[idx].y *= real_tophat_filter(kR); + } + else if (filter_type == 1) + { // k-space top hat + // NOTE: why was this commented???? 
+ // This is actually (kR^2) but since we zero the value and find kR > 1 this is more computationally efficient + // kR = 0.17103765852*( k_x*k_x + k_y*k_y + k_z*k_z )*R*R; + kR = sqrt(k_mag_sq) * R; + box[idx].x *= sharp_k_filter(kR); + box[idx].y *= sharp_k_filter(kR); + } + else if (filter_type == 2) + { // gaussian + // This is actually (kR^2) but since we zero the value and find kR > 1 this is more computationally efficient + kR = k_mag_sq * R * R; + box[idx].x *= gaussian_filter(kR); + box[idx].y *= gaussian_filter(kR); + } + // The next two filters are not given by the HII_FILTER global, but used for specific grids + else if (filter_type == 3) + { // exponentially decaying tophat, param == scale of decay (MFP) + // NOTE: This should be optimized, I havne't looked at it in a while + box[idx].x *= exp_mfp_filter(sqrt(k_mag_sq), R, R_param, R_const); + box[idx].y *= exp_mfp_filter(sqrt(k_mag_sq), R, R_param, R_const); + } + else if (filter_type == 4) + { // spherical shell, R_param == inner radius + box[idx].x *= spherical_shell_filter(sqrt(k_mag_sq), R, R_param); + box[idx].y *= spherical_shell_filter(sqrt(k_mag_sq), R, R_param); + } + // else + // { + // if ((x == 0) && (y == 0) && (z == 0)) + // LOG_WARNING("Filter type %i is undefined. 
Box is unfiltered.", filter_type); + // } + + // if (idx < num_elements) + // { + // printf("Device Element %llu: (%f, %f)\n", idx, d_array[idx].x, d_array[idx].y); + // } + } + + +} + +int checkComplextype(fftwf_complex *box, int total_elements, int xy_dim, int z_dim, int midpoint, int RES, int filter_type, float R, float R_param) +{ + const int num_elements = 16; // Number of elements to print + + // Print original host array for reference + // printf("Original fftwf_complex host array:\n"); + // for (int i = 0; i < num_elements; i++) + // { + // printf("Host Element %d: (%f, %f)\n", i, box[i][0], box[i][1]); + // } + // printf("The total number of elements: %d\n", total_elements); + + // Cast fftwf_complex to cufftComplex + cufftComplex *h_cu_box = reinterpret_cast(box); + // Allocate device memory for cufftComplex array + cufftComplex *d_cu_box; + cudaMalloc((void **)&d_cu_box, sizeof(cufftComplex) * total_elements); + + // Copy the cuComplex array from host to device + cudaMemcpy(d_cu_box, h_cu_box, sizeof(cufftComplex) * total_elements, cudaMemcpyHostToDevice); + + // Define threads layout. 
+ int block_x = (z_dim + 3)/4; + int block_y = (xy_dim +7)/8; + int block_z = (xy_dim + 7)/8; + dim3 blockGrid(block_x, block_y, block_z); + dim3 threadsPerBlock(4,8,8); + + // pass the following macros as values + double delta_k = DELTA_K; + double delta_k_para = DELTA_K_PARA; + + // Launch the kernel to print the first few elements + printComplexArray<<>>(d_cu_box, num_elements, z_dim, xy_dim, xy_dim, midpoint, RES, filter_type, R, R_param, delta_k, delta_k_para); + + // Wait for the kernel to finish + cudaDeviceSynchronize(); + + // copy the data from device to host + cudaMemcpy(h_cu_box, d_cu_box, sizeof(cufftComplex) * total_elements, cudaMemcpyDeviceToHost); + + // Free device memory + cudaFree(d_cu_box); + + // Free host memory + // fftwf_free(box); + + return 0; +} From ec76419c6b684dd4f727d5778735511d09d067f4 Mon Sep 17 00:00:00 2001 From: JHu Date: Mon, 10 Feb 2025 18:47:53 +1100 Subject: [PATCH 082/145] add hello world cuda function for validation --- src/py21cmfast/src/hello_world.cu | 11 +++++++++++ src/py21cmfast/src/hello_world.h | 6 ++++++ 2 files changed, 17 insertions(+) create mode 100644 src/py21cmfast/src/hello_world.cu create mode 100644 src/py21cmfast/src/hello_world.h diff --git a/src/py21cmfast/src/hello_world.cu b/src/py21cmfast/src/hello_world.cu new file mode 100644 index 000000000..ab1a5595b --- /dev/null +++ b/src/py21cmfast/src/hello_world.cu @@ -0,0 +1,11 @@ +#include + +__global__ void hello_kernel() { + printf("Hello World from GPU! 
BlockIdx: %d, ThreadIdx: %d\n", blockIdx.x, threadIdx.x); +} + +extern "C" int call_cuda() { + hello_kernel<<<3, 3>>>(); + cudaDeviceSynchronize(); + return 0; +} \ No newline at end of file diff --git a/src/py21cmfast/src/hello_world.h b/src/py21cmfast/src/hello_world.h new file mode 100644 index 000000000..d11e30a1a --- /dev/null +++ b/src/py21cmfast/src/hello_world.h @@ -0,0 +1,6 @@ +#ifndef _HELLO_WORLD_H +#define _HELLO_WORLD_H + +int call_cuda(); + +#endif // HELLO_WORLD_H \ No newline at end of file From d92f42f944f302a7b0fc3db9b3aac398c696da5a Mon Sep 17 00:00:00 2001 From: JHu Date: Mon, 10 Feb 2025 18:48:58 +1100 Subject: [PATCH 083/145] adjust env file for use --- environment_dev.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/environment_dev.yml b/environment_dev.yml index 7c5635386..66b44974a 100644 --- a/environment_dev.yml +++ b/environment_dev.yml @@ -1,8 +1,8 @@ -name: 21cmfast +name: tiger21_dev_module channels: - defaults dependencies: - - python=3.9 +# - python=3.9 - sphinxcontrib-htmlhelp - zlib - pip @@ -113,7 +113,7 @@ dependencies: - h5py - jupyter - nb_conda - - pytest-plt +# - pytest-plt - questionary - - pip: - - pre-commit +# - pip: +# - pre-commit From bde0516dd64d776d2f9ef0830066e93ca80ae2b2 Mon Sep 17 00:00:00 2001 From: JHu Date: Mon, 10 Feb 2025 18:50:58 +1100 Subject: [PATCH 084/145] tmp change --- py21cmfast/c_21cmfast.c | 7507 ++++++++++++++++++++++ py21cmfast/c_21cmfast.o | Bin 0 -> 430416 bytes src/py21cmfast/src/logger.h | 25 +- src/py21cmfast/src/test_Stochasticity.cu | 46 + src/py21cmfast/src/tiger_checks.h | 17 + 5 files changed, 7587 insertions(+), 8 deletions(-) create mode 100644 py21cmfast/c_21cmfast.c create mode 100644 py21cmfast/c_21cmfast.o create mode 100644 src/py21cmfast/src/test_Stochasticity.cu create mode 100644 src/py21cmfast/src/tiger_checks.h diff --git a/py21cmfast/c_21cmfast.c b/py21cmfast/c_21cmfast.c new file mode 100644 index 000000000..38c90c1ab --- /dev/null +++ 
b/py21cmfast/c_21cmfast.c @@ -0,0 +1,7507 @@ +#define _CFFI_ + +/* We try to define Py_LIMITED_API before including Python.h. + + Mess: we can only define it if Py_DEBUG, Py_TRACE_REFS and + Py_REF_DEBUG are not defined. This is a best-effort approximation: + we can learn about Py_DEBUG from pyconfig.h, but it is unclear if + the same works for the other two macros. Py_DEBUG implies them, + but not the other way around. + + The implementation is messy (issue #350): on Windows, with _MSC_VER, + we have to define Py_LIMITED_API even before including pyconfig.h. + In that case, we guess what pyconfig.h will do to the macros above, + and check our guess after the #include. + + Note that on Windows, with CPython 3.x, you need >= 3.5 and virtualenv + version >= 16.0.0. With older versions of either, you don't get a + copy of PYTHON3.DLL in the virtualenv. We can't check the version of + CPython *before* we even include pyconfig.h. ffi.set_source() puts + a ``#define _CFFI_NO_LIMITED_API'' at the start of this file if it is + running on Windows < 3.5, as an attempt at fixing it, but that's + arguably wrong because it may not be the target version of Python. + Still better than nothing I guess. As another workaround, you can + remove the definition of Py_LIMITED_API here. + + See also 'py_limited_api' in cffi/setuptools_ext.py. +*/ +#if !defined(_CFFI_USE_EMBEDDING) && !defined(Py_LIMITED_API) +# ifdef _MSC_VER +# if !defined(_DEBUG) && !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) && !defined(_CFFI_NO_LIMITED_API) +# define Py_LIMITED_API +# endif +# include + /* sanity-check: Py_LIMITED_API will cause crashes if any of these + are also defined. Normally, the Python file PC/pyconfig.h does not + cause any of these to be defined, with the exception that _DEBUG + causes Py_DEBUG. Double-check that. 
*/ +# ifdef Py_LIMITED_API +# if defined(Py_DEBUG) +# error "pyconfig.h unexpectedly defines Py_DEBUG, but Py_LIMITED_API is set" +# endif +# if defined(Py_TRACE_REFS) +# error "pyconfig.h unexpectedly defines Py_TRACE_REFS, but Py_LIMITED_API is set" +# endif +# if defined(Py_REF_DEBUG) +# error "pyconfig.h unexpectedly defines Py_REF_DEBUG, but Py_LIMITED_API is set" +# endif +# endif +# else +# include +# if !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) && !defined(_CFFI_NO_LIMITED_API) +# define Py_LIMITED_API +# endif +# endif +#endif + +#include +#ifdef __cplusplus +extern "C" { +#endif +#include + +/* This part is from file 'cffi/parse_c_type.h'. It is copied at the + beginning of C sources generated by CFFI's ffi.set_source(). */ + +typedef void *_cffi_opcode_t; + +#define _CFFI_OP(opcode, arg) (_cffi_opcode_t)(opcode | (((uintptr_t)(arg)) << 8)) +#define _CFFI_GETOP(cffi_opcode) ((unsigned char)(uintptr_t)cffi_opcode) +#define _CFFI_GETARG(cffi_opcode) (((intptr_t)cffi_opcode) >> 8) + +#define _CFFI_OP_PRIMITIVE 1 +#define _CFFI_OP_POINTER 3 +#define _CFFI_OP_ARRAY 5 +#define _CFFI_OP_OPEN_ARRAY 7 +#define _CFFI_OP_STRUCT_UNION 9 +#define _CFFI_OP_ENUM 11 +#define _CFFI_OP_FUNCTION 13 +#define _CFFI_OP_FUNCTION_END 15 +#define _CFFI_OP_NOOP 17 +#define _CFFI_OP_BITFIELD 19 +#define _CFFI_OP_TYPENAME 21 +#define _CFFI_OP_CPYTHON_BLTN_V 23 // varargs +#define _CFFI_OP_CPYTHON_BLTN_N 25 // noargs +#define _CFFI_OP_CPYTHON_BLTN_O 27 // O (i.e. 
a single arg) +#define _CFFI_OP_CONSTANT 29 +#define _CFFI_OP_CONSTANT_INT 31 +#define _CFFI_OP_GLOBAL_VAR 33 +#define _CFFI_OP_DLOPEN_FUNC 35 +#define _CFFI_OP_DLOPEN_CONST 37 +#define _CFFI_OP_GLOBAL_VAR_F 39 +#define _CFFI_OP_EXTERN_PYTHON 41 + +#define _CFFI_PRIM_VOID 0 +#define _CFFI_PRIM_BOOL 1 +#define _CFFI_PRIM_CHAR 2 +#define _CFFI_PRIM_SCHAR 3 +#define _CFFI_PRIM_UCHAR 4 +#define _CFFI_PRIM_SHORT 5 +#define _CFFI_PRIM_USHORT 6 +#define _CFFI_PRIM_INT 7 +#define _CFFI_PRIM_UINT 8 +#define _CFFI_PRIM_LONG 9 +#define _CFFI_PRIM_ULONG 10 +#define _CFFI_PRIM_LONGLONG 11 +#define _CFFI_PRIM_ULONGLONG 12 +#define _CFFI_PRIM_FLOAT 13 +#define _CFFI_PRIM_DOUBLE 14 +#define _CFFI_PRIM_LONGDOUBLE 15 + +#define _CFFI_PRIM_WCHAR 16 +#define _CFFI_PRIM_INT8 17 +#define _CFFI_PRIM_UINT8 18 +#define _CFFI_PRIM_INT16 19 +#define _CFFI_PRIM_UINT16 20 +#define _CFFI_PRIM_INT32 21 +#define _CFFI_PRIM_UINT32 22 +#define _CFFI_PRIM_INT64 23 +#define _CFFI_PRIM_UINT64 24 +#define _CFFI_PRIM_INTPTR 25 +#define _CFFI_PRIM_UINTPTR 26 +#define _CFFI_PRIM_PTRDIFF 27 +#define _CFFI_PRIM_SIZE 28 +#define _CFFI_PRIM_SSIZE 29 +#define _CFFI_PRIM_INT_LEAST8 30 +#define _CFFI_PRIM_UINT_LEAST8 31 +#define _CFFI_PRIM_INT_LEAST16 32 +#define _CFFI_PRIM_UINT_LEAST16 33 +#define _CFFI_PRIM_INT_LEAST32 34 +#define _CFFI_PRIM_UINT_LEAST32 35 +#define _CFFI_PRIM_INT_LEAST64 36 +#define _CFFI_PRIM_UINT_LEAST64 37 +#define _CFFI_PRIM_INT_FAST8 38 +#define _CFFI_PRIM_UINT_FAST8 39 +#define _CFFI_PRIM_INT_FAST16 40 +#define _CFFI_PRIM_UINT_FAST16 41 +#define _CFFI_PRIM_INT_FAST32 42 +#define _CFFI_PRIM_UINT_FAST32 43 +#define _CFFI_PRIM_INT_FAST64 44 +#define _CFFI_PRIM_UINT_FAST64 45 +#define _CFFI_PRIM_INTMAX 46 +#define _CFFI_PRIM_UINTMAX 47 +#define _CFFI_PRIM_FLOATCOMPLEX 48 +#define _CFFI_PRIM_DOUBLECOMPLEX 49 +#define _CFFI_PRIM_CHAR16 50 +#define _CFFI_PRIM_CHAR32 51 + +#define _CFFI__NUM_PRIM 52 +#define _CFFI__UNKNOWN_PRIM (-1) +#define _CFFI__UNKNOWN_FLOAT_PRIM (-2) +#define 
_CFFI__UNKNOWN_LONG_DOUBLE (-3) + +#define _CFFI__IO_FILE_STRUCT (-1) + + +struct _cffi_global_s { + const char *name; + void *address; + _cffi_opcode_t type_op; + void *size_or_direct_fn; // OP_GLOBAL_VAR: size, or 0 if unknown + // OP_CPYTHON_BLTN_*: addr of direct function +}; + +struct _cffi_getconst_s { + unsigned long long value; + const struct _cffi_type_context_s *ctx; + int gindex; +}; + +struct _cffi_struct_union_s { + const char *name; + int type_index; // -> _cffi_types, on a OP_STRUCT_UNION + int flags; // _CFFI_F_* flags below + size_t size; + int alignment; + int first_field_index; // -> _cffi_fields array + int num_fields; +}; +#define _CFFI_F_UNION 0x01 // is a union, not a struct +#define _CFFI_F_CHECK_FIELDS 0x02 // complain if fields are not in the + // "standard layout" or if some are missing +#define _CFFI_F_PACKED 0x04 // for CHECK_FIELDS, assume a packed struct +#define _CFFI_F_EXTERNAL 0x08 // in some other ffi.include() +#define _CFFI_F_OPAQUE 0x10 // opaque + +struct _cffi_field_s { + const char *name; + size_t field_offset; + size_t field_size; + _cffi_opcode_t field_type_op; +}; + +struct _cffi_enum_s { + const char *name; + int type_index; // -> _cffi_types, on a OP_ENUM + int type_prim; // _CFFI_PRIM_xxx + const char *enumerators; // comma-delimited string +}; + +struct _cffi_typename_s { + const char *name; + int type_index; /* if opaque, points to a possibly artificial + OP_STRUCT which is itself opaque */ +}; + +struct _cffi_type_context_s { + _cffi_opcode_t *types; + const struct _cffi_global_s *globals; + const struct _cffi_field_s *fields; + const struct _cffi_struct_union_s *struct_unions; + const struct _cffi_enum_s *enums; + const struct _cffi_typename_s *typenames; + int num_globals; + int num_struct_unions; + int num_enums; + int num_typenames; + const char *const *includes; + int num_types; + int flags; /* future extension */ +}; + +struct _cffi_parse_info_s { + const struct _cffi_type_context_s *ctx; + _cffi_opcode_t 
*output; + unsigned int output_size; + size_t error_location; + const char *error_message; +}; + +struct _cffi_externpy_s { + const char *name; + size_t size_of_result; + void *reserved1, *reserved2; +}; + +#ifdef _CFFI_INTERNAL +static int parse_c_type(struct _cffi_parse_info_s *info, const char *input); +static int search_in_globals(const struct _cffi_type_context_s *ctx, + const char *search, size_t search_len); +static int search_in_struct_unions(const struct _cffi_type_context_s *ctx, + const char *search, size_t search_len); +#endif + +/* this block of #ifs should be kept exactly identical between + c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py + and cffi/_cffi_include.h */ +#if defined(_MSC_VER) +# include /* for alloca() */ +# if _MSC_VER < 1600 /* MSVC < 2010 */ + typedef __int8 int8_t; + typedef __int16 int16_t; + typedef __int32 int32_t; + typedef __int64 int64_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; + typedef __int8 int_least8_t; + typedef __int16 int_least16_t; + typedef __int32 int_least32_t; + typedef __int64 int_least64_t; + typedef unsigned __int8 uint_least8_t; + typedef unsigned __int16 uint_least16_t; + typedef unsigned __int32 uint_least32_t; + typedef unsigned __int64 uint_least64_t; + typedef __int8 int_fast8_t; + typedef __int16 int_fast16_t; + typedef __int32 int_fast32_t; + typedef __int64 int_fast64_t; + typedef unsigned __int8 uint_fast8_t; + typedef unsigned __int16 uint_fast16_t; + typedef unsigned __int32 uint_fast32_t; + typedef unsigned __int64 uint_fast64_t; + typedef __int64 intmax_t; + typedef unsigned __int64 uintmax_t; +# else +# include +# endif +# if _MSC_VER < 1800 /* MSVC < 2013 */ +# ifndef __cplusplus + typedef unsigned char _Bool; +# endif +# endif +#else +# include +# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) || defined(__hpux) +# include +# endif +#endif + +#ifdef __GNUC__ +# define 
_CFFI_UNUSED_FN __attribute__((unused)) +#else +# define _CFFI_UNUSED_FN /* nothing */ +#endif + +#ifdef __cplusplus +# ifndef _Bool + typedef bool _Bool; /* semi-hackish: C++ has no _Bool; bool is builtin */ +# endif +#endif + +/********** CPython-specific section **********/ +#ifndef PYPY_VERSION + + +#if PY_MAJOR_VERSION >= 3 +# define PyInt_FromLong PyLong_FromLong +#endif + +#define _cffi_from_c_double PyFloat_FromDouble +#define _cffi_from_c_float PyFloat_FromDouble +#define _cffi_from_c_long PyInt_FromLong +#define _cffi_from_c_ulong PyLong_FromUnsignedLong +#define _cffi_from_c_longlong PyLong_FromLongLong +#define _cffi_from_c_ulonglong PyLong_FromUnsignedLongLong +#define _cffi_from_c__Bool PyBool_FromLong + +#define _cffi_to_c_double PyFloat_AsDouble +#define _cffi_to_c_float PyFloat_AsDouble + +#define _cffi_from_c_int(x, type) \ + (((type)-1) > 0 ? /* unsigned */ \ + (sizeof(type) < sizeof(long) ? \ + PyInt_FromLong((long)x) : \ + sizeof(type) == sizeof(long) ? \ + PyLong_FromUnsignedLong((unsigned long)x) : \ + PyLong_FromUnsignedLongLong((unsigned long long)x)) : \ + (sizeof(type) <= sizeof(long) ? \ + PyInt_FromLong((long)x) : \ + PyLong_FromLongLong((long long)x))) + +#define _cffi_to_c_int(o, type) \ + ((type)( \ + sizeof(type) == 1 ? (((type)-1) > 0 ? (type)_cffi_to_c_u8(o) \ + : (type)_cffi_to_c_i8(o)) : \ + sizeof(type) == 2 ? (((type)-1) > 0 ? (type)_cffi_to_c_u16(o) \ + : (type)_cffi_to_c_i16(o)) : \ + sizeof(type) == 4 ? (((type)-1) > 0 ? (type)_cffi_to_c_u32(o) \ + : (type)_cffi_to_c_i32(o)) : \ + sizeof(type) == 8 ? (((type)-1) > 0 ? 
(type)_cffi_to_c_u64(o) \ + : (type)_cffi_to_c_i64(o)) : \ + (Py_FatalError("unsupported size for type " #type), (type)0))) + +#define _cffi_to_c_i8 \ + ((int(*)(PyObject *))_cffi_exports[1]) +#define _cffi_to_c_u8 \ + ((int(*)(PyObject *))_cffi_exports[2]) +#define _cffi_to_c_i16 \ + ((int(*)(PyObject *))_cffi_exports[3]) +#define _cffi_to_c_u16 \ + ((int(*)(PyObject *))_cffi_exports[4]) +#define _cffi_to_c_i32 \ + ((int(*)(PyObject *))_cffi_exports[5]) +#define _cffi_to_c_u32 \ + ((unsigned int(*)(PyObject *))_cffi_exports[6]) +#define _cffi_to_c_i64 \ + ((long long(*)(PyObject *))_cffi_exports[7]) +#define _cffi_to_c_u64 \ + ((unsigned long long(*)(PyObject *))_cffi_exports[8]) +#define _cffi_to_c_char \ + ((int(*)(PyObject *))_cffi_exports[9]) +#define _cffi_from_c_pointer \ + ((PyObject *(*)(char *, struct _cffi_ctypedescr *))_cffi_exports[10]) +#define _cffi_to_c_pointer \ + ((char *(*)(PyObject *, struct _cffi_ctypedescr *))_cffi_exports[11]) +#define _cffi_get_struct_layout \ + not used any more +#define _cffi_restore_errno \ + ((void(*)(void))_cffi_exports[13]) +#define _cffi_save_errno \ + ((void(*)(void))_cffi_exports[14]) +#define _cffi_from_c_char \ + ((PyObject *(*)(char))_cffi_exports[15]) +#define _cffi_from_c_deref \ + ((PyObject *(*)(char *, struct _cffi_ctypedescr *))_cffi_exports[16]) +#define _cffi_to_c \ + ((int(*)(char *, struct _cffi_ctypedescr *, PyObject *))_cffi_exports[17]) +#define _cffi_from_c_struct \ + ((PyObject *(*)(char *, struct _cffi_ctypedescr *))_cffi_exports[18]) +#define _cffi_to_c_wchar_t \ + ((_cffi_wchar_t(*)(PyObject *))_cffi_exports[19]) +#define _cffi_from_c_wchar_t \ + ((PyObject *(*)(_cffi_wchar_t))_cffi_exports[20]) +#define _cffi_to_c_long_double \ + ((long double(*)(PyObject *))_cffi_exports[21]) +#define _cffi_to_c__Bool \ + ((_Bool(*)(PyObject *))_cffi_exports[22]) +#define _cffi_prepare_pointer_call_argument \ + ((Py_ssize_t(*)(struct _cffi_ctypedescr *, \ + PyObject *, char **))_cffi_exports[23]) +#define 
_cffi_convert_array_from_object \ + ((int(*)(char *, struct _cffi_ctypedescr *, PyObject *))_cffi_exports[24]) +#define _CFFI_CPIDX 25 +#define _cffi_call_python \ + ((void(*)(struct _cffi_externpy_s *, char *))_cffi_exports[_CFFI_CPIDX]) +#define _cffi_to_c_wchar3216_t \ + ((int(*)(PyObject *))_cffi_exports[26]) +#define _cffi_from_c_wchar3216_t \ + ((PyObject *(*)(int))_cffi_exports[27]) +#define _CFFI_NUM_EXPORTS 28 + +struct _cffi_ctypedescr; + +static void *_cffi_exports[_CFFI_NUM_EXPORTS]; + +#define _cffi_type(index) ( \ + assert((((uintptr_t)_cffi_types[index]) & 1) == 0), \ + (struct _cffi_ctypedescr *)_cffi_types[index]) + +static PyObject *_cffi_init(const char *module_name, Py_ssize_t version, + const struct _cffi_type_context_s *ctx) +{ + PyObject *module, *o_arg, *new_module; + void *raw[] = { + (void *)module_name, + (void *)version, + (void *)_cffi_exports, + (void *)ctx, + }; + + module = PyImport_ImportModule("_cffi_backend"); + if (module == NULL) + goto failure; + + o_arg = PyLong_FromVoidPtr((void *)raw); + if (o_arg == NULL) + goto failure; + + new_module = PyObject_CallMethod( + module, (char *)"_init_cffi_1_0_external_module", (char *)"O", o_arg); + + Py_DECREF(o_arg); + Py_DECREF(module); + return new_module; + + failure: + Py_XDECREF(module); + return NULL; +} + + +#ifdef HAVE_WCHAR_H +typedef wchar_t _cffi_wchar_t; +#else +typedef uint16_t _cffi_wchar_t; /* same random pick as _cffi_backend.c */ +#endif + +_CFFI_UNUSED_FN static uint16_t _cffi_to_c_char16_t(PyObject *o) +{ + if (sizeof(_cffi_wchar_t) == 2) + return (uint16_t)_cffi_to_c_wchar_t(o); + else + return (uint16_t)_cffi_to_c_wchar3216_t(o); +} + +_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char16_t(uint16_t x) +{ + if (sizeof(_cffi_wchar_t) == 2) + return _cffi_from_c_wchar_t((_cffi_wchar_t)x); + else + return _cffi_from_c_wchar3216_t((int)x); +} + +_CFFI_UNUSED_FN static int _cffi_to_c_char32_t(PyObject *o) +{ + if (sizeof(_cffi_wchar_t) == 4) + return 
(int)_cffi_to_c_wchar_t(o); + else + return (int)_cffi_to_c_wchar3216_t(o); +} + +_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(unsigned int x) +{ + if (sizeof(_cffi_wchar_t) == 4) + return _cffi_from_c_wchar_t((_cffi_wchar_t)x); + else + return _cffi_from_c_wchar3216_t((int)x); +} + +union _cffi_union_alignment_u { + unsigned char m_char; + unsigned short m_short; + unsigned int m_int; + unsigned long m_long; + unsigned long long m_longlong; + float m_float; + double m_double; + long double m_longdouble; +}; + +struct _cffi_freeme_s { + struct _cffi_freeme_s *next; + union _cffi_union_alignment_u alignment; +}; + +_CFFI_UNUSED_FN static int +_cffi_convert_array_argument(struct _cffi_ctypedescr *ctptr, PyObject *arg, + char **output_data, Py_ssize_t datasize, + struct _cffi_freeme_s **freeme) +{ + char *p; + if (datasize < 0) + return -1; + + p = *output_data; + if (p == NULL) { + struct _cffi_freeme_s *fp = (struct _cffi_freeme_s *)PyObject_Malloc( + offsetof(struct _cffi_freeme_s, alignment) + (size_t)datasize); + if (fp == NULL) + return -1; + fp->next = *freeme; + *freeme = fp; + p = *output_data = (char *)&fp->alignment; + } + memset((void *)p, 0, (size_t)datasize); + return _cffi_convert_array_from_object(p, ctptr, arg); +} + +_CFFI_UNUSED_FN static void +_cffi_free_array_arguments(struct _cffi_freeme_s *freeme) +{ + do { + void *p = (void *)freeme; + freeme = freeme->next; + PyObject_Free(p); + } while (freeme != NULL); +} + +/********** end CPython-specific section **********/ +#else +_CFFI_UNUSED_FN +static void (*_cffi_call_python_org)(struct _cffi_externpy_s *, char *); +# define _cffi_call_python _cffi_call_python_org +#endif + + +#define _cffi_array_len(array) (sizeof(array) / sizeof((array)[0])) + +#define _cffi_prim_int(size, sign) \ + ((size) == 1 ? ((sign) ? _CFFI_PRIM_INT8 : _CFFI_PRIM_UINT8) : \ + (size) == 2 ? ((sign) ? _CFFI_PRIM_INT16 : _CFFI_PRIM_UINT16) : \ + (size) == 4 ? ((sign) ? 
_CFFI_PRIM_INT32 : _CFFI_PRIM_UINT32) : \ + (size) == 8 ? ((sign) ? _CFFI_PRIM_INT64 : _CFFI_PRIM_UINT64) : \ + _CFFI__UNKNOWN_PRIM) + +#define _cffi_prim_float(size) \ + ((size) == sizeof(float) ? _CFFI_PRIM_FLOAT : \ + (size) == sizeof(double) ? _CFFI_PRIM_DOUBLE : \ + (size) == sizeof(long double) ? _CFFI__UNKNOWN_LONG_DOUBLE : \ + _CFFI__UNKNOWN_FLOAT_PRIM) + +#define _cffi_check_int(got, got_nonpos, expected) \ + ((got_nonpos) == (expected <= 0) && \ + (got) == (unsigned long long)expected) + +#ifdef MS_WIN32 +# define _cffi_stdcall __stdcall +#else +# define _cffi_stdcall /* nothing */ +#endif + +#ifdef __cplusplus +} +#endif + +/************************************************************/ + + + #include "21cmFAST.h" + + +/************************************************************/ + +static void *_cffi_types[] = { +/* 0 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double) +/* 1 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), // double +/* 2 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 3 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, UserParams *, CosmoParams *, AstroParams *, FlagOptions *) +/* 4 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 5 */ _CFFI_OP(_CFFI_OP_POINTER, 514), // UserParams * +/* 6 */ _CFFI_OP(_CFFI_OP_POINTER, 504), // CosmoParams * +/* 7 */ _CFFI_OP(_CFFI_OP_POINTER, 502), // AstroParams * +/* 8 */ _CFFI_OP(_CFFI_OP_POINTER, 505), // FlagOptions * +/* 9 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 10 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double) +/* 11 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 12 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 13 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 14 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double) +/* 15 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 16 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 17 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 18 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 19 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double) +/* 20 */ 
_CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 21 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 22 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 23 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 24 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 25 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double) +/* 26 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 27 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 28 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 29 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 30 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 31 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 32 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 33 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 34 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double) +/* 35 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 36 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 37 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 38 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 39 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 40 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 41 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 42 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 43 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 44 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double, double) +/* 45 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 46 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 47 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 48 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 49 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 50 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 51 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 52 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 53 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 54 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 55 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double, double, _Bool) +/* 56 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 57 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 58 */ 
_CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 59 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 60 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 61 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 62 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 63 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 64 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 65 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), // _Bool +/* 66 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 67 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double, double, double) +/* 68 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 69 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 70 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 71 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 72 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 73 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 74 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 75 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 76 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 77 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 78 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 79 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double, double, double, _Bool) +/* 80 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 81 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 82 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 83 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 84 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 85 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 86 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 87 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 88 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 89 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 90 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), +/* 91 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 92 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double, double, double, double) +/* 93 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 94 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 95 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 96 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 
14), +/* 97 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 98 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 99 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 100 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 101 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 102 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 103 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 104 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 105 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double, double, double, double, double, double, double, int) +/* 106 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 107 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 108 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 109 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 110 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 111 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 112 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 113 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 114 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 115 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 116 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 117 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 118 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 119 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 120 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), // int +/* 121 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 122 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double, double, double, double, double, double, int) +/* 123 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 124 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 125 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 126 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 127 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 128 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 129 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 130 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 131 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 132 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 133 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 134 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 135 */ 
_CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 136 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 137 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 138 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, int) +/* 139 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 140 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 141 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 142 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 143 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 144 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 145 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 146 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 147 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, int) +/* 148 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 149 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 150 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 151 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 152 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 153 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 154 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, int) +/* 155 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 156 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 157 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 158 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 159 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 160 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(float) +/* 161 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), // float +/* 162 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 163 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(int, double, double) +/* 164 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 165 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 166 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 167 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 168 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(void) +/* 169 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 170 */ _CFFI_OP(_CFFI_OP_FUNCTION, 161), // float()(UserParams *, CosmoParams *, int, float *, float *) +/* 171 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 172 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 173 */ 
_CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 174 */ _CFFI_OP(_CFFI_OP_POINTER, 161), // float * +/* 175 */ _CFFI_OP(_CFFI_OP_NOOP, 174), +/* 176 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 177 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(UserParams *, CosmoParams *) +/* 178 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 179 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 180 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 181 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(UserParams *, CosmoParams *, AstroParams *, FlagOptions *) +/* 182 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 183 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 184 */ _CFFI_OP(_CFFI_OP_NOOP, 7), +/* 185 */ _CFFI_OP(_CFFI_OP_NOOP, 8), +/* 186 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 187 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(UserParams *, CosmoParams *, AstroParams *, FlagOptions *, HaloBox *, double, double, int, XraySourceBox *) +/* 188 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 189 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 190 */ _CFFI_OP(_CFFI_OP_NOOP, 7), +/* 191 */ _CFFI_OP(_CFFI_OP_NOOP, 8), +/* 192 */ _CFFI_OP(_CFFI_OP_POINTER, 507), // HaloBox * +/* 193 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 194 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 195 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 196 */ _CFFI_OP(_CFFI_OP_POINTER, 515), // XraySourceBox * +/* 197 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 198 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(UserParams *, CosmoParams *, AstroParams *, FlagOptions *, float *, double, double, int, double *) +/* 199 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 200 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 201 */ _CFFI_OP(_CFFI_OP_NOOP, 7), +/* 202 */ _CFFI_OP(_CFFI_OP_NOOP, 8), +/* 203 */ _CFFI_OP(_CFFI_OP_NOOP, 174), +/* 204 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 205 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 206 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 207 */ _CFFI_OP(_CFFI_OP_POINTER, 1), // double * +/* 208 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 209 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(UserParams *, CosmoParams *, AstroParams 
*, FlagOptions *, int, int, float *, int *, double, double, int *, int *, double *, double *, double *, float *, int *) +/* 210 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 211 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 212 */ _CFFI_OP(_CFFI_OP_NOOP, 7), +/* 213 */ _CFFI_OP(_CFFI_OP_NOOP, 8), +/* 214 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 215 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 216 */ _CFFI_OP(_CFFI_OP_NOOP, 174), +/* 217 */ _CFFI_OP(_CFFI_OP_POINTER, 120), // int * +/* 218 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 219 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 220 */ _CFFI_OP(_CFFI_OP_NOOP, 217), +/* 221 */ _CFFI_OP(_CFFI_OP_NOOP, 217), +/* 222 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 223 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 224 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 225 */ _CFFI_OP(_CFFI_OP_NOOP, 174), +/* 226 */ _CFFI_OP(_CFFI_OP_NOOP, 217), +/* 227 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 228 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(_Bool) +/* 229 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), +/* 230 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 231 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(_Bool, _Bool, double *) +/* 232 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), +/* 233 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), +/* 234 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 235 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 236 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(double *) +/* 237 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 238 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 239 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(double *, double *, int *, double *, double *, int *, double *, double *, int *) +/* 240 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 241 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 242 */ _CFFI_OP(_CFFI_OP_NOOP, 217), +/* 243 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 244 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 245 */ _CFFI_OP(_CFFI_OP_NOOP, 217), +/* 246 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 247 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 248 */ _CFFI_OP(_CFFI_OP_NOOP, 217), +/* 249 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 250 
*/ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(double *, double *, int) +/* 251 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 252 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 253 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 254 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 255 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(double, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, InitialConditions *, PerturbedField *, PerturbHaloField *, TsBox *, IonizedBox *, HaloBox *) +/* 256 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 257 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 258 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 259 */ _CFFI_OP(_CFFI_OP_NOOP, 7), +/* 260 */ _CFFI_OP(_CFFI_OP_NOOP, 8), +/* 261 */ _CFFI_OP(_CFFI_OP_POINTER, 509), // InitialConditions * +/* 262 */ _CFFI_OP(_CFFI_OP_POINTER, 512), // PerturbedField * +/* 263 */ _CFFI_OP(_CFFI_OP_POINTER, 511), // PerturbHaloField * +/* 264 */ _CFFI_OP(_CFFI_OP_POINTER, 513), // TsBox * +/* 265 */ _CFFI_OP(_CFFI_OP_POINTER, 510), // IonizedBox * +/* 266 */ _CFFI_OP(_CFFI_OP_NOOP, 192), +/* 267 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 268 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(double, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, float *, float *, float *, float *, PerturbHaloField *, float *) +/* 269 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 270 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 271 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 272 */ _CFFI_OP(_CFFI_OP_NOOP, 7), +/* 273 */ _CFFI_OP(_CFFI_OP_NOOP, 8), +/* 274 */ _CFFI_OP(_CFFI_OP_NOOP, 174), +/* 275 */ _CFFI_OP(_CFFI_OP_NOOP, 174), +/* 276 */ _CFFI_OP(_CFFI_OP_NOOP, 174), +/* 277 */ _CFFI_OP(_CFFI_OP_NOOP, 174), +/* 278 */ _CFFI_OP(_CFFI_OP_NOOP, 263), +/* 279 */ _CFFI_OP(_CFFI_OP_NOOP, 174), +/* 280 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 281 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(float, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, InitialConditions *, HaloField *, PerturbHaloField *) +/* 282 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 283 */ _CFFI_OP(_CFFI_OP_NOOP, 5), 
+/* 284 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 285 */ _CFFI_OP(_CFFI_OP_NOOP, 7), +/* 286 */ _CFFI_OP(_CFFI_OP_NOOP, 8), +/* 287 */ _CFFI_OP(_CFFI_OP_NOOP, 261), +/* 288 */ _CFFI_OP(_CFFI_OP_POINTER, 508), // HaloField * +/* 289 */ _CFFI_OP(_CFFI_OP_NOOP, 263), +/* 290 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 291 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(float, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, TsBox *, IonizedBox *, PerturbedField *, BrightnessTemp *) +/* 292 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 293 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 294 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 295 */ _CFFI_OP(_CFFI_OP_NOOP, 7), +/* 296 */ _CFFI_OP(_CFFI_OP_NOOP, 8), +/* 297 */ _CFFI_OP(_CFFI_OP_NOOP, 264), +/* 298 */ _CFFI_OP(_CFFI_OP_NOOP, 265), +/* 299 */ _CFFI_OP(_CFFI_OP_NOOP, 262), +/* 300 */ _CFFI_OP(_CFFI_OP_POINTER, 503), // BrightnessTemp * +/* 301 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 302 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(float, UserParams *, CosmoParams *, InitialConditions *, PerturbedField *) +/* 303 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 304 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 305 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 306 */ _CFFI_OP(_CFFI_OP_NOOP, 261), +/* 307 */ _CFFI_OP(_CFFI_OP_NOOP, 262), +/* 308 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 309 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(float, float, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, InitialConditions *, unsigned long long, HaloField *, HaloField *) +/* 310 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 311 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 312 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 313 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 314 */ _CFFI_OP(_CFFI_OP_NOOP, 7), +/* 315 */ _CFFI_OP(_CFFI_OP_NOOP, 8), +/* 316 */ _CFFI_OP(_CFFI_OP_NOOP, 261), +/* 317 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 12), // unsigned long long +/* 318 */ _CFFI_OP(_CFFI_OP_NOOP, 288), +/* 319 */ _CFFI_OP(_CFFI_OP_NOOP, 288), +/* 320 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 321 */ 
_CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(float, float, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, PerturbedField *, PerturbedField *, IonizedBox *, TsBox *, HaloBox *, InitialConditions *, IonizedBox *) +/* 322 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 323 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 324 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 325 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 326 */ _CFFI_OP(_CFFI_OP_NOOP, 7), +/* 327 */ _CFFI_OP(_CFFI_OP_NOOP, 8), +/* 328 */ _CFFI_OP(_CFFI_OP_NOOP, 262), +/* 329 */ _CFFI_OP(_CFFI_OP_NOOP, 262), +/* 330 */ _CFFI_OP(_CFFI_OP_NOOP, 265), +/* 331 */ _CFFI_OP(_CFFI_OP_NOOP, 264), +/* 332 */ _CFFI_OP(_CFFI_OP_NOOP, 192), +/* 333 */ _CFFI_OP(_CFFI_OP_NOOP, 261), +/* 334 */ _CFFI_OP(_CFFI_OP_NOOP, 265), +/* 335 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 336 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(float, float, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, float, short, PerturbedField *, XraySourceBox *, TsBox *, InitialConditions *, TsBox *) +/* 337 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 338 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 339 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 340 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 341 */ _CFFI_OP(_CFFI_OP_NOOP, 7), +/* 342 */ _CFFI_OP(_CFFI_OP_NOOP, 8), +/* 343 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 344 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 5), // short +/* 345 */ _CFFI_OP(_CFFI_OP_NOOP, 262), +/* 346 */ _CFFI_OP(_CFFI_OP_NOOP, 196), +/* 347 */ _CFFI_OP(_CFFI_OP_NOOP, 264), +/* 348 */ _CFFI_OP(_CFFI_OP_NOOP, 261), +/* 349 */ _CFFI_OP(_CFFI_OP_NOOP, 264), +/* 350 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 351 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(int, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, int, int, float *, float *, double *, double *, double *) +/* 352 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 353 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 354 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 355 */ _CFFI_OP(_CFFI_OP_NOOP, 7), +/* 356 */ _CFFI_OP(_CFFI_OP_NOOP, 8), +/* 357 */ 
_CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 358 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 359 */ _CFFI_OP(_CFFI_OP_NOOP, 174), +/* 360 */ _CFFI_OP(_CFFI_OP_NOOP, 174), +/* 361 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 362 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 363 */ _CFFI_OP(_CFFI_OP_NOOP, 207), +/* 364 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 365 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(unsigned long long, UserParams *, CosmoParams *, InitialConditions *) +/* 366 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 12), +/* 367 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 368 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 369 */ _CFFI_OP(_CFFI_OP_NOOP, 261), +/* 370 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 371 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(void) +/* 372 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 373 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(AstroParams *, FlagOptions *, float *, float *, float *) +/* 374 */ _CFFI_OP(_CFFI_OP_NOOP, 7), +/* 375 */ _CFFI_OP(_CFFI_OP_NOOP, 8), +/* 376 */ _CFFI_OP(_CFFI_OP_NOOP, 174), +/* 377 */ _CFFI_OP(_CFFI_OP_NOOP, 174), +/* 378 */ _CFFI_OP(_CFFI_OP_NOOP, 174), +/* 379 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 380 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(UserParams *, CosmoParams *) +/* 381 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 382 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 383 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 384 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(UserParams *, CosmoParams *, AstroParams *, FlagOptions *) +/* 385 */ _CFFI_OP(_CFFI_OP_NOOP, 5), +/* 386 */ _CFFI_OP(_CFFI_OP_NOOP, 6), +/* 387 */ _CFFI_OP(_CFFI_OP_NOOP, 7), +/* 388 */ _CFFI_OP(_CFFI_OP_NOOP, 8), +/* 389 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 390 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(double, double) +/* 391 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 392 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 393 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 394 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(double, double, _Bool) +/* 395 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 396 */ 
_CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 397 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), +/* 398 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 399 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(double, double, double, double, double, _Bool) +/* 400 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 401 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 402 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 403 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 404 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 405 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), +/* 406 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 407 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(double, double, double, double, double, double) +/* 408 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 409 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 410 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 411 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 412 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 413 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 414 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 415 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(double, double, double, double, double, double, _Bool) +/* 416 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 417 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 418 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 419 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 420 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 421 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 422 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), +/* 423 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 424 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(double, double, double, float, double, double, double, float, float, float, float, int, int, _Bool) +/* 425 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 426 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 427 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 428 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 429 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 430 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 431 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), +/* 432 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 433 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 
434 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 435 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 436 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 437 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 438 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), +/* 439 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 440 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(float, float) +/* 441 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 442 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 443 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 444 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, int, int, _Bool, _Bool) +/* 445 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 446 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 447 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 448 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 449 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 450 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 451 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 452 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 453 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 454 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 455 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 456 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 457 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 458 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 459 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 460 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 461 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 462 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 463 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 464 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 465 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 466 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 467 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 468 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 469 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), +/* 470 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), +/* 471 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 472 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(int, float, float, float, 
float, float, float, float, _Bool) +/* 473 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 474 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 475 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 476 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 477 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 478 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 479 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 480 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 481 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), +/* 482 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 483 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(int, float, float, float, float, float, float, float, float, float, float, _Bool) +/* 484 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), +/* 485 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 486 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 487 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 488 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 489 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 490 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 491 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 492 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 493 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 494 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), +/* 495 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), +/* 496 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 497 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(void *) +/* 498 */ _CFFI_OP(_CFFI_OP_POINTER, 518), // void * +/* 499 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 500 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(void) +/* 501 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), +/* 502 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 0), // AstroParams +/* 503 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 1), // BrightnessTemp +/* 504 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 2), // CosmoParams +/* 505 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 3), // FlagOptions +/* 506 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 4), // GlobalParams +/* 507 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 5), // HaloBox +/* 508 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 6), // HaloField +/* 509 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 7), // InitialConditions +/* 510 */ 
_CFFI_OP(_CFFI_OP_STRUCT_UNION, 8), // IonizedBox +/* 511 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 9), // PerturbHaloField +/* 512 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 10), // PerturbedField +/* 513 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 11), // TsBox +/* 514 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 12), // UserParams +/* 515 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 13), // XraySourceBox +/* 516 */ _CFFI_OP(_CFFI_OP_POINTER, 517), // char * +/* 517 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 2), // char +/* 518 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 0), // void +}; + +static void _cffi_d_Broadcast_struct_global_all(UserParams * x0, CosmoParams * x1, AstroParams * x2, FlagOptions * x3) +{ + Broadcast_struct_global_all(x0, x1, x2, x3); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_Broadcast_struct_global_all(PyObject *self, PyObject *args) +{ + UserParams * x0; + CosmoParams * x1; + AstroParams * x2; + FlagOptions * x3; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + + if (!PyArg_UnpackTuple(args, "Broadcast_struct_global_all", 4, 4, &arg0, &arg1, &arg2, &arg3)) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg0, (char **)&x0); + if (datasize != 0) { + x0 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? 
(AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { Broadcast_struct_global_all(x0, x1, x2, x3); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_Broadcast_struct_global_all _cffi_d_Broadcast_struct_global_all +#endif + +static void _cffi_d_Broadcast_struct_global_noastro(UserParams * x0, CosmoParams * x1) +{ + Broadcast_struct_global_noastro(x0, x1); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_Broadcast_struct_global_noastro(PyObject *self, PyObject *args) +{ + UserParams * x0; + CosmoParams * x1; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + PyObject *arg0; + PyObject *arg1; + + if (!PyArg_UnpackTuple(args, "Broadcast_struct_global_noastro", 2, 2, &arg0, &arg1)) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg0, (char **)&x0); + if (datasize != 0) { + x0 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? 
(CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { Broadcast_struct_global_noastro(x0, x1); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_Broadcast_struct_global_noastro _cffi_d_Broadcast_struct_global_noastro +#endif + +static int _cffi_d_ComputeBrightnessTemp(float x0, UserParams * x1, CosmoParams * x2, AstroParams * x3, FlagOptions * x4, TsBox * x5, IonizedBox * x6, PerturbedField * x7, BrightnessTemp * x8) +{ + return ComputeBrightnessTemp(x0, x1, x2, x3, x4, x5, x6, x7, x8); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_ComputeBrightnessTemp(PyObject *self, PyObject *args) +{ + float x0; + UserParams * x1; + CosmoParams * x2; + AstroParams * x3; + FlagOptions * x4; + TsBox * x5; + IonizedBox * x6; + PerturbedField * x7; + BrightnessTemp * x8; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + + if (!PyArg_UnpackTuple(args, "ComputeBrightnessTemp", 9, 9, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8)) + return NULL; + + x0 = (float)_cffi_to_c_float(arg0); + if (x0 == (float)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? 
(UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(264), arg5, (char **)&x5); + if (datasize != 0) { + x5 = ((size_t)datasize) <= 640 ? (TsBox *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(264), arg5, (char **)&x5, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(265), arg6, (char **)&x6); + if (datasize != 0) { + x6 = ((size_t)datasize) <= 640 ? (IonizedBox *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(265), arg6, (char **)&x6, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(262), arg7, (char **)&x7); + if (datasize != 0) { + x7 = ((size_t)datasize) <= 640 ? 
(PerturbedField *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(262), arg7, (char **)&x7, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(300), arg8, (char **)&x8); + if (datasize != 0) { + x8 = ((size_t)datasize) <= 640 ? (BrightnessTemp *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(300), arg8, (char **)&x8, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = ComputeBrightnessTemp(x0, x1, x2, x3, x4, x5, x6, x7, x8); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_ComputeBrightnessTemp _cffi_d_ComputeBrightnessTemp +#endif + +static int _cffi_d_ComputeHaloBox(double x0, UserParams * x1, CosmoParams * x2, AstroParams * x3, FlagOptions * x4, InitialConditions * x5, PerturbedField * x6, PerturbHaloField * x7, TsBox * x8, IonizedBox * x9, HaloBox * x10) +{ + return ComputeHaloBox(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_ComputeHaloBox(PyObject *self, PyObject *args) +{ + double x0; + UserParams * x1; + CosmoParams * x2; + AstroParams * x3; + FlagOptions * x4; + InitialConditions * x5; + PerturbedField * x6; + PerturbHaloField * x7; + TsBox * x8; + IonizedBox * x9; + HaloBox * x10; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + PyObject *arg10; + + if (!PyArg_UnpackTuple(args, "ComputeHaloBox", 11, 11, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10)) 
+ return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(261), arg5, (char **)&x5); + if (datasize != 0) { + x5 = ((size_t)datasize) <= 640 ? (InitialConditions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(261), arg5, (char **)&x5, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(262), arg6, (char **)&x6); + if (datasize != 0) { + x6 = ((size_t)datasize) <= 640 ? 
(PerturbedField *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(262), arg6, (char **)&x6, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(263), arg7, (char **)&x7); + if (datasize != 0) { + x7 = ((size_t)datasize) <= 640 ? (PerturbHaloField *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(263), arg7, (char **)&x7, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(264), arg8, (char **)&x8); + if (datasize != 0) { + x8 = ((size_t)datasize) <= 640 ? (TsBox *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(264), arg8, (char **)&x8, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(265), arg9, (char **)&x9); + if (datasize != 0) { + x9 = ((size_t)datasize) <= 640 ? (IonizedBox *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(265), arg9, (char **)&x9, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(192), arg10, (char **)&x10); + if (datasize != 0) { + x10 = ((size_t)datasize) <= 640 ? 
(HaloBox *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(192), arg10, (char **)&x10, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = ComputeHaloBox(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_ComputeHaloBox _cffi_d_ComputeHaloBox +#endif + +static int _cffi_d_ComputeHaloField(float x0, float x1, UserParams * x2, CosmoParams * x3, AstroParams * x4, FlagOptions * x5, InitialConditions * x6, unsigned long long x7, HaloField * x8, HaloField * x9) +{ + return ComputeHaloField(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_ComputeHaloField(PyObject *self, PyObject *args) +{ + float x0; + float x1; + UserParams * x2; + CosmoParams * x3; + AstroParams * x4; + FlagOptions * x5; + InitialConditions * x6; + unsigned long long x7; + HaloField * x8; + HaloField * x9; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + + if (!PyArg_UnpackTuple(args, "ComputeHaloField", 10, 10, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9)) + return NULL; + + x0 = (float)_cffi_to_c_float(arg0); + if (x0 == (float)-1 && PyErr_Occurred()) + return NULL; + + x1 = (float)_cffi_to_c_float(arg1); + if (x1 == (float)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? 
(UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg5, (char **)&x5); + if (datasize != 0) { + x5 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg5, (char **)&x5, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(261), arg6, (char **)&x6); + if (datasize != 0) { + x6 = ((size_t)datasize) <= 640 ? (InitialConditions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(261), arg6, (char **)&x6, + datasize, &large_args_free) < 0) + return NULL; + } + + x7 = _cffi_to_c_int(arg7, unsigned long long); + if (x7 == (unsigned long long)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(288), arg8, (char **)&x8); + if (datasize != 0) { + x8 = ((size_t)datasize) <= 640 ? 
(HaloField *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(288), arg8, (char **)&x8, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(288), arg9, (char **)&x9); + if (datasize != 0) { + x9 = ((size_t)datasize) <= 640 ? (HaloField *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(288), arg9, (char **)&x9, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = ComputeHaloField(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_ComputeHaloField _cffi_d_ComputeHaloField +#endif + +static int _cffi_d_ComputeInitialConditions(unsigned long long x0, UserParams * x1, CosmoParams * x2, InitialConditions * x3) +{ + return ComputeInitialConditions(x0, x1, x2, x3); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_ComputeInitialConditions(PyObject *self, PyObject *args) +{ + unsigned long long x0; + UserParams * x1; + CosmoParams * x2; + InitialConditions * x3; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + + if (!PyArg_UnpackTuple(args, "ComputeInitialConditions", 4, 4, &arg0, &arg1, &arg2, &arg3)) + return NULL; + + x0 = _cffi_to_c_int(arg0, unsigned long long); + if (x0 == (unsigned long long)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? 
(UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(261), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (InitialConditions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(261), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = ComputeInitialConditions(x0, x1, x2, x3); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_ComputeInitialConditions _cffi_d_ComputeInitialConditions +#endif + +static int _cffi_d_ComputeIonizedBox(float x0, float x1, UserParams * x2, CosmoParams * x3, AstroParams * x4, FlagOptions * x5, PerturbedField * x6, PerturbedField * x7, IonizedBox * x8, TsBox * x9, HaloBox * x10, InitialConditions * x11, IonizedBox * x12) +{ + return ComputeIonizedBox(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_ComputeIonizedBox(PyObject *self, PyObject *args) +{ + float x0; + float x1; + UserParams * x2; + CosmoParams * x3; + AstroParams * x4; + FlagOptions * x5; + PerturbedField * x6; + PerturbedField * x7; + IonizedBox * x8; + TsBox * x9; + HaloBox * x10; + InitialConditions * x11; + IonizedBox * x12; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free 
= NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + PyObject *arg10; + PyObject *arg11; + PyObject *arg12; + + if (!PyArg_UnpackTuple(args, "ComputeIonizedBox", 13, 13, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11, &arg12)) + return NULL; + + x0 = (float)_cffi_to_c_float(arg0); + if (x0 == (float)-1 && PyErr_Occurred()) + return NULL; + + x1 = (float)_cffi_to_c_float(arg1); + if (x1 == (float)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg5, (char **)&x5); + if (datasize != 0) { + x5 = ((size_t)datasize) <= 640 ? 
(FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg5, (char **)&x5, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(262), arg6, (char **)&x6); + if (datasize != 0) { + x6 = ((size_t)datasize) <= 640 ? (PerturbedField *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(262), arg6, (char **)&x6, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(262), arg7, (char **)&x7); + if (datasize != 0) { + x7 = ((size_t)datasize) <= 640 ? (PerturbedField *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(262), arg7, (char **)&x7, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(265), arg8, (char **)&x8); + if (datasize != 0) { + x8 = ((size_t)datasize) <= 640 ? (IonizedBox *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(265), arg8, (char **)&x8, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(264), arg9, (char **)&x9); + if (datasize != 0) { + x9 = ((size_t)datasize) <= 640 ? (TsBox *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(264), arg9, (char **)&x9, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(192), arg10, (char **)&x10); + if (datasize != 0) { + x10 = ((size_t)datasize) <= 640 ? (HaloBox *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(192), arg10, (char **)&x10, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(261), arg11, (char **)&x11); + if (datasize != 0) { + x11 = ((size_t)datasize) <= 640 ? 
(InitialConditions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(261), arg11, (char **)&x11, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(265), arg12, (char **)&x12); + if (datasize != 0) { + x12 = ((size_t)datasize) <= 640 ? (IonizedBox *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(265), arg12, (char **)&x12, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = ComputeIonizedBox(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_ComputeIonizedBox _cffi_d_ComputeIonizedBox +#endif + +static int _cffi_d_ComputeLF(int x0, UserParams * x1, CosmoParams * x2, AstroParams * x3, FlagOptions * x4, int x5, int x6, float * x7, float * x8, double * x9, double * x10, double * x11) +{ + return ComputeLF(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_ComputeLF(PyObject *self, PyObject *args) +{ + int x0; + UserParams * x1; + CosmoParams * x2; + AstroParams * x3; + FlagOptions * x4; + int x5; + int x6; + float * x7; + float * x8; + double * x9; + double * x10; + double * x11; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + PyObject *arg10; + PyObject *arg11; + + if (!PyArg_UnpackTuple(args, "ComputeLF", 12, 12, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11)) + return NULL; + + x0 = 
_cffi_to_c_int(arg0, int); + if (x0 == (int)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + x5 = _cffi_to_c_int(arg5, int); + if (x5 == (int)-1 && PyErr_Occurred()) + return NULL; + + x6 = _cffi_to_c_int(arg6, int); + if (x6 == (int)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg7, (char **)&x7); + if (datasize != 0) { + x7 = ((size_t)datasize) <= 640 ? 
(float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg7, (char **)&x7, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg8, (char **)&x8); + if (datasize != 0) { + x8 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg8, (char **)&x8, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg9, (char **)&x9); + if (datasize != 0) { + x9 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg9, (char **)&x9, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg10, (char **)&x10); + if (datasize != 0) { + x10 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg10, (char **)&x10, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg11, (char **)&x11); + if (datasize != 0) { + x11 = ((size_t)datasize) <= 640 ? 
(double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg11, (char **)&x11, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = ComputeLF(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_ComputeLF _cffi_d_ComputeLF +#endif + +static int _cffi_d_ComputePerturbField(float x0, UserParams * x1, CosmoParams * x2, InitialConditions * x3, PerturbedField * x4) +{ + return ComputePerturbField(x0, x1, x2, x3, x4); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_ComputePerturbField(PyObject *self, PyObject *args) +{ + float x0; + UserParams * x1; + CosmoParams * x2; + InitialConditions * x3; + PerturbedField * x4; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + + if (!PyArg_UnpackTuple(args, "ComputePerturbField", 5, 5, &arg0, &arg1, &arg2, &arg3, &arg4)) + return NULL; + + x0 = (float)_cffi_to_c_float(arg0); + if (x0 == (float)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? 
(CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(261), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (InitialConditions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(261), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(262), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? (PerturbedField *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(262), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = ComputePerturbField(x0, x1, x2, x3, x4); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_ComputePerturbField _cffi_d_ComputePerturbField +#endif + +static int _cffi_d_ComputePerturbHaloField(float x0, UserParams * x1, CosmoParams * x2, AstroParams * x3, FlagOptions * x4, InitialConditions * x5, HaloField * x6, PerturbHaloField * x7) +{ + return ComputePerturbHaloField(x0, x1, x2, x3, x4, x5, x6, x7); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_ComputePerturbHaloField(PyObject *self, PyObject *args) +{ + float x0; + UserParams * x1; + CosmoParams * x2; + AstroParams * x3; + FlagOptions * x4; + InitialConditions * x5; + HaloField * x6; + PerturbHaloField * x7; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; 
+ PyObject *arg7; + + if (!PyArg_UnpackTuple(args, "ComputePerturbHaloField", 8, 8, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7)) + return NULL; + + x0 = (float)_cffi_to_c_float(arg0); + if (x0 == (float)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(261), arg5, (char **)&x5); + if (datasize != 0) { + x5 = ((size_t)datasize) <= 640 ? 
(InitialConditions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(261), arg5, (char **)&x5, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(288), arg6, (char **)&x6); + if (datasize != 0) { + x6 = ((size_t)datasize) <= 640 ? (HaloField *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(288), arg6, (char **)&x6, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(263), arg7, (char **)&x7); + if (datasize != 0) { + x7 = ((size_t)datasize) <= 640 ? (PerturbHaloField *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(263), arg7, (char **)&x7, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = ComputePerturbHaloField(x0, x1, x2, x3, x4, x5, x6, x7); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_ComputePerturbHaloField _cffi_d_ComputePerturbHaloField +#endif + +static float _cffi_d_ComputeTau(UserParams * x0, CosmoParams * x1, int x2, float * x3, float * x4) +{ + return ComputeTau(x0, x1, x2, x3, x4); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_ComputeTau(PyObject *self, PyObject *args) +{ + UserParams * x0; + CosmoParams * x1; + int x2; + float * x3; + float * x4; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + float result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + + if (!PyArg_UnpackTuple(args, "ComputeTau", 5, 5, &arg0, &arg1, &arg2, &arg3, &arg4)) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg0, (char **)&x0); + if (datasize != 0) { 
+ x0 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + x2 = _cffi_to_c_int(arg2, int); + if (x2 == (int)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? 
(float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = ComputeTau(x0, x1, x2, x3, x4); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_float(result); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_ComputeTau _cffi_d_ComputeTau +#endif + +static int _cffi_d_ComputeTsBox(float x0, float x1, UserParams * x2, CosmoParams * x3, AstroParams * x4, FlagOptions * x5, float x6, short x7, PerturbedField * x8, XraySourceBox * x9, TsBox * x10, InitialConditions * x11, TsBox * x12) +{ + return ComputeTsBox(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_ComputeTsBox(PyObject *self, PyObject *args) +{ + float x0; + float x1; + UserParams * x2; + CosmoParams * x3; + AstroParams * x4; + FlagOptions * x5; + float x6; + short x7; + PerturbedField * x8; + XraySourceBox * x9; + TsBox * x10; + InitialConditions * x11; + TsBox * x12; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + PyObject *arg10; + PyObject *arg11; + PyObject *arg12; + + if (!PyArg_UnpackTuple(args, "ComputeTsBox", 13, 13, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11, &arg12)) + return NULL; + + x0 = (float)_cffi_to_c_float(arg0); + if (x0 == (float)-1 && PyErr_Occurred()) + return NULL; + + x1 = (float)_cffi_to_c_float(arg1); + if (x1 == (float)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg2, (char **)&x2); + if 
(datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg5, (char **)&x5); + if (datasize != 0) { + x5 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg5, (char **)&x5, + datasize, &large_args_free) < 0) + return NULL; + } + + x6 = (float)_cffi_to_c_float(arg6); + if (x6 == (float)-1 && PyErr_Occurred()) + return NULL; + + x7 = _cffi_to_c_int(arg7, short); + if (x7 == (short)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(262), arg8, (char **)&x8); + if (datasize != 0) { + x8 = ((size_t)datasize) <= 640 ? (PerturbedField *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(262), arg8, (char **)&x8, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(196), arg9, (char **)&x9); + if (datasize != 0) { + x9 = ((size_t)datasize) <= 640 ? 
(XraySourceBox *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(196), arg9, (char **)&x9, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(264), arg10, (char **)&x10); + if (datasize != 0) { + x10 = ((size_t)datasize) <= 640 ? (TsBox *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(264), arg10, (char **)&x10, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(261), arg11, (char **)&x11); + if (datasize != 0) { + x11 = ((size_t)datasize) <= 640 ? (InitialConditions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(261), arg11, (char **)&x11, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(264), arg12, (char **)&x12); + if (datasize != 0) { + x12 = ((size_t)datasize) <= 640 ? (TsBox *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(264), arg12, (char **)&x12, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = ComputeTsBox(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_ComputeTsBox _cffi_d_ComputeTsBox +#endif + +static int _cffi_d_ComputeZstart_PhotonCons(double * x0) +{ + return ComputeZstart_PhotonCons(x0); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_ComputeZstart_PhotonCons(PyObject *self, PyObject *arg0) +{ + double * x0; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg0, (char 
**)&x0); + if (datasize != 0) { + x0 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg0, (char **)&x0, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = ComputeZstart_PhotonCons(x0); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_ComputeZstart_PhotonCons _cffi_d_ComputeZstart_PhotonCons +#endif + +static int _cffi_d_CreateFFTWWisdoms(UserParams * x0, CosmoParams * x1) +{ + return CreateFFTWWisdoms(x0, x1); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_CreateFFTWWisdoms(PyObject *self, PyObject *args) +{ + UserParams * x0; + CosmoParams * x1; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + + if (!PyArg_UnpackTuple(args, "CreateFFTWWisdoms", 2, 2, &arg0, &arg1)) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg0, (char **)&x0); + if (datasize != 0) { + x0 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? 
(CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = CreateFFTWWisdoms(x0, x1); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_CreateFFTWWisdoms _cffi_d_CreateFFTWWisdoms +#endif + +static double _cffi_d_EvaluateFcoll_delta(double x0, double x1, double x2, double x3) +{ + return EvaluateFcoll_delta(x0, x1, x2, x3); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_EvaluateFcoll_delta(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + + if (!PyArg_UnpackTuple(args, "EvaluateFcoll_delta", 4, 4, &arg0, &arg1, &arg2, &arg3)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluateFcoll_delta(x0, x1, x2, x3); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_EvaluateFcoll_delta _cffi_d_EvaluateFcoll_delta +#endif + +static double _cffi_d_EvaluateMcoll(double x0, double x1, double x2, double x3, double x4, double x5, double x6) +{ + return EvaluateMcoll(x0, x1, x2, x3, x4, x5, x6); +} +#ifndef PYPY_VERSION 
+static PyObject * +_cffi_f_EvaluateMcoll(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + double x5; + double x6; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + + if (!PyArg_UnpackTuple(args, "EvaluateMcoll", 7, 7, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = (double)_cffi_to_c_double(arg6); + if (x6 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluateMcoll(x0, x1, x2, x3, x4, x5, x6); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_EvaluateMcoll _cffi_d_EvaluateMcoll +#endif + +static double _cffi_d_EvaluateNhalo(double x0, double x1, double x2, double x3, double x4, double x5, double x6) +{ + return EvaluateNhalo(x0, x1, x2, x3, x4, x5, x6); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_EvaluateNhalo(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + double x5; + double x6; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject 
*arg6; + + if (!PyArg_UnpackTuple(args, "EvaluateNhalo", 7, 7, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = (double)_cffi_to_c_double(arg6); + if (x6 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluateNhalo(x0, x1, x2, x3, x4, x5, x6); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_EvaluateNhalo _cffi_d_EvaluateNhalo +#endif + +static double _cffi_d_EvaluateNhaloInv(double x0, double x1) +{ + return EvaluateNhaloInv(x0, x1); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_EvaluateNhaloInv(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + + if (!PyArg_UnpackTuple(args, "EvaluateNhaloInv", 2, 2, &arg0, &arg1)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluateNhaloInv(x0, x1); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define 
_cffi_f_EvaluateNhaloInv _cffi_d_EvaluateNhaloInv +#endif + +static double _cffi_d_EvaluateNionTs(double x0, double x1, double x2) +{ + return EvaluateNionTs(x0, x1, x2); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_EvaluateNionTs(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + + if (!PyArg_UnpackTuple(args, "EvaluateNionTs", 3, 3, &arg0, &arg1, &arg2)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluateNionTs(x0, x1, x2); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_EvaluateNionTs _cffi_d_EvaluateNionTs +#endif + +static double _cffi_d_EvaluateNionTs_MINI(double x0, double x1, double x2, double x3) +{ + return EvaluateNionTs_MINI(x0, x1, x2, x3); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_EvaluateNionTs_MINI(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + + if (!PyArg_UnpackTuple(args, "EvaluateNionTs_MINI", 4, 4, &arg0, &arg1, &arg2, &arg3)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && 
PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluateNionTs_MINI(x0, x1, x2, x3); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_EvaluateNionTs_MINI _cffi_d_EvaluateNionTs_MINI +#endif + +static double _cffi_d_EvaluateNion_Conditional(double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7, double x8, _Bool x9) +{ + return EvaluateNion_Conditional(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_EvaluateNion_Conditional(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + double x5; + double x6; + double x7; + double x8; + _Bool x9; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + + if (!PyArg_UnpackTuple(args, "EvaluateNion_Conditional", 10, 10, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = (double)_cffi_to_c_double(arg6); + if (x6 == (double)-1 && PyErr_Occurred()) + return NULL; + + x7 = (double)_cffi_to_c_double(arg7); + if (x7 == (double)-1 && 
PyErr_Occurred()) + return NULL; + + x8 = (double)_cffi_to_c_double(arg8); + if (x8 == (double)-1 && PyErr_Occurred()) + return NULL; + + x9 = (_Bool)_cffi_to_c__Bool(arg9); + if (x9 == (_Bool)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluateNion_Conditional(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_EvaluateNion_Conditional _cffi_d_EvaluateNion_Conditional +#endif + +static double _cffi_d_EvaluateNion_Conditional_MINI(double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7, double x8, double x9, _Bool x10) +{ + return EvaluateNion_Conditional_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_EvaluateNion_Conditional_MINI(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + double x5; + double x6; + double x7; + double x8; + double x9; + _Bool x10; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + PyObject *arg10; + + if (!PyArg_UnpackTuple(args, "EvaluateNion_Conditional_MINI", 11, 11, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && 
PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = (double)_cffi_to_c_double(arg6); + if (x6 == (double)-1 && PyErr_Occurred()) + return NULL; + + x7 = (double)_cffi_to_c_double(arg7); + if (x7 == (double)-1 && PyErr_Occurred()) + return NULL; + + x8 = (double)_cffi_to_c_double(arg8); + if (x8 == (double)-1 && PyErr_Occurred()) + return NULL; + + x9 = (double)_cffi_to_c_double(arg9); + if (x9 == (double)-1 && PyErr_Occurred()) + return NULL; + + x10 = (_Bool)_cffi_to_c__Bool(arg10); + if (x10 == (_Bool)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluateNion_Conditional_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_EvaluateNion_Conditional_MINI _cffi_d_EvaluateNion_Conditional_MINI +#endif + +static double _cffi_d_EvaluateSFRD(double x0, double x1) +{ + return EvaluateSFRD(x0, x1); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_EvaluateSFRD(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + + if (!PyArg_UnpackTuple(args, "EvaluateSFRD", 2, 2, &arg0, &arg1)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluateSFRD(x0, x1); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_EvaluateSFRD _cffi_d_EvaluateSFRD +#endif + +static double _cffi_d_EvaluateSFRD_Conditional(double x0, double x1, double x2, double x3, double 
x4, double x5, double x6, double x7) +{ + return EvaluateSFRD_Conditional(x0, x1, x2, x3, x4, x5, x6, x7); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_EvaluateSFRD_Conditional(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + double x5; + double x6; + double x7; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + + if (!PyArg_UnpackTuple(args, "EvaluateSFRD_Conditional", 8, 8, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = (double)_cffi_to_c_double(arg6); + if (x6 == (double)-1 && PyErr_Occurred()) + return NULL; + + x7 = (double)_cffi_to_c_double(arg7); + if (x7 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluateSFRD_Conditional(x0, x1, x2, x3, x4, x5, x6, x7); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_EvaluateSFRD_Conditional _cffi_d_EvaluateSFRD_Conditional +#endif + +static double _cffi_d_EvaluateSFRD_Conditional_MINI(double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7, double x8) +{ + return 
EvaluateSFRD_Conditional_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_EvaluateSFRD_Conditional_MINI(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + double x5; + double x6; + double x7; + double x8; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + + if (!PyArg_UnpackTuple(args, "EvaluateSFRD_Conditional_MINI", 9, 9, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = (double)_cffi_to_c_double(arg6); + if (x6 == (double)-1 && PyErr_Occurred()) + return NULL; + + x7 = (double)_cffi_to_c_double(arg7); + if (x7 == (double)-1 && PyErr_Occurred()) + return NULL; + + x8 = (double)_cffi_to_c_double(arg8); + if (x8 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluateSFRD_Conditional_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_EvaluateSFRD_Conditional_MINI _cffi_d_EvaluateSFRD_Conditional_MINI +#endif + +static double _cffi_d_EvaluateSFRD_MINI(double x0, double 
x1, double x2) +{ + return EvaluateSFRD_MINI(x0, x1, x2); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_EvaluateSFRD_MINI(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + + if (!PyArg_UnpackTuple(args, "EvaluateSFRD_MINI", 3, 3, &arg0, &arg1, &arg2)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluateSFRD_MINI(x0, x1, x2); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_EvaluateSFRD_MINI _cffi_d_EvaluateSFRD_MINI +#endif + +static double _cffi_d_EvaluateSigma(double x0) +{ + return EvaluateSigma(x0); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_EvaluateSigma(PyObject *self, PyObject *arg0) +{ + double x0; + double result; + PyObject *pyresult; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluateSigma(x0); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_EvaluateSigma _cffi_d_EvaluateSigma +#endif + +static double _cffi_d_EvaluatedFcolldz(double x0, double x1, double x2, double x3) +{ + return EvaluatedFcolldz(x0, x1, x2, x3); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_EvaluatedFcolldz(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject 
*arg1; + PyObject *arg2; + PyObject *arg3; + + if (!PyArg_UnpackTuple(args, "EvaluatedFcolldz", 4, 4, &arg0, &arg1, &arg2, &arg3)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluatedFcolldz(x0, x1, x2, x3); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_EvaluatedFcolldz _cffi_d_EvaluatedFcolldz +#endif + +static double _cffi_d_EvaluatedSigmasqdm(double x0) +{ + return EvaluatedSigmasqdm(x0); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_EvaluatedSigmasqdm(PyObject *self, PyObject *arg0) +{ + double x0; + double result; + PyObject *pyresult; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = EvaluatedSigmasqdm(x0); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_EvaluatedSigmasqdm _cffi_d_EvaluatedSigmasqdm +#endif + +static double _cffi_d_Fcoll_General(double x0, double x1, double x2) +{ + return Fcoll_General(x0, x1, x2); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_Fcoll_General(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + + if (!PyArg_UnpackTuple(args, "Fcoll_General", 3, 3, &arg0, &arg1, &arg2)) + return NULL; + + x0 = 
(double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = Fcoll_General(x0, x1, x2); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_Fcoll_General _cffi_d_Fcoll_General +#endif + +static void _cffi_d_FreePhotonConsMemory(void) +{ + FreePhotonConsMemory(); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_FreePhotonConsMemory(PyObject *self, PyObject *noarg) +{ + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { FreePhotonConsMemory(); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + (void)noarg; /* unused */ + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_FreePhotonConsMemory _cffi_d_FreePhotonConsMemory +#endif + +static int _cffi_d_FunctionThatCatches(_Bool x0, _Bool x1, double * x2) +{ + return FunctionThatCatches(x0, x1, x2); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_FunctionThatCatches(PyObject *self, PyObject *args) +{ + _Bool x0; + _Bool x1; + double * x2; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + + if (!PyArg_UnpackTuple(args, "FunctionThatCatches", 3, 3, &arg0, &arg1, &arg2)) + return NULL; + + x0 = (_Bool)_cffi_to_c__Bool(arg0); + if (x0 == (_Bool)-1 && PyErr_Occurred()) + return NULL; + + x1 = (_Bool)_cffi_to_c__Bool(arg1); + if (x1 == (_Bool)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? 
(double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = FunctionThatCatches(x0, x1, x2); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_FunctionThatCatches _cffi_d_FunctionThatCatches +#endif + +static void _cffi_d_FunctionThatThrows(void) +{ + FunctionThatThrows(); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_FunctionThatThrows(PyObject *self, PyObject *noarg) +{ + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { FunctionThatThrows(); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + (void)noarg; /* unused */ + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_FunctionThatThrows _cffi_d_FunctionThatThrows +#endif + +static int _cffi_d_InitialisePhotonCons(UserParams * x0, CosmoParams * x1, AstroParams * x2, FlagOptions * x3) +{ + return InitialisePhotonCons(x0, x1, x2, x3); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_InitialisePhotonCons(PyObject *self, PyObject *args) +{ + UserParams * x0; + CosmoParams * x1; + AstroParams * x2; + FlagOptions * x3; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + + if (!PyArg_UnpackTuple(args, "InitialisePhotonCons", 4, 4, &arg0, &arg1, &arg2, &arg3)) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg0, (char **)&x0); + if (datasize != 0) { + x0 = ((size_t)datasize) <= 640 ? 
(UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = InitialisePhotonCons(x0, x1, x2, x3); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_InitialisePhotonCons _cffi_d_InitialisePhotonCons +#endif + +static double _cffi_d_Mcoll_Conditional(double x0, double x1, double x2, double x3, double x4, double x5, int x6) +{ + return Mcoll_Conditional(x0, x1, x2, x3, x4, x5, x6); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_Mcoll_Conditional(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + double x5; + int x6; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + 
PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + + if (!PyArg_UnpackTuple(args, "Mcoll_Conditional", 7, 7, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = _cffi_to_c_int(arg6, int); + if (x6 == (int)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = Mcoll_Conditional(x0, x1, x2, x3, x4, x5, x6); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_Mcoll_Conditional _cffi_d_Mcoll_Conditional +#endif + +static double _cffi_d_Nhalo_Conditional(double x0, double x1, double x2, double x3, double x4, double x5, int x6) +{ + return Nhalo_Conditional(x0, x1, x2, x3, x4, x5, x6); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_Nhalo_Conditional(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + double x5; + int x6; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + + if (!PyArg_UnpackTuple(args, "Nhalo_Conditional", 7, 7, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = 
(double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = _cffi_to_c_int(arg6, int); + if (x6 == (int)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = Nhalo_Conditional(x0, x1, x2, x3, x4, x5, x6); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_Nhalo_Conditional _cffi_d_Nhalo_Conditional +#endif + +static double _cffi_d_Nion_ConditionalM(double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7, double x8, double x9, double x10, double x11, double x12, int x13) +{ + return Nion_ConditionalM(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_Nion_ConditionalM(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + double x5; + double x6; + double x7; + double x8; + double x9; + double x10; + double x11; + double x12; + int x13; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + PyObject *arg10; + PyObject *arg11; + PyObject *arg12; + PyObject *arg13; + + if (!PyArg_UnpackTuple(args, "Nion_ConditionalM", 14, 14, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11, &arg12, &arg13)) + return NULL; + + x0 = 
(double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = (double)_cffi_to_c_double(arg6); + if (x6 == (double)-1 && PyErr_Occurred()) + return NULL; + + x7 = (double)_cffi_to_c_double(arg7); + if (x7 == (double)-1 && PyErr_Occurred()) + return NULL; + + x8 = (double)_cffi_to_c_double(arg8); + if (x8 == (double)-1 && PyErr_Occurred()) + return NULL; + + x9 = (double)_cffi_to_c_double(arg9); + if (x9 == (double)-1 && PyErr_Occurred()) + return NULL; + + x10 = (double)_cffi_to_c_double(arg10); + if (x10 == (double)-1 && PyErr_Occurred()) + return NULL; + + x11 = (double)_cffi_to_c_double(arg11); + if (x11 == (double)-1 && PyErr_Occurred()) + return NULL; + + x12 = (double)_cffi_to_c_double(arg12); + if (x12 == (double)-1 && PyErr_Occurred()) + return NULL; + + x13 = _cffi_to_c_int(arg13, int); + if (x13 == (int)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = Nion_ConditionalM(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_Nion_ConditionalM _cffi_d_Nion_ConditionalM +#endif + +static double _cffi_d_Nion_ConditionalM_MINI(double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7, double x8, double x9, double x10, double x11, double x12, double x13, int x14) +{ + return 
Nion_ConditionalM_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_Nion_ConditionalM_MINI(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + double x5; + double x6; + double x7; + double x8; + double x9; + double x10; + double x11; + double x12; + double x13; + int x14; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + PyObject *arg10; + PyObject *arg11; + PyObject *arg12; + PyObject *arg13; + PyObject *arg14; + + if (!PyArg_UnpackTuple(args, "Nion_ConditionalM_MINI", 15, 15, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11, &arg12, &arg13, &arg14)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = (double)_cffi_to_c_double(arg6); + if (x6 == (double)-1 && PyErr_Occurred()) + return NULL; + + x7 = (double)_cffi_to_c_double(arg7); + if (x7 == (double)-1 && PyErr_Occurred()) + return NULL; + + x8 = (double)_cffi_to_c_double(arg8); + if (x8 == (double)-1 && PyErr_Occurred()) + return NULL; + + x9 = (double)_cffi_to_c_double(arg9); + if (x9 == (double)-1 && PyErr_Occurred()) + return NULL; + + x10 = (double)_cffi_to_c_double(arg10); + if (x10 == (double)-1 && 
PyErr_Occurred()) + return NULL; + + x11 = (double)_cffi_to_c_double(arg11); + if (x11 == (double)-1 && PyErr_Occurred()) + return NULL; + + x12 = (double)_cffi_to_c_double(arg12); + if (x12 == (double)-1 && PyErr_Occurred()) + return NULL; + + x13 = (double)_cffi_to_c_double(arg13); + if (x13 == (double)-1 && PyErr_Occurred()) + return NULL; + + x14 = _cffi_to_c_int(arg14, int); + if (x14 == (int)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = Nion_ConditionalM_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_Nion_ConditionalM_MINI _cffi_d_Nion_ConditionalM_MINI +#endif + +static double _cffi_d_Nion_General(double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7, double x8, double x9) +{ + return Nion_General(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_Nion_General(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + double x5; + double x6; + double x7; + double x8; + double x9; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + + if (!PyArg_UnpackTuple(args, "Nion_General", 10, 10, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && 
PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = (double)_cffi_to_c_double(arg6); + if (x6 == (double)-1 && PyErr_Occurred()) + return NULL; + + x7 = (double)_cffi_to_c_double(arg7); + if (x7 == (double)-1 && PyErr_Occurred()) + return NULL; + + x8 = (double)_cffi_to_c_double(arg8); + if (x8 == (double)-1 && PyErr_Occurred()) + return NULL; + + x9 = (double)_cffi_to_c_double(arg9); + if (x9 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = Nion_General(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_Nion_General _cffi_d_Nion_General +#endif + +static double _cffi_d_Nion_General_MINI(double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7, double x8, double x9, double x10) +{ + return Nion_General_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_Nion_General_MINI(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + double x5; + double x6; + double x7; + double x8; + double x9; + double x10; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + PyObject *arg10; + + if (!PyArg_UnpackTuple(args, "Nion_General_MINI", 11, 11, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if 
(x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = (double)_cffi_to_c_double(arg6); + if (x6 == (double)-1 && PyErr_Occurred()) + return NULL; + + x7 = (double)_cffi_to_c_double(arg7); + if (x7 == (double)-1 && PyErr_Occurred()) + return NULL; + + x8 = (double)_cffi_to_c_double(arg8); + if (x8 == (double)-1 && PyErr_Occurred()) + return NULL; + + x9 = (double)_cffi_to_c_double(arg9); + if (x9 == (double)-1 && PyErr_Occurred()) + return NULL; + + x10 = (double)_cffi_to_c_double(arg10); + if (x10 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = Nion_General_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_Nion_General_MINI _cffi_d_Nion_General_MINI +#endif + +static int _cffi_d_ObtainPhotonConsData(double * x0, double * x1, int * x2, double * x3, double * x4, int * x5, double * x6, double * x7, int * x8) +{ + return ObtainPhotonConsData(x0, x1, x2, x3, x4, x5, x6, x7, x8); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_ObtainPhotonConsData(PyObject *self, PyObject *args) +{ + double * x0; + double * x1; + int * x2; + double * x3; + double * x4; + int * x5; + double * x6; + double * x7; + int * x8; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject 
*arg6; + PyObject *arg7; + PyObject *arg8; + + if (!PyArg_UnpackTuple(args, "ObtainPhotonConsData", 9, 9, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8)) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg0, (char **)&x0); + if (datasize != 0) { + x0 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg0, (char **)&x0, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(217), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (int *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(217), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(217), arg5, (char **)&x5); + if (datasize != 0) { + x5 = ((size_t)datasize) <= 640 ? 
(int *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(217), arg5, (char **)&x5, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg6, (char **)&x6); + if (datasize != 0) { + x6 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg6, (char **)&x6, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg7, (char **)&x7); + if (datasize != 0) { + x7 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg7, (char **)&x7, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(217), arg8, (char **)&x8); + if (datasize != 0) { + x8 = ((size_t)datasize) <= 640 ? (int *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(217), arg8, (char **)&x8, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = ObtainPhotonConsData(x0, x1, x2, x3, x4, x5, x6, x7, x8); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_ObtainPhotonConsData _cffi_d_ObtainPhotonConsData +#endif + +static int _cffi_d_PhotonCons_Calibration(double * x0, double * x1, int x2) +{ + return PhotonCons_Calibration(x0, x1, x2); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_PhotonCons_Calibration(PyObject *self, PyObject *args) +{ + double * x0; + double * x1; + int x2; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + + if 
(!PyArg_UnpackTuple(args, "PhotonCons_Calibration", 3, 3, &arg0, &arg1, &arg2)) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg0, (char **)&x0); + if (datasize != 0) { + x0 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg0, (char **)&x0, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + x2 = _cffi_to_c_int(arg2, int); + if (x2 == (int)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = PhotonCons_Calibration(x0, x1, x2); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_PhotonCons_Calibration _cffi_d_PhotonCons_Calibration +#endif + +static int _cffi_d_SomethingThatCatches(_Bool x0) +{ + return SomethingThatCatches(x0); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_SomethingThatCatches(PyObject *self, PyObject *arg0) +{ + _Bool x0; + int result; + PyObject *pyresult; + + x0 = (_Bool)_cffi_to_c__Bool(arg0); + if (x0 == (_Bool)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = SomethingThatCatches(x0); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + return pyresult; +} +#else +# define _cffi_f_SomethingThatCatches _cffi_d_SomethingThatCatches +#endif + +static int _cffi_d_UpdateXraySourceBox(UserParams * x0, CosmoParams * x1, AstroParams * x2, FlagOptions 
* x3, HaloBox * x4, double x5, double x6, int x7, XraySourceBox * x8) +{ + return UpdateXraySourceBox(x0, x1, x2, x3, x4, x5, x6, x7, x8); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_UpdateXraySourceBox(PyObject *self, PyObject *args) +{ + UserParams * x0; + CosmoParams * x1; + AstroParams * x2; + FlagOptions * x3; + HaloBox * x4; + double x5; + double x6; + int x7; + XraySourceBox * x8; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + + if (!PyArg_UnpackTuple(args, "UpdateXraySourceBox", 9, 9, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8)) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg0, (char **)&x0); + if (datasize != 0) { + x0 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? 
(FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(192), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? (HaloBox *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(192), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = (double)_cffi_to_c_double(arg6); + if (x6 == (double)-1 && PyErr_Occurred()) + return NULL; + + x7 = _cffi_to_c_int(arg7, int); + if (x7 == (int)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(196), arg8, (char **)&x8); + if (datasize != 0) { + x8 = ((size_t)datasize) <= 640 ? (XraySourceBox *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(196), arg8, (char **)&x8, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = UpdateXraySourceBox(x0, x1, x2, x3, x4, x5, x6, x7, x8); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_UpdateXraySourceBox _cffi_d_UpdateXraySourceBox +#endif + +static void _cffi_d_adjust_redshifts_for_photoncons(AstroParams * x0, FlagOptions * x1, float * x2, float * x3, float * x4) +{ + adjust_redshifts_for_photoncons(x0, x1, x2, x3, x4); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_adjust_redshifts_for_photoncons(PyObject *self, PyObject *args) +{ + AstroParams * x0; + FlagOptions * x1; + float * x2; + float * x3; + float * x4; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + 
PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + + if (!PyArg_UnpackTuple(args, "adjust_redshifts_for_photoncons", 5, 5, &arg0, &arg1, &arg2, &arg3, &arg4)) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg0, (char **)&x0); + if (datasize != 0) { + x0 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg0, (char **)&x0, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? 
(float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { adjust_redshifts_for_photoncons(x0, x1, x2, x3, x4); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_adjust_redshifts_for_photoncons _cffi_d_adjust_redshifts_for_photoncons +#endif + +static double _cffi_d_atomic_cooling_threshold(float x0) +{ + return atomic_cooling_threshold(x0); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_atomic_cooling_threshold(PyObject *self, PyObject *arg0) +{ + float x0; + double result; + PyObject *pyresult; + + x0 = (float)_cffi_to_c_float(arg0); + if (x0 == (float)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = atomic_cooling_threshold(x0); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_atomic_cooling_threshold _cffi_d_atomic_cooling_threshold +#endif + +static double _cffi_d_conditional_mf(double x0, double x1, double x2, double x3, int x4) +{ + return conditional_mf(x0, x1, x2, x3, x4); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_conditional_mf(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + int x4; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + + if (!PyArg_UnpackTuple(args, "conditional_mf", 5, 5, &arg0, &arg1, &arg2, &arg3, &arg4)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + 
return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = _cffi_to_c_int(arg4, int); + if (x4 == (int)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = conditional_mf(x0, x1, x2, x3, x4); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_conditional_mf _cffi_d_conditional_mf +#endif + +static void _cffi_d_determine_deltaz_for_photoncons(void) +{ + determine_deltaz_for_photoncons(); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_determine_deltaz_for_photoncons(PyObject *self, PyObject *noarg) +{ + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { determine_deltaz_for_photoncons(); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + (void)noarg; /* unused */ + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_determine_deltaz_for_photoncons _cffi_d_determine_deltaz_for_photoncons +#endif + +static double _cffi_d_dicke(double x0) +{ + return dicke(x0); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_dicke(PyObject *self, PyObject *arg0) +{ + double x0; + double result; + PyObject *pyresult; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = dicke(x0); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_dicke _cffi_d_dicke +#endif + +static double _cffi_d_dsigmasqdm_z0(double x0) +{ + return dsigmasqdm_z0(x0); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_dsigmasqdm_z0(PyObject *self, PyObject *arg0) +{ + double x0; + double result; + PyObject *pyresult; 
+ + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = dsigmasqdm_z0(x0); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_dsigmasqdm_z0 _cffi_d_dsigmasqdm_z0 +#endif + +static double _cffi_d_expected_nhalo(double x0, UserParams * x1, CosmoParams * x2, AstroParams * x3, FlagOptions * x4) +{ + return expected_nhalo(x0, x1, x2, x3, x4); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_expected_nhalo(PyObject *self, PyObject *args) +{ + double x0; + UserParams * x1; + CosmoParams * x2; + AstroParams * x3; + FlagOptions * x4; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + + if (!PyArg_UnpackTuple(args, "expected_nhalo", 5, 5, &arg0, &arg1, &arg2, &arg3, &arg4)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? 
(AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = expected_nhalo(x0, x1, x2, x3, x4); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_expected_nhalo _cffi_d_expected_nhalo +#endif + +static void _cffi_d_free(void * x0) +{ + free(x0); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_free(PyObject *self, PyObject *arg0) +{ + void * x0; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(498), arg0, (char **)&x0); + if (datasize != 0) { + x0 = ((size_t)datasize) <= 640 ? 
(void *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(498), arg0, (char **)&x0, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { free(x0); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_free _cffi_d_free +#endif + +static double _cffi_d_get_delta_crit(int x0, double x1, double x2) +{ + return get_delta_crit(x0, x1, x2); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_get_delta_crit(PyObject *self, PyObject *args) +{ + int x0; + double x1; + double x2; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + + if (!PyArg_UnpackTuple(args, "get_delta_crit", 3, 3, &arg0, &arg1, &arg2)) + return NULL; + + x0 = _cffi_to_c_int(arg0, int); + if (x0 == (int)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = get_delta_crit(x0, x1, x2); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_get_delta_crit _cffi_d_get_delta_crit +#endif + +static void _cffi_d_init_FcollTable(double x0, double x1, _Bool x2) +{ + init_FcollTable(x0, x1, x2); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_init_FcollTable(PyObject *self, PyObject *args) +{ + double x0; + double x1; + _Bool x2; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + + if (!PyArg_UnpackTuple(args, "init_FcollTable", 3, 3, &arg0, &arg1, &arg2)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + 
return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (_Bool)_cffi_to_c__Bool(arg2); + if (x2 == (_Bool)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { init_FcollTable(x0, x1, x2); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_init_FcollTable _cffi_d_init_FcollTable +#endif + +static int _cffi_d_init_heat(void) +{ + return init_heat(); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_init_heat(PyObject *self, PyObject *noarg) +{ + int result; + PyObject *pyresult; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = init_heat(); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + (void)noarg; /* unused */ + pyresult = _cffi_from_c_int(result, int); + return pyresult; +} +#else +# define _cffi_f_init_heat _cffi_d_init_heat +#endif + +static double _cffi_d_init_ps(void) +{ + return init_ps(); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_init_ps(PyObject *self, PyObject *noarg) +{ + double result; + PyObject *pyresult; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = init_ps(); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + (void)noarg; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_init_ps _cffi_d_init_ps +#endif + +static void _cffi_d_initialiseSigmaMInterpTable(float x0, float x1) +{ + initialiseSigmaMInterpTable(x0, x1); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_initialiseSigmaMInterpTable(PyObject *self, PyObject *args) +{ + float x0; + float x1; + PyObject *arg0; + PyObject *arg1; + + if (!PyArg_UnpackTuple(args, "initialiseSigmaMInterpTable", 2, 2, &arg0, &arg1)) + return NULL; + + x0 = (float)_cffi_to_c_float(arg0); + if (x0 == (float)-1 && PyErr_Occurred()) + return NULL; + + x1 = 
(float)_cffi_to_c_float(arg1); + if (x1 == (float)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { initialiseSigmaMInterpTable(x0, x1); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_initialiseSigmaMInterpTable _cffi_d_initialiseSigmaMInterpTable +#endif + +static void _cffi_d_initialise_FgtrM_delta_table(double x0, double x1, double x2, double x3, double x4, double x5) +{ + initialise_FgtrM_delta_table(x0, x1, x2, x3, x4, x5); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_initialise_FgtrM_delta_table(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + double x5; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + + if (!PyArg_UnpackTuple(args, "initialise_FgtrM_delta_table", 6, 6, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { initialise_FgtrM_delta_table(x0, x1, x2, x3, x4, x5); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_initialise_FgtrM_delta_table _cffi_d_initialise_FgtrM_delta_table +#endif + +static void _cffi_d_initialise_GL(float x0, float x1) 
+{ + initialise_GL(x0, x1); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_initialise_GL(PyObject *self, PyObject *args) +{ + float x0; + float x1; + PyObject *arg0; + PyObject *arg1; + + if (!PyArg_UnpackTuple(args, "initialise_GL", 2, 2, &arg0, &arg1)) + return NULL; + + x0 = (float)_cffi_to_c_float(arg0); + if (x0 == (float)-1 && PyErr_Occurred()) + return NULL; + + x1 = (float)_cffi_to_c_float(arg1); + if (x1 == (float)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { initialise_GL(x0, x1); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_initialise_GL _cffi_d_initialise_GL +#endif + +static void _cffi_d_initialise_Nion_Conditional_spline(float x0, float x1, float x2, float x3, float x4, float x5, float x6, float x7, float x8, float x9, float x10, float x11, float x12, float x13, float x14, float x15, float x16, float x17, float x18, float x19, float x20, float x21, int x22, int x23, _Bool x24, _Bool x25) +{ + initialise_Nion_Conditional_spline(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_initialise_Nion_Conditional_spline(PyObject *self, PyObject *args) +{ + float x0; + float x1; + float x2; + float x3; + float x4; + float x5; + float x6; + float x7; + float x8; + float x9; + float x10; + float x11; + float x12; + float x13; + float x14; + float x15; + float x16; + float x17; + float x18; + float x19; + float x20; + float x21; + int x22; + int x23; + _Bool x24; + _Bool x25; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + PyObject *arg10; + PyObject *arg11; + PyObject *arg12; + PyObject *arg13; + PyObject *arg14; + PyObject *arg15; + PyObject *arg16; + 
PyObject *arg17; + PyObject *arg18; + PyObject *arg19; + PyObject *arg20; + PyObject *arg21; + PyObject *arg22; + PyObject *arg23; + PyObject *arg24; + PyObject *arg25; + + if (!PyArg_UnpackTuple(args, "initialise_Nion_Conditional_spline", 26, 26, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11, &arg12, &arg13, &arg14, &arg15, &arg16, &arg17, &arg18, &arg19, &arg20, &arg21, &arg22, &arg23, &arg24, &arg25)) + return NULL; + + x0 = (float)_cffi_to_c_float(arg0); + if (x0 == (float)-1 && PyErr_Occurred()) + return NULL; + + x1 = (float)_cffi_to_c_float(arg1); + if (x1 == (float)-1 && PyErr_Occurred()) + return NULL; + + x2 = (float)_cffi_to_c_float(arg2); + if (x2 == (float)-1 && PyErr_Occurred()) + return NULL; + + x3 = (float)_cffi_to_c_float(arg3); + if (x3 == (float)-1 && PyErr_Occurred()) + return NULL; + + x4 = (float)_cffi_to_c_float(arg4); + if (x4 == (float)-1 && PyErr_Occurred()) + return NULL; + + x5 = (float)_cffi_to_c_float(arg5); + if (x5 == (float)-1 && PyErr_Occurred()) + return NULL; + + x6 = (float)_cffi_to_c_float(arg6); + if (x6 == (float)-1 && PyErr_Occurred()) + return NULL; + + x7 = (float)_cffi_to_c_float(arg7); + if (x7 == (float)-1 && PyErr_Occurred()) + return NULL; + + x8 = (float)_cffi_to_c_float(arg8); + if (x8 == (float)-1 && PyErr_Occurred()) + return NULL; + + x9 = (float)_cffi_to_c_float(arg9); + if (x9 == (float)-1 && PyErr_Occurred()) + return NULL; + + x10 = (float)_cffi_to_c_float(arg10); + if (x10 == (float)-1 && PyErr_Occurred()) + return NULL; + + x11 = (float)_cffi_to_c_float(arg11); + if (x11 == (float)-1 && PyErr_Occurred()) + return NULL; + + x12 = (float)_cffi_to_c_float(arg12); + if (x12 == (float)-1 && PyErr_Occurred()) + return NULL; + + x13 = (float)_cffi_to_c_float(arg13); + if (x13 == (float)-1 && PyErr_Occurred()) + return NULL; + + x14 = (float)_cffi_to_c_float(arg14); + if (x14 == (float)-1 && PyErr_Occurred()) + return NULL; + + x15 = (float)_cffi_to_c_float(arg15); + if (x15 
== (float)-1 && PyErr_Occurred()) + return NULL; + + x16 = (float)_cffi_to_c_float(arg16); + if (x16 == (float)-1 && PyErr_Occurred()) + return NULL; + + x17 = (float)_cffi_to_c_float(arg17); + if (x17 == (float)-1 && PyErr_Occurred()) + return NULL; + + x18 = (float)_cffi_to_c_float(arg18); + if (x18 == (float)-1 && PyErr_Occurred()) + return NULL; + + x19 = (float)_cffi_to_c_float(arg19); + if (x19 == (float)-1 && PyErr_Occurred()) + return NULL; + + x20 = (float)_cffi_to_c_float(arg20); + if (x20 == (float)-1 && PyErr_Occurred()) + return NULL; + + x21 = (float)_cffi_to_c_float(arg21); + if (x21 == (float)-1 && PyErr_Occurred()) + return NULL; + + x22 = _cffi_to_c_int(arg22, int); + if (x22 == (int)-1 && PyErr_Occurred()) + return NULL; + + x23 = _cffi_to_c_int(arg23, int); + if (x23 == (int)-1 && PyErr_Occurred()) + return NULL; + + x24 = (_Bool)_cffi_to_c__Bool(arg24); + if (x24 == (_Bool)-1 && PyErr_Occurred()) + return NULL; + + x25 = (_Bool)_cffi_to_c__Bool(arg25); + if (x25 == (_Bool)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { initialise_Nion_Conditional_spline(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_initialise_Nion_Conditional_spline _cffi_d_initialise_Nion_Conditional_spline +#endif + +static void _cffi_d_initialise_Nion_Ts_spline(int x0, float x1, float x2, float x3, float x4, float x5, float x6, float x7, float x8, float x9, float x10, _Bool x11) +{ + initialise_Nion_Ts_spline(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_initialise_Nion_Ts_spline(PyObject *self, PyObject *args) +{ + int x0; + float x1; + float x2; + float x3; + float x4; + float x5; + float x6; + float x7; + float x8; + float x9; + float x10; + _Bool x11; + 
PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + PyObject *arg10; + PyObject *arg11; + + if (!PyArg_UnpackTuple(args, "initialise_Nion_Ts_spline", 12, 12, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11)) + return NULL; + + x0 = _cffi_to_c_int(arg0, int); + if (x0 == (int)-1 && PyErr_Occurred()) + return NULL; + + x1 = (float)_cffi_to_c_float(arg1); + if (x1 == (float)-1 && PyErr_Occurred()) + return NULL; + + x2 = (float)_cffi_to_c_float(arg2); + if (x2 == (float)-1 && PyErr_Occurred()) + return NULL; + + x3 = (float)_cffi_to_c_float(arg3); + if (x3 == (float)-1 && PyErr_Occurred()) + return NULL; + + x4 = (float)_cffi_to_c_float(arg4); + if (x4 == (float)-1 && PyErr_Occurred()) + return NULL; + + x5 = (float)_cffi_to_c_float(arg5); + if (x5 == (float)-1 && PyErr_Occurred()) + return NULL; + + x6 = (float)_cffi_to_c_float(arg6); + if (x6 == (float)-1 && PyErr_Occurred()) + return NULL; + + x7 = (float)_cffi_to_c_float(arg7); + if (x7 == (float)-1 && PyErr_Occurred()) + return NULL; + + x8 = (float)_cffi_to_c_float(arg8); + if (x8 == (float)-1 && PyErr_Occurred()) + return NULL; + + x9 = (float)_cffi_to_c_float(arg9); + if (x9 == (float)-1 && PyErr_Occurred()) + return NULL; + + x10 = (float)_cffi_to_c_float(arg10); + if (x10 == (float)-1 && PyErr_Occurred()) + return NULL; + + x11 = (_Bool)_cffi_to_c__Bool(arg11); + if (x11 == (_Bool)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { initialise_Nion_Ts_spline(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_initialise_Nion_Ts_spline _cffi_d_initialise_Nion_Ts_spline +#endif + +static void _cffi_d_initialise_SFRD_Conditional_table(double x0, double x1, double x2, 
float x3, double x4, double x5, double x6, float x7, float x8, float x9, float x10, int x11, int x12, _Bool x13) +{ + initialise_SFRD_Conditional_table(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_initialise_SFRD_Conditional_table(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + float x3; + double x4; + double x5; + double x6; + float x7; + float x8; + float x9; + float x10; + int x11; + int x12; + _Bool x13; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + PyObject *arg10; + PyObject *arg11; + PyObject *arg12; + PyObject *arg13; + + if (!PyArg_UnpackTuple(args, "initialise_SFRD_Conditional_table", 14, 14, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11, &arg12, &arg13)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (float)_cffi_to_c_float(arg3); + if (x3 == (float)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = (double)_cffi_to_c_double(arg6); + if (x6 == (double)-1 && PyErr_Occurred()) + return NULL; + + x7 = (float)_cffi_to_c_float(arg7); + if (x7 == (float)-1 && PyErr_Occurred()) + return NULL; + + x8 = (float)_cffi_to_c_float(arg8); + if (x8 == (float)-1 && PyErr_Occurred()) + return NULL; + + x9 = (float)_cffi_to_c_float(arg9); + if (x9 == (float)-1 && PyErr_Occurred()) + return NULL; + + x10 = 
(float)_cffi_to_c_float(arg10); + if (x10 == (float)-1 && PyErr_Occurred()) + return NULL; + + x11 = _cffi_to_c_int(arg11, int); + if (x11 == (int)-1 && PyErr_Occurred()) + return NULL; + + x12 = _cffi_to_c_int(arg12, int); + if (x12 == (int)-1 && PyErr_Occurred()) + return NULL; + + x13 = (_Bool)_cffi_to_c__Bool(arg13); + if (x13 == (_Bool)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { initialise_SFRD_Conditional_table(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_initialise_SFRD_Conditional_table _cffi_d_initialise_SFRD_Conditional_table +#endif + +static void _cffi_d_initialise_SFRD_spline(int x0, float x1, float x2, float x3, float x4, float x5, float x6, float x7, _Bool x8) +{ + initialise_SFRD_spline(x0, x1, x2, x3, x4, x5, x6, x7, x8); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_initialise_SFRD_spline(PyObject *self, PyObject *args) +{ + int x0; + float x1; + float x2; + float x3; + float x4; + float x5; + float x6; + float x7; + _Bool x8; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + + if (!PyArg_UnpackTuple(args, "initialise_SFRD_spline", 9, 9, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8)) + return NULL; + + x0 = _cffi_to_c_int(arg0, int); + if (x0 == (int)-1 && PyErr_Occurred()) + return NULL; + + x1 = (float)_cffi_to_c_float(arg1); + if (x1 == (float)-1 && PyErr_Occurred()) + return NULL; + + x2 = (float)_cffi_to_c_float(arg2); + if (x2 == (float)-1 && PyErr_Occurred()) + return NULL; + + x3 = (float)_cffi_to_c_float(arg3); + if (x3 == (float)-1 && PyErr_Occurred()) + return NULL; + + x4 = (float)_cffi_to_c_float(arg4); + if (x4 == (float)-1 && PyErr_Occurred()) + return NULL; + + x5 = 
(float)_cffi_to_c_float(arg5); + if (x5 == (float)-1 && PyErr_Occurred()) + return NULL; + + x6 = (float)_cffi_to_c_float(arg6); + if (x6 == (float)-1 && PyErr_Occurred()) + return NULL; + + x7 = (float)_cffi_to_c_float(arg7); + if (x7 == (float)-1 && PyErr_Occurred()) + return NULL; + + x8 = (_Bool)_cffi_to_c__Bool(arg8); + if (x8 == (_Bool)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { initialise_SFRD_spline(x0, x1, x2, x3, x4, x5, x6, x7, x8); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_initialise_SFRD_spline _cffi_d_initialise_SFRD_spline +#endif + +static void _cffi_d_initialise_dNdM_inverse_table(double x0, double x1, double x2, double x3, double x4, _Bool x5) +{ + initialise_dNdM_inverse_table(x0, x1, x2, x3, x4, x5); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_initialise_dNdM_inverse_table(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + _Bool x5; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + + if (!PyArg_UnpackTuple(args, "initialise_dNdM_inverse_table", 6, 6, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (_Bool)_cffi_to_c__Bool(arg5); + if (x5 == (_Bool)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { 
initialise_dNdM_inverse_table(x0, x1, x2, x3, x4, x5); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_initialise_dNdM_inverse_table _cffi_d_initialise_dNdM_inverse_table +#endif + +static void _cffi_d_initialise_dNdM_tables(double x0, double x1, double x2, double x3, double x4, double x5, _Bool x6) +{ + initialise_dNdM_tables(x0, x1, x2, x3, x4, x5, x6); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_initialise_dNdM_tables(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + double x3; + double x4; + double x5; + _Bool x6; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + + if (!PyArg_UnpackTuple(args, "initialise_dNdM_tables", 7, 7, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = (double)_cffi_to_c_double(arg3); + if (x3 == (double)-1 && PyErr_Occurred()) + return NULL; + + x4 = (double)_cffi_to_c_double(arg4); + if (x4 == (double)-1 && PyErr_Occurred()) + return NULL; + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = (_Bool)_cffi_to_c__Bool(arg6); + if (x6 == (_Bool)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { initialise_dNdM_tables(x0, x1, x2, x3, x4, x5, x6); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_initialise_dNdM_tables _cffi_d_initialise_dNdM_tables +#endif + +static void _cffi_d_set_alphacons_params(double x0, double x1) +{ + 
set_alphacons_params(x0, x1); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_set_alphacons_params(PyObject *self, PyObject *args) +{ + double x0; + double x1; + PyObject *arg0; + PyObject *arg1; + + if (!PyArg_UnpackTuple(args, "set_alphacons_params", 2, 2, &arg0, &arg1)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { set_alphacons_params(x0, x1); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + Py_INCREF(Py_None); + return Py_None; +} +#else +# define _cffi_f_set_alphacons_params _cffi_d_set_alphacons_params +#endif + +static double _cffi_d_sigma_z0(double x0) +{ + return sigma_z0(x0); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_sigma_z0(PyObject *self, PyObject *arg0) +{ + double x0; + double result; + PyObject *pyresult; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = sigma_z0(x0); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_sigma_z0 _cffi_d_sigma_z0 +#endif + +static int _cffi_d_single_test_sample(UserParams * x0, CosmoParams * x1, AstroParams * x2, FlagOptions * x3, int x4, int x5, float * x6, int * x7, double x8, double x9, int * x10, int * x11, double * x12, double * x13, double * x14, float * x15, int * x16) +{ + return single_test_sample(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_single_test_sample(PyObject *self, PyObject *args) +{ + UserParams * x0; + CosmoParams * x1; + AstroParams * x2; + FlagOptions * x3; + int x4; + int x5; + float * x6; + int * x7; + double x8; + 
double x9; + int * x10; + int * x11; + double * x12; + double * x13; + double * x14; + float * x15; + int * x16; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + PyObject *arg10; + PyObject *arg11; + PyObject *arg12; + PyObject *arg13; + PyObject *arg14; + PyObject *arg15; + PyObject *arg16; + + if (!PyArg_UnpackTuple(args, "single_test_sample", 17, 17, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11, &arg12, &arg13, &arg14, &arg15, &arg16)) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg0, (char **)&x0); + if (datasize != 0) { + x0 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? 
(FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + x4 = _cffi_to_c_int(arg4, int); + if (x4 == (int)-1 && PyErr_Occurred()) + return NULL; + + x5 = _cffi_to_c_int(arg5, int); + if (x5 == (int)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg6, (char **)&x6); + if (datasize != 0) { + x6 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg6, (char **)&x6, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(217), arg7, (char **)&x7); + if (datasize != 0) { + x7 = ((size_t)datasize) <= 640 ? (int *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(217), arg7, (char **)&x7, + datasize, &large_args_free) < 0) + return NULL; + } + + x8 = (double)_cffi_to_c_double(arg8); + if (x8 == (double)-1 && PyErr_Occurred()) + return NULL; + + x9 = (double)_cffi_to_c_double(arg9); + if (x9 == (double)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(217), arg10, (char **)&x10); + if (datasize != 0) { + x10 = ((size_t)datasize) <= 640 ? (int *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(217), arg10, (char **)&x10, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(217), arg11, (char **)&x11); + if (datasize != 0) { + x11 = ((size_t)datasize) <= 640 ? (int *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(217), arg11, (char **)&x11, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg12, (char **)&x12); + if (datasize != 0) { + x12 = ((size_t)datasize) <= 640 ? 
(double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg12, (char **)&x12, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg13, (char **)&x13); + if (datasize != 0) { + x13 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg13, (char **)&x13, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg14, (char **)&x14); + if (datasize != 0) { + x14 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg14, (char **)&x14, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg15, (char **)&x15); + if (datasize != 0) { + x15 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg15, (char **)&x15, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(217), arg16, (char **)&x16); + if (datasize != 0) { + x16 = ((size_t)datasize) <= 640 ? 
(int *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(217), arg16, (char **)&x16, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = single_test_sample(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_single_test_sample _cffi_d_single_test_sample +#endif + +static int _cffi_d_test_filter(UserParams * x0, CosmoParams * x1, AstroParams * x2, FlagOptions * x3, float * x4, double x5, double x6, int x7, double * x8) +{ + return test_filter(x0, x1, x2, x3, x4, x5, x6, x7, x8); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_test_filter(PyObject *self, PyObject *args) +{ + UserParams * x0; + CosmoParams * x1; + AstroParams * x2; + FlagOptions * x3; + float * x4; + double x5; + double x6; + int x7; + double * x8; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + + if (!PyArg_UnpackTuple(args, "test_filter", 9, 9, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8)) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg0, (char **)&x0); + if (datasize != 0) { + x0 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? 
(CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + x5 = (double)_cffi_to_c_double(arg5); + if (x5 == (double)-1 && PyErr_Occurred()) + return NULL; + + x6 = (double)_cffi_to_c_double(arg6); + if (x6 == (double)-1 && PyErr_Occurred()) + return NULL; + + x7 = _cffi_to_c_int(arg7, int); + if (x7 == (int)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(207), arg8, (char **)&x8); + if (datasize != 0) { + x8 = ((size_t)datasize) <= 640 ? 
(double *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(207), arg8, (char **)&x8, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = test_filter(x0, x1, x2, x3, x4, x5, x6, x7, x8); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_test_filter _cffi_d_test_filter +#endif + +static int _cffi_d_test_halo_props(double x0, UserParams * x1, CosmoParams * x2, AstroParams * x3, FlagOptions * x4, float * x5, float * x6, float * x7, float * x8, PerturbHaloField * x9, float * x10) +{ + return test_halo_props(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_test_halo_props(PyObject *self, PyObject *args) +{ + double x0; + UserParams * x1; + CosmoParams * x2; + AstroParams * x3; + FlagOptions * x4; + float * x5; + float * x6; + float * x7; + float * x8; + PerturbHaloField * x9; + float * x10; + Py_ssize_t datasize; + struct _cffi_freeme_s *large_args_free = NULL; + int result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + PyObject *arg4; + PyObject *arg5; + PyObject *arg6; + PyObject *arg7; + PyObject *arg8; + PyObject *arg9; + PyObject *arg10; + + if (!PyArg_UnpackTuple(args, "test_halo_props", 11, 11, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(5), arg1, (char **)&x1); + if (datasize != 0) { + x1 = ((size_t)datasize) <= 640 ? 
(UserParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(6), arg2, (char **)&x2); + if (datasize != 0) { + x2 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(7), arg3, (char **)&x3); + if (datasize != 0) { + x3 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(7), arg3, (char **)&x3, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(8), arg4, (char **)&x4); + if (datasize != 0) { + x4 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(8), arg4, (char **)&x4, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg5, (char **)&x5); + if (datasize != 0) { + x5 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg5, (char **)&x5, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg6, (char **)&x6); + if (datasize != 0) { + x6 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg6, (char **)&x6, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg7, (char **)&x7); + if (datasize != 0) { + x7 = ((size_t)datasize) <= 640 ? 
(float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg7, (char **)&x7, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg8, (char **)&x8); + if (datasize != 0) { + x8 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg8, (char **)&x8, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(263), arg9, (char **)&x9); + if (datasize != 0) { + x9 = ((size_t)datasize) <= 640 ? (PerturbHaloField *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(263), arg9, (char **)&x9, + datasize, &large_args_free) < 0) + return NULL; + } + + datasize = _cffi_prepare_pointer_call_argument( + _cffi_type(174), arg10, (char **)&x10); + if (datasize != 0) { + x10 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; + if (_cffi_convert_array_argument(_cffi_type(174), arg10, (char **)&x10, + datasize, &large_args_free) < 0) + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = test_halo_props(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_int(result, int); + if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); + return pyresult; +} +#else +# define _cffi_f_test_halo_props _cffi_d_test_halo_props +#endif + +static double _cffi_d_unconditional_mf(double x0, double x1, double x2, int x3) +{ + return unconditional_mf(x0, x1, x2, x3); +} +#ifndef PYPY_VERSION +static PyObject * +_cffi_f_unconditional_mf(PyObject *self, PyObject *args) +{ + double x0; + double x1; + double x2; + int x3; + double result; + PyObject *pyresult; + PyObject *arg0; + PyObject *arg1; + PyObject *arg2; + PyObject *arg3; + + if (!PyArg_UnpackTuple(args, 
"unconditional_mf", 4, 4, &arg0, &arg1, &arg2, &arg3)) + return NULL; + + x0 = (double)_cffi_to_c_double(arg0); + if (x0 == (double)-1 && PyErr_Occurred()) + return NULL; + + x1 = (double)_cffi_to_c_double(arg1); + if (x1 == (double)-1 && PyErr_Occurred()) + return NULL; + + x2 = (double)_cffi_to_c_double(arg2); + if (x2 == (double)-1 && PyErr_Occurred()) + return NULL; + + x3 = _cffi_to_c_int(arg3, int); + if (x3 == (int)-1 && PyErr_Occurred()) + return NULL; + + Py_BEGIN_ALLOW_THREADS + _cffi_restore_errno(); + { result = unconditional_mf(x0, x1, x2, x3); } + _cffi_save_errno(); + Py_END_ALLOW_THREADS + + (void)self; /* unused */ + pyresult = _cffi_from_c_double(result); + return pyresult; +} +#else +# define _cffi_f_unconditional_mf _cffi_d_unconditional_mf +#endif + +_CFFI_UNUSED_FN +static void _cffi_checkfld__AstroParams(AstroParams *p) +{ + /* only to generate compile-time warnings or errors */ + (void)p; + { float *tmp = &p->HII_EFF_FACTOR; (void)tmp; } + { float *tmp = &p->F_STAR10; (void)tmp; } + { float *tmp = &p->ALPHA_STAR; (void)tmp; } + { float *tmp = &p->ALPHA_STAR_MINI; (void)tmp; } + { float *tmp = &p->SIGMA_STAR; (void)tmp; } + { float *tmp = &p->CORR_STAR; (void)tmp; } + { double *tmp = &p->UPPER_STELLAR_TURNOVER_MASS; (void)tmp; } + { double *tmp = &p->UPPER_STELLAR_TURNOVER_INDEX; (void)tmp; } + { float *tmp = &p->F_STAR7_MINI; (void)tmp; } + { float *tmp = &p->t_STAR; (void)tmp; } + { float *tmp = &p->CORR_SFR; (void)tmp; } + { double *tmp = &p->SIGMA_SFR_INDEX; (void)tmp; } + { double *tmp = &p->SIGMA_SFR_LIM; (void)tmp; } + { double *tmp = &p->L_X; (void)tmp; } + { double *tmp = &p->L_X_MINI; (void)tmp; } + { double *tmp = &p->SIGMA_LX; (void)tmp; } + { double *tmp = &p->CORR_LX; (void)tmp; } + { float *tmp = &p->F_ESC10; (void)tmp; } + { float *tmp = &p->ALPHA_ESC; (void)tmp; } + { float *tmp = &p->F_ESC7_MINI; (void)tmp; } + { float *tmp = &p->M_TURN; (void)tmp; } + { float *tmp = &p->R_BUBBLE_MAX; (void)tmp; } + { float *tmp = 
&p->ION_Tvir_MIN; (void)tmp; } + { double *tmp = &p->F_H2_SHIELD; (void)tmp; } + { float *tmp = &p->NU_X_THRESH; (void)tmp; } + { float *tmp = &p->X_RAY_SPEC_INDEX; (void)tmp; } + { float *tmp = &p->X_RAY_Tvir_MIN; (void)tmp; } + { double *tmp = &p->A_LW; (void)tmp; } + { double *tmp = &p->BETA_LW; (void)tmp; } + { double *tmp = &p->A_VCB; (void)tmp; } + { double *tmp = &p->BETA_VCB; (void)tmp; } + (void)((p->N_RSD_STEPS) | 0); /* check that 'AstroParams.N_RSD_STEPS' is an integer */ +} +struct _cffi_align__AstroParams { char x; AstroParams y; }; + +_CFFI_UNUSED_FN +static void _cffi_checkfld__BrightnessTemp(BrightnessTemp *p) +{ + /* only to generate compile-time warnings or errors */ + (void)p; + { float * *tmp = &p->brightness_temp; (void)tmp; } +} +struct _cffi_align__BrightnessTemp { char x; BrightnessTemp y; }; + +_CFFI_UNUSED_FN +static void _cffi_checkfld__CosmoParams(CosmoParams *p) +{ + /* only to generate compile-time warnings or errors */ + (void)p; + { float *tmp = &p->SIGMA_8; (void)tmp; } + { float *tmp = &p->hlittle; (void)tmp; } + { float *tmp = &p->OMm; (void)tmp; } + { float *tmp = &p->OMl; (void)tmp; } + { float *tmp = &p->OMb; (void)tmp; } + { float *tmp = &p->POWER_INDEX; (void)tmp; } +} +struct _cffi_align__CosmoParams { char x; CosmoParams y; }; + +_CFFI_UNUSED_FN +static void _cffi_checkfld__FlagOptions(FlagOptions *p) +{ + /* only to generate compile-time warnings or errors */ + (void)p; + (void)((p->USE_HALO_FIELD) | 0); /* check that 'FlagOptions.USE_HALO_FIELD' is an integer */ + (void)((p->USE_MINI_HALOS) | 0); /* check that 'FlagOptions.USE_MINI_HALOS' is an integer */ + (void)((p->USE_CMB_HEATING) | 0); /* check that 'FlagOptions.USE_CMB_HEATING' is an integer */ + (void)((p->USE_LYA_HEATING) | 0); /* check that 'FlagOptions.USE_LYA_HEATING' is an integer */ + (void)((p->USE_MASS_DEPENDENT_ZETA) | 0); /* check that 'FlagOptions.USE_MASS_DEPENDENT_ZETA' is an integer */ + (void)((p->SUBCELL_RSD) | 0); /* check that 
'FlagOptions.SUBCELL_RSD' is an integer */ + (void)((p->APPLY_RSDS) | 0); /* check that 'FlagOptions.APPLY_RSDS' is an integer */ + (void)((p->INHOMO_RECO) | 0); /* check that 'FlagOptions.INHOMO_RECO' is an integer */ + (void)((p->USE_TS_FLUCT) | 0); /* check that 'FlagOptions.USE_TS_FLUCT' is an integer */ + (void)((p->M_MIN_in_Mass) | 0); /* check that 'FlagOptions.M_MIN_in_Mass' is an integer */ + (void)((p->FIX_VCB_AVG) | 0); /* check that 'FlagOptions.FIX_VCB_AVG' is an integer */ + (void)((p->HALO_STOCHASTICITY) | 0); /* check that 'FlagOptions.HALO_STOCHASTICITY' is an integer */ + (void)((p->USE_EXP_FILTER) | 0); /* check that 'FlagOptions.USE_EXP_FILTER' is an integer */ + (void)((p->FIXED_HALO_GRIDS) | 0); /* check that 'FlagOptions.FIXED_HALO_GRIDS' is an integer */ + (void)((p->CELL_RECOMB) | 0); /* check that 'FlagOptions.CELL_RECOMB' is an integer */ + (void)((p->PHOTON_CONS_TYPE) | 0); /* check that 'FlagOptions.PHOTON_CONS_TYPE' is an integer */ + (void)((p->USE_UPPER_STELLAR_TURNOVER) | 0); /* check that 'FlagOptions.USE_UPPER_STELLAR_TURNOVER' is an integer */ +} +struct _cffi_align__FlagOptions { char x; FlagOptions y; }; + +_CFFI_UNUSED_FN +static void _cffi_checkfld__GlobalParams(GlobalParams *p) +{ + /* only to generate compile-time warnings or errors */ + (void)p; + { float *tmp = &p->ALPHA_UVB; (void)tmp; } + (void)((p->EVOLVE_DENSITY_LINEARLY) | 0); /* check that 'GlobalParams.EVOLVE_DENSITY_LINEARLY' is an integer */ + (void)((p->SMOOTH_EVOLVED_DENSITY_FIELD) | 0); /* check that 'GlobalParams.SMOOTH_EVOLVED_DENSITY_FIELD' is an integer */ + { float *tmp = &p->R_smooth_density; (void)tmp; } + { float *tmp = &p->HII_ROUND_ERR; (void)tmp; } + (void)((p->FIND_BUBBLE_ALGORITHM) | 0); /* check that 'GlobalParams.FIND_BUBBLE_ALGORITHM' is an integer */ + (void)((p->N_POISSON) | 0); /* check that 'GlobalParams.N_POISSON' is an integer */ + (void)((p->T_USE_VELOCITIES) | 0); /* check that 'GlobalParams.T_USE_VELOCITIES' is an integer */ + { float 
*tmp = &p->MAX_DVDR; (void)tmp; } + { float *tmp = &p->DELTA_R_HII_FACTOR; (void)tmp; } + { float *tmp = &p->DELTA_R_FACTOR; (void)tmp; } + (void)((p->HII_FILTER) | 0); /* check that 'GlobalParams.HII_FILTER' is an integer */ + { float *tmp = &p->INITIAL_REDSHIFT; (void)tmp; } + { float *tmp = &p->R_OVERLAP_FACTOR; (void)tmp; } + (void)((p->DELTA_CRIT_MODE) | 0); /* check that 'GlobalParams.DELTA_CRIT_MODE' is an integer */ + (void)((p->HALO_FILTER) | 0); /* check that 'GlobalParams.HALO_FILTER' is an integer */ + (void)((p->OPTIMIZE) | 0); /* check that 'GlobalParams.OPTIMIZE' is an integer */ + { float *tmp = &p->OPTIMIZE_MIN_MASS; (void)tmp; } + { float *tmp = &p->CRIT_DENS_TRANSITION; (void)tmp; } + { float *tmp = &p->MIN_DENSITY_LOW_LIMIT; (void)tmp; } + (void)((p->RecombPhotonCons) | 0); /* check that 'GlobalParams.RecombPhotonCons' is an integer */ + { float *tmp = &p->PhotonConsStart; (void)tmp; } + { float *tmp = &p->PhotonConsEnd; (void)tmp; } + { float *tmp = &p->PhotonConsAsymptoteTo; (void)tmp; } + { float *tmp = &p->PhotonConsEndCalibz; (void)tmp; } + (void)((p->PhotonConsSmoothing) | 0); /* check that 'GlobalParams.PhotonConsSmoothing' is an integer */ + (void)((p->HEAT_FILTER) | 0); /* check that 'GlobalParams.HEAT_FILTER' is an integer */ + { double *tmp = &p->CLUMPING_FACTOR; (void)tmp; } + { float *tmp = &p->Z_HEAT_MAX; (void)tmp; } + { float *tmp = &p->R_XLy_MAX; (void)tmp; } + (void)((p->NUM_FILTER_STEPS_FOR_Ts) | 0); /* check that 'GlobalParams.NUM_FILTER_STEPS_FOR_Ts' is an integer */ + { float *tmp = &p->ZPRIME_STEP_FACTOR; (void)tmp; } + { double *tmp = &p->TK_at_Z_HEAT_MAX; (void)tmp; } + { double *tmp = &p->XION_at_Z_HEAT_MAX; (void)tmp; } + (void)((p->Pop) | 0); /* check that 'GlobalParams.Pop' is an integer */ + { float *tmp = &p->Pop2_ion; (void)tmp; } + { float *tmp = &p->Pop3_ion; (void)tmp; } + { float *tmp = &p->NU_X_BAND_MAX; (void)tmp; } + { float *tmp = &p->NU_X_MAX; (void)tmp; } + (void)((p->NBINS_LF) | 0); /* check that 
'GlobalParams.NBINS_LF' is an integer */ + (void)((p->P_CUTOFF) | 0); /* check that 'GlobalParams.P_CUTOFF' is an integer */ + { float *tmp = &p->M_WDM; (void)tmp; } + { float *tmp = &p->g_x; (void)tmp; } + { float *tmp = &p->OMn; (void)tmp; } + { float *tmp = &p->OMk; (void)tmp; } + { float *tmp = &p->OMr; (void)tmp; } + { float *tmp = &p->OMtot; (void)tmp; } + { float *tmp = &p->Y_He; (void)tmp; } + { float *tmp = &p->wl; (void)tmp; } + { float *tmp = &p->SHETH_b; (void)tmp; } + { float *tmp = &p->SHETH_c; (void)tmp; } + { double *tmp = &p->Zreion_HeII; (void)tmp; } + (void)((p->FILTER) | 0); /* check that 'GlobalParams.FILTER' is an integer */ + { char * *tmp = &p->external_table_path; (void)tmp; } + { char * *tmp = &p->wisdoms_path; (void)tmp; } + { float *tmp = &p->R_BUBBLE_MIN; (void)tmp; } + { float *tmp = &p->M_MIN_INTEGRAL; (void)tmp; } + { float *tmp = &p->M_MAX_INTEGRAL; (void)tmp; } + { float *tmp = &p->T_RE; (void)tmp; } + { float *tmp = &p->VAVG; (void)tmp; } + (void)((p->USE_ADIABATIC_FLUCTUATIONS) | 0); /* check that 'GlobalParams.USE_ADIABATIC_FLUCTUATIONS' is an integer */ +} +struct _cffi_align__GlobalParams { char x; GlobalParams y; }; + +_CFFI_UNUSED_FN +static void _cffi_checkfld__HaloBox(HaloBox *p) +{ + /* only to generate compile-time warnings or errors */ + (void)p; + { float * *tmp = &p->halo_mass; (void)tmp; } + { float * *tmp = &p->halo_stars; (void)tmp; } + { float * *tmp = &p->halo_stars_mini; (void)tmp; } + { int * *tmp = &p->count; (void)tmp; } + { float * *tmp = &p->n_ion; (void)tmp; } + { float * *tmp = &p->halo_sfr; (void)tmp; } + { float * *tmp = &p->halo_xray; (void)tmp; } + { float * *tmp = &p->halo_sfr_mini; (void)tmp; } + { float * *tmp = &p->whalo_sfr; (void)tmp; } + { double *tmp = &p->log10_Mcrit_ACG_ave; (void)tmp; } + { double *tmp = &p->log10_Mcrit_MCG_ave; (void)tmp; } +} +struct _cffi_align__HaloBox { char x; HaloBox y; }; + +_CFFI_UNUSED_FN +static void _cffi_checkfld__HaloField(HaloField *p) +{ + /* only to 
generate compile-time warnings or errors */ + (void)p; + (void)((p->n_halos) | 0); /* check that 'HaloField.n_halos' is an integer */ + (void)((p->buffer_size) | 0); /* check that 'HaloField.buffer_size' is an integer */ + { float * *tmp = &p->halo_masses; (void)tmp; } + { int * *tmp = &p->halo_coords; (void)tmp; } + { float * *tmp = &p->star_rng; (void)tmp; } + { float * *tmp = &p->sfr_rng; (void)tmp; } + { float * *tmp = &p->xray_rng; (void)tmp; } +} +struct _cffi_align__HaloField { char x; HaloField y; }; + +_CFFI_UNUSED_FN +static void _cffi_checkfld__InitialConditions(InitialConditions *p) +{ + /* only to generate compile-time warnings or errors */ + (void)p; + { float * *tmp = &p->lowres_density; (void)tmp; } + { float * *tmp = &p->lowres_vx; (void)tmp; } + { float * *tmp = &p->lowres_vy; (void)tmp; } + { float * *tmp = &p->lowres_vz; (void)tmp; } + { float * *tmp = &p->lowres_vx_2LPT; (void)tmp; } + { float * *tmp = &p->lowres_vy_2LPT; (void)tmp; } + { float * *tmp = &p->lowres_vz_2LPT; (void)tmp; } + { float * *tmp = &p->hires_density; (void)tmp; } + { float * *tmp = &p->hires_vx; (void)tmp; } + { float * *tmp = &p->hires_vy; (void)tmp; } + { float * *tmp = &p->hires_vz; (void)tmp; } + { float * *tmp = &p->hires_vx_2LPT; (void)tmp; } + { float * *tmp = &p->hires_vy_2LPT; (void)tmp; } + { float * *tmp = &p->hires_vz_2LPT; (void)tmp; } + { float * *tmp = &p->lowres_vcb; (void)tmp; } +} +struct _cffi_align__InitialConditions { char x; InitialConditions y; }; + +_CFFI_UNUSED_FN +static void _cffi_checkfld__IonizedBox(IonizedBox *p) +{ + /* only to generate compile-time warnings or errors */ + (void)p; + { double *tmp = &p->mean_f_coll; (void)tmp; } + { double *tmp = &p->mean_f_coll_MINI; (void)tmp; } + { double *tmp = &p->log10_Mturnover_ave; (void)tmp; } + { double *tmp = &p->log10_Mturnover_MINI_ave; (void)tmp; } + { float * *tmp = &p->xH_box; (void)tmp; } + { float * *tmp = &p->Gamma12_box; (void)tmp; } + { float * *tmp = &p->MFP_box; (void)tmp; } + { float 
* *tmp = &p->z_re_box; (void)tmp; } + { float * *tmp = &p->dNrec_box; (void)tmp; } + { float * *tmp = &p->temp_kinetic_all_gas; (void)tmp; } + { float * *tmp = &p->Fcoll; (void)tmp; } + { float * *tmp = &p->Fcoll_MINI; (void)tmp; } +} +struct _cffi_align__IonizedBox { char x; IonizedBox y; }; + +_CFFI_UNUSED_FN +static void _cffi_checkfld__PerturbHaloField(PerturbHaloField *p) +{ + /* only to generate compile-time warnings or errors */ + (void)p; + (void)((p->n_halos) | 0); /* check that 'PerturbHaloField.n_halos' is an integer */ + (void)((p->buffer_size) | 0); /* check that 'PerturbHaloField.buffer_size' is an integer */ + { float * *tmp = &p->halo_masses; (void)tmp; } + { int * *tmp = &p->halo_coords; (void)tmp; } + { float * *tmp = &p->star_rng; (void)tmp; } + { float * *tmp = &p->sfr_rng; (void)tmp; } + { float * *tmp = &p->xray_rng; (void)tmp; } +} +struct _cffi_align__PerturbHaloField { char x; PerturbHaloField y; }; + +_CFFI_UNUSED_FN +static void _cffi_checkfld__PerturbedField(PerturbedField *p) +{ + /* only to generate compile-time warnings or errors */ + (void)p; + { float * *tmp = &p->density; (void)tmp; } + { float * *tmp = &p->velocity_x; (void)tmp; } + { float * *tmp = &p->velocity_y; (void)tmp; } + { float * *tmp = &p->velocity_z; (void)tmp; } +} +struct _cffi_align__PerturbedField { char x; PerturbedField y; }; + +_CFFI_UNUSED_FN +static void _cffi_checkfld__TsBox(TsBox *p) +{ + /* only to generate compile-time warnings or errors */ + (void)p; + { float * *tmp = &p->Ts_box; (void)tmp; } + { float * *tmp = &p->x_e_box; (void)tmp; } + { float * *tmp = &p->Tk_box; (void)tmp; } + { float * *tmp = &p->J_21_LW_box; (void)tmp; } +} +struct _cffi_align__TsBox { char x; TsBox y; }; + +_CFFI_UNUSED_FN +static void _cffi_checkfld__UserParams(UserParams *p) +{ + /* only to generate compile-time warnings or errors */ + (void)p; + (void)((p->HII_DIM) | 0); /* check that 'UserParams.HII_DIM' is an integer */ + (void)((p->DIM) | 0); /* check that 'UserParams.DIM' 
is an integer */ + { float *tmp = &p->BOX_LEN; (void)tmp; } + { float *tmp = &p->NON_CUBIC_FACTOR; (void)tmp; } + (void)((p->USE_FFTW_WISDOM) | 0); /* check that 'UserParams.USE_FFTW_WISDOM' is an integer */ + (void)((p->HMF) | 0); /* check that 'UserParams.HMF' is an integer */ + (void)((p->USE_RELATIVE_VELOCITIES) | 0); /* check that 'UserParams.USE_RELATIVE_VELOCITIES' is an integer */ + (void)((p->POWER_SPECTRUM) | 0); /* check that 'UserParams.POWER_SPECTRUM' is an integer */ + (void)((p->N_THREADS) | 0); /* check that 'UserParams.N_THREADS' is an integer */ + (void)((p->PERTURB_ON_HIGH_RES) | 0); /* check that 'UserParams.PERTURB_ON_HIGH_RES' is an integer */ + (void)((p->NO_RNG) | 0); /* check that 'UserParams.NO_RNG' is an integer */ + (void)((p->USE_INTERPOLATION_TABLES) | 0); /* check that 'UserParams.USE_INTERPOLATION_TABLES' is an integer */ + (void)((p->INTEGRATION_METHOD_ATOMIC) | 0); /* check that 'UserParams.INTEGRATION_METHOD_ATOMIC' is an integer */ + (void)((p->INTEGRATION_METHOD_MINI) | 0); /* check that 'UserParams.INTEGRATION_METHOD_MINI' is an integer */ + (void)((p->USE_2LPT) | 0); /* check that 'UserParams.USE_2LPT' is an integer */ + (void)((p->MINIMIZE_MEMORY) | 0); /* check that 'UserParams.MINIMIZE_MEMORY' is an integer */ + (void)((p->KEEP_3D_VELOCITIES) | 0); /* check that 'UserParams.KEEP_3D_VELOCITIES' is an integer */ + { float *tmp = &p->SAMPLER_MIN_MASS; (void)tmp; } + { double *tmp = &p->SAMPLER_BUFFER_FACTOR; (void)tmp; } + { float *tmp = &p->MAXHALO_FACTOR; (void)tmp; } + (void)((p->N_COND_INTERP) | 0); /* check that 'UserParams.N_COND_INTERP' is an integer */ + (void)((p->N_PROB_INTERP) | 0); /* check that 'UserParams.N_PROB_INTERP' is an integer */ + { double *tmp = &p->MIN_LOGPROB; (void)tmp; } + (void)((p->SAMPLE_METHOD) | 0); /* check that 'UserParams.SAMPLE_METHOD' is an integer */ + (void)((p->AVG_BELOW_SAMPLER) | 0); /* check that 'UserParams.AVG_BELOW_SAMPLER' is an integer */ + { double *tmp = 
&p->HALOMASS_CORRECTION; (void)tmp; } + { double *tmp = &p->PARKINSON_G0; (void)tmp; } + { double *tmp = &p->PARKINSON_y1; (void)tmp; } + { double *tmp = &p->PARKINSON_y2; (void)tmp; } +} +struct _cffi_align__UserParams { char x; UserParams y; }; + +_CFFI_UNUSED_FN +static void _cffi_checkfld__XraySourceBox(XraySourceBox *p) +{ + /* only to generate compile-time warnings or errors */ + (void)p; + { float * *tmp = &p->filtered_sfr; (void)tmp; } + { float * *tmp = &p->filtered_xray; (void)tmp; } + { float * *tmp = &p->filtered_sfr_mini; (void)tmp; } + { double * *tmp = &p->mean_log10_Mcrit_LW; (void)tmp; } + { double * *tmp = &p->mean_sfr; (void)tmp; } + { double * *tmp = &p->mean_sfr_mini; (void)tmp; } +} +struct _cffi_align__XraySourceBox { char x; XraySourceBox y; }; + +static AstroParams * *_cffi_var_astro_params_global(void) +{ + return &(astro_params_global); +} + +static CosmoParams * *_cffi_var_cosmo_params_global(void) +{ + return &(cosmo_params_global); +} + +static FlagOptions * *_cffi_var_flag_options_global(void) +{ + return &(flag_options_global); +} + +static GlobalParams *_cffi_var_global_params(void) +{ + return &(global_params); +} + +static _Bool *_cffi_var_photon_cons_allocated(void) +{ + return &(photon_cons_allocated); +} + +static UserParams * *_cffi_var_user_params_global(void) +{ + return &(user_params_global); +} + +static const struct _cffi_global_s _cffi_globals[] = { + { "Broadcast_struct_global_all", (void *)_cffi_f_Broadcast_struct_global_all, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 384), (void *)_cffi_d_Broadcast_struct_global_all }, + { "Broadcast_struct_global_noastro", (void *)_cffi_f_Broadcast_struct_global_noastro, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 380), (void *)_cffi_d_Broadcast_struct_global_noastro }, + { "ComputeBrightnessTemp", (void *)_cffi_f_ComputeBrightnessTemp, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 291), (void *)_cffi_d_ComputeBrightnessTemp }, + { "ComputeHaloBox", (void *)_cffi_f_ComputeHaloBox, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 
255), (void *)_cffi_d_ComputeHaloBox }, + { "ComputeHaloField", (void *)_cffi_f_ComputeHaloField, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 309), (void *)_cffi_d_ComputeHaloField }, + { "ComputeInitialConditions", (void *)_cffi_f_ComputeInitialConditions, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 365), (void *)_cffi_d_ComputeInitialConditions }, + { "ComputeIonizedBox", (void *)_cffi_f_ComputeIonizedBox, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 321), (void *)_cffi_d_ComputeIonizedBox }, + { "ComputeLF", (void *)_cffi_f_ComputeLF, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 351), (void *)_cffi_d_ComputeLF }, + { "ComputePerturbField", (void *)_cffi_f_ComputePerturbField, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 302), (void *)_cffi_d_ComputePerturbField }, + { "ComputePerturbHaloField", (void *)_cffi_f_ComputePerturbHaloField, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 281), (void *)_cffi_d_ComputePerturbHaloField }, + { "ComputeTau", (void *)_cffi_f_ComputeTau, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 170), (void *)_cffi_d_ComputeTau }, + { "ComputeTsBox", (void *)_cffi_f_ComputeTsBox, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 336), (void *)_cffi_d_ComputeTsBox }, + { "ComputeZstart_PhotonCons", (void *)_cffi_f_ComputeZstart_PhotonCons, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 236), (void *)_cffi_d_ComputeZstart_PhotonCons }, + { "CreateFFTWWisdoms", (void *)_cffi_f_CreateFFTWWisdoms, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 177), (void *)_cffi_d_CreateFFTWWisdoms }, + { "EvaluateFcoll_delta", (void *)_cffi_f_EvaluateFcoll_delta, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 19), (void *)_cffi_d_EvaluateFcoll_delta }, + { "EvaluateMcoll", (void *)_cffi_f_EvaluateMcoll, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 25), (void *)_cffi_d_EvaluateMcoll }, + { "EvaluateNhalo", (void *)_cffi_f_EvaluateNhalo, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 25), (void *)_cffi_d_EvaluateNhalo }, + { "EvaluateNhaloInv", (void *)_cffi_f_EvaluateNhaloInv, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 10), (void *)_cffi_d_EvaluateNhaloInv }, + { "EvaluateNionTs", (void *)_cffi_f_EvaluateNionTs, 
_CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 14), (void *)_cffi_d_EvaluateNionTs }, + { "EvaluateNionTs_MINI", (void *)_cffi_f_EvaluateNionTs_MINI, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 19), (void *)_cffi_d_EvaluateNionTs_MINI }, + { "EvaluateNion_Conditional", (void *)_cffi_f_EvaluateNion_Conditional, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 55), (void *)_cffi_d_EvaluateNion_Conditional }, + { "EvaluateNion_Conditional_MINI", (void *)_cffi_f_EvaluateNion_Conditional_MINI, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 79), (void *)_cffi_d_EvaluateNion_Conditional_MINI }, + { "EvaluateSFRD", (void *)_cffi_f_EvaluateSFRD, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 10), (void *)_cffi_d_EvaluateSFRD }, + { "EvaluateSFRD_Conditional", (void *)_cffi_f_EvaluateSFRD_Conditional, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 34), (void *)_cffi_d_EvaluateSFRD_Conditional }, + { "EvaluateSFRD_Conditional_MINI", (void *)_cffi_f_EvaluateSFRD_Conditional_MINI, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 44), (void *)_cffi_d_EvaluateSFRD_Conditional_MINI }, + { "EvaluateSFRD_MINI", (void *)_cffi_f_EvaluateSFRD_MINI, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 14), (void *)_cffi_d_EvaluateSFRD_MINI }, + { "EvaluateSigma", (void *)_cffi_f_EvaluateSigma, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 0), (void *)_cffi_d_EvaluateSigma }, + { "EvaluatedFcolldz", (void *)_cffi_f_EvaluatedFcolldz, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 19), (void *)_cffi_d_EvaluatedFcolldz }, + { "EvaluatedSigmasqdm", (void *)_cffi_f_EvaluatedSigmasqdm, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 0), (void *)_cffi_d_EvaluatedSigmasqdm }, + { "Fcoll_General", (void *)_cffi_f_Fcoll_General, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 14), (void *)_cffi_d_Fcoll_General }, + { "FreePhotonConsMemory", (void *)_cffi_f_FreePhotonConsMemory, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_N, 500), (void *)_cffi_d_FreePhotonConsMemory }, + { "FunctionThatCatches", (void *)_cffi_f_FunctionThatCatches, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 231), (void *)_cffi_d_FunctionThatCatches }, + { "FunctionThatThrows", (void *)_cffi_f_FunctionThatThrows, 
_CFFI_OP(_CFFI_OP_CPYTHON_BLTN_N, 500), (void *)_cffi_d_FunctionThatThrows }, + { "InitialisePhotonCons", (void *)_cffi_f_InitialisePhotonCons, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 181), (void *)_cffi_d_InitialisePhotonCons }, + { "Mcoll_Conditional", (void *)_cffi_f_Mcoll_Conditional, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 138), (void *)_cffi_d_Mcoll_Conditional }, + { "Nhalo_Conditional", (void *)_cffi_f_Nhalo_Conditional, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 138), (void *)_cffi_d_Nhalo_Conditional }, + { "Nion_ConditionalM", (void *)_cffi_f_Nion_ConditionalM, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 122), (void *)_cffi_d_Nion_ConditionalM }, + { "Nion_ConditionalM_MINI", (void *)_cffi_f_Nion_ConditionalM_MINI, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 105), (void *)_cffi_d_Nion_ConditionalM_MINI }, + { "Nion_General", (void *)_cffi_f_Nion_General, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 67), (void *)_cffi_d_Nion_General }, + { "Nion_General_MINI", (void *)_cffi_f_Nion_General_MINI, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 92), (void *)_cffi_d_Nion_General_MINI }, + { "ObtainPhotonConsData", (void *)_cffi_f_ObtainPhotonConsData, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 239), (void *)_cffi_d_ObtainPhotonConsData }, + { "PhotonCons_Calibration", (void *)_cffi_f_PhotonCons_Calibration, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 250), (void *)_cffi_d_PhotonCons_Calibration }, + { "SomethingThatCatches", (void *)_cffi_f_SomethingThatCatches, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 228), (void *)_cffi_d_SomethingThatCatches }, + { "UpdateXraySourceBox", (void *)_cffi_f_UpdateXraySourceBox, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 187), (void *)_cffi_d_UpdateXraySourceBox }, + { "adjust_redshifts_for_photoncons", (void *)_cffi_f_adjust_redshifts_for_photoncons, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 373), (void *)_cffi_d_adjust_redshifts_for_photoncons }, + { "astro_params_global", (void *)_cffi_var_astro_params_global, _CFFI_OP(_CFFI_OP_GLOBAL_VAR_F, 7), (void *)0 }, + { "atomic_cooling_threshold", (void *)_cffi_f_atomic_cooling_threshold, 
_CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 160), (void *)_cffi_d_atomic_cooling_threshold }, + { "conditional_mf", (void *)_cffi_f_conditional_mf, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 147), (void *)_cffi_d_conditional_mf }, + { "cosmo_params_global", (void *)_cffi_var_cosmo_params_global, _CFFI_OP(_CFFI_OP_GLOBAL_VAR_F, 6), (void *)0 }, + { "determine_deltaz_for_photoncons", (void *)_cffi_f_determine_deltaz_for_photoncons, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_N, 500), (void *)_cffi_d_determine_deltaz_for_photoncons }, + { "dicke", (void *)_cffi_f_dicke, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 0), (void *)_cffi_d_dicke }, + { "dsigmasqdm_z0", (void *)_cffi_f_dsigmasqdm_z0, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 0), (void *)_cffi_d_dsigmasqdm_z0 }, + { "expected_nhalo", (void *)_cffi_f_expected_nhalo, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 3), (void *)_cffi_d_expected_nhalo }, + { "flag_options_global", (void *)_cffi_var_flag_options_global, _CFFI_OP(_CFFI_OP_GLOBAL_VAR_F, 8), (void *)0 }, + { "free", (void *)_cffi_f_free, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 497), (void *)_cffi_d_free }, + { "get_delta_crit", (void *)_cffi_f_get_delta_crit, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 163), (void *)_cffi_d_get_delta_crit }, + { "global_params", (void *)_cffi_var_global_params, _CFFI_OP(_CFFI_OP_GLOBAL_VAR_F, 506), (void *)0 }, + { "init_FcollTable", (void *)_cffi_f_init_FcollTable, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 394), (void *)_cffi_d_init_FcollTable }, + { "init_heat", (void *)_cffi_f_init_heat, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_N, 371), (void *)_cffi_d_init_heat }, + { "init_ps", (void *)_cffi_f_init_ps, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_N, 168), (void *)_cffi_d_init_ps }, + { "initialiseSigmaMInterpTable", (void *)_cffi_f_initialiseSigmaMInterpTable, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 440), (void *)_cffi_d_initialiseSigmaMInterpTable }, + { "initialise_FgtrM_delta_table", (void *)_cffi_f_initialise_FgtrM_delta_table, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 407), (void *)_cffi_d_initialise_FgtrM_delta_table }, + { 
"initialise_GL", (void *)_cffi_f_initialise_GL, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 440), (void *)_cffi_d_initialise_GL }, + { "initialise_Nion_Conditional_spline", (void *)_cffi_f_initialise_Nion_Conditional_spline, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 444), (void *)_cffi_d_initialise_Nion_Conditional_spline }, + { "initialise_Nion_Ts_spline", (void *)_cffi_f_initialise_Nion_Ts_spline, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 483), (void *)_cffi_d_initialise_Nion_Ts_spline }, + { "initialise_SFRD_Conditional_table", (void *)_cffi_f_initialise_SFRD_Conditional_table, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 424), (void *)_cffi_d_initialise_SFRD_Conditional_table }, + { "initialise_SFRD_spline", (void *)_cffi_f_initialise_SFRD_spline, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 472), (void *)_cffi_d_initialise_SFRD_spline }, + { "initialise_dNdM_inverse_table", (void *)_cffi_f_initialise_dNdM_inverse_table, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 399), (void *)_cffi_d_initialise_dNdM_inverse_table }, + { "initialise_dNdM_tables", (void *)_cffi_f_initialise_dNdM_tables, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 415), (void *)_cffi_d_initialise_dNdM_tables }, + { "photon_cons_allocated", (void *)_cffi_var_photon_cons_allocated, _CFFI_OP(_CFFI_OP_GLOBAL_VAR_F, 65), (void *)0 }, + { "set_alphacons_params", (void *)_cffi_f_set_alphacons_params, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 390), (void *)_cffi_d_set_alphacons_params }, + { "sigma_z0", (void *)_cffi_f_sigma_z0, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 0), (void *)_cffi_d_sigma_z0 }, + { "single_test_sample", (void *)_cffi_f_single_test_sample, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 209), (void *)_cffi_d_single_test_sample }, + { "test_filter", (void *)_cffi_f_test_filter, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 198), (void *)_cffi_d_test_filter }, + { "test_halo_props", (void *)_cffi_f_test_halo_props, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 268), (void *)_cffi_d_test_halo_props }, + { "unconditional_mf", (void *)_cffi_f_unconditional_mf, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 154), (void 
*)_cffi_d_unconditional_mf }, + { "user_params_global", (void *)_cffi_var_user_params_global, _CFFI_OP(_CFFI_OP_GLOBAL_VAR_F, 5), (void *)0 }, +}; + +static const struct _cffi_field_s _cffi_fields[] = { + { "HII_EFF_FACTOR", offsetof(AstroParams, HII_EFF_FACTOR), + sizeof(((AstroParams *)0)->HII_EFF_FACTOR), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "F_STAR10", offsetof(AstroParams, F_STAR10), + sizeof(((AstroParams *)0)->F_STAR10), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "ALPHA_STAR", offsetof(AstroParams, ALPHA_STAR), + sizeof(((AstroParams *)0)->ALPHA_STAR), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "ALPHA_STAR_MINI", offsetof(AstroParams, ALPHA_STAR_MINI), + sizeof(((AstroParams *)0)->ALPHA_STAR_MINI), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "SIGMA_STAR", offsetof(AstroParams, SIGMA_STAR), + sizeof(((AstroParams *)0)->SIGMA_STAR), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "CORR_STAR", offsetof(AstroParams, CORR_STAR), + sizeof(((AstroParams *)0)->CORR_STAR), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "UPPER_STELLAR_TURNOVER_MASS", offsetof(AstroParams, UPPER_STELLAR_TURNOVER_MASS), + sizeof(((AstroParams *)0)->UPPER_STELLAR_TURNOVER_MASS), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "UPPER_STELLAR_TURNOVER_INDEX", offsetof(AstroParams, UPPER_STELLAR_TURNOVER_INDEX), + sizeof(((AstroParams *)0)->UPPER_STELLAR_TURNOVER_INDEX), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "F_STAR7_MINI", offsetof(AstroParams, F_STAR7_MINI), + sizeof(((AstroParams *)0)->F_STAR7_MINI), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "t_STAR", offsetof(AstroParams, t_STAR), + sizeof(((AstroParams *)0)->t_STAR), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "CORR_SFR", offsetof(AstroParams, CORR_SFR), + sizeof(((AstroParams *)0)->CORR_SFR), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "SIGMA_SFR_INDEX", offsetof(AstroParams, SIGMA_SFR_INDEX), + sizeof(((AstroParams *)0)->SIGMA_SFR_INDEX), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "SIGMA_SFR_LIM", offsetof(AstroParams, SIGMA_SFR_LIM), + sizeof(((AstroParams *)0)->SIGMA_SFR_LIM), + 
_CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "L_X", offsetof(AstroParams, L_X), + sizeof(((AstroParams *)0)->L_X), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "L_X_MINI", offsetof(AstroParams, L_X_MINI), + sizeof(((AstroParams *)0)->L_X_MINI), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "SIGMA_LX", offsetof(AstroParams, SIGMA_LX), + sizeof(((AstroParams *)0)->SIGMA_LX), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "CORR_LX", offsetof(AstroParams, CORR_LX), + sizeof(((AstroParams *)0)->CORR_LX), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "F_ESC10", offsetof(AstroParams, F_ESC10), + sizeof(((AstroParams *)0)->F_ESC10), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "ALPHA_ESC", offsetof(AstroParams, ALPHA_ESC), + sizeof(((AstroParams *)0)->ALPHA_ESC), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "F_ESC7_MINI", offsetof(AstroParams, F_ESC7_MINI), + sizeof(((AstroParams *)0)->F_ESC7_MINI), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "M_TURN", offsetof(AstroParams, M_TURN), + sizeof(((AstroParams *)0)->M_TURN), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "R_BUBBLE_MAX", offsetof(AstroParams, R_BUBBLE_MAX), + sizeof(((AstroParams *)0)->R_BUBBLE_MAX), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "ION_Tvir_MIN", offsetof(AstroParams, ION_Tvir_MIN), + sizeof(((AstroParams *)0)->ION_Tvir_MIN), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "F_H2_SHIELD", offsetof(AstroParams, F_H2_SHIELD), + sizeof(((AstroParams *)0)->F_H2_SHIELD), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "NU_X_THRESH", offsetof(AstroParams, NU_X_THRESH), + sizeof(((AstroParams *)0)->NU_X_THRESH), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "X_RAY_SPEC_INDEX", offsetof(AstroParams, X_RAY_SPEC_INDEX), + sizeof(((AstroParams *)0)->X_RAY_SPEC_INDEX), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "X_RAY_Tvir_MIN", offsetof(AstroParams, X_RAY_Tvir_MIN), + sizeof(((AstroParams *)0)->X_RAY_Tvir_MIN), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "A_LW", offsetof(AstroParams, A_LW), + sizeof(((AstroParams *)0)->A_LW), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "BETA_LW", offsetof(AstroParams, BETA_LW), + sizeof(((AstroParams 
*)0)->BETA_LW), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "A_VCB", offsetof(AstroParams, A_VCB), + sizeof(((AstroParams *)0)->A_VCB), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "BETA_VCB", offsetof(AstroParams, BETA_VCB), + sizeof(((AstroParams *)0)->BETA_VCB), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "N_RSD_STEPS", offsetof(AstroParams, N_RSD_STEPS), + sizeof(((AstroParams *)0)->N_RSD_STEPS), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "brightness_temp", offsetof(BrightnessTemp, brightness_temp), + sizeof(((BrightnessTemp *)0)->brightness_temp), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "SIGMA_8", offsetof(CosmoParams, SIGMA_8), + sizeof(((CosmoParams *)0)->SIGMA_8), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "hlittle", offsetof(CosmoParams, hlittle), + sizeof(((CosmoParams *)0)->hlittle), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "OMm", offsetof(CosmoParams, OMm), + sizeof(((CosmoParams *)0)->OMm), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "OMl", offsetof(CosmoParams, OMl), + sizeof(((CosmoParams *)0)->OMl), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "OMb", offsetof(CosmoParams, OMb), + sizeof(((CosmoParams *)0)->OMb), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "POWER_INDEX", offsetof(CosmoParams, POWER_INDEX), + sizeof(((CosmoParams *)0)->POWER_INDEX), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "USE_HALO_FIELD", offsetof(FlagOptions, USE_HALO_FIELD), + sizeof(((FlagOptions *)0)->USE_HALO_FIELD), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "USE_MINI_HALOS", offsetof(FlagOptions, USE_MINI_HALOS), + sizeof(((FlagOptions *)0)->USE_MINI_HALOS), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "USE_CMB_HEATING", offsetof(FlagOptions, USE_CMB_HEATING), + sizeof(((FlagOptions *)0)->USE_CMB_HEATING), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "USE_LYA_HEATING", offsetof(FlagOptions, USE_LYA_HEATING), + sizeof(((FlagOptions *)0)->USE_LYA_HEATING), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "USE_MASS_DEPENDENT_ZETA", offsetof(FlagOptions, USE_MASS_DEPENDENT_ZETA), + sizeof(((FlagOptions *)0)->USE_MASS_DEPENDENT_ZETA), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { 
"SUBCELL_RSD", offsetof(FlagOptions, SUBCELL_RSD), + sizeof(((FlagOptions *)0)->SUBCELL_RSD), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "APPLY_RSDS", offsetof(FlagOptions, APPLY_RSDS), + sizeof(((FlagOptions *)0)->APPLY_RSDS), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "INHOMO_RECO", offsetof(FlagOptions, INHOMO_RECO), + sizeof(((FlagOptions *)0)->INHOMO_RECO), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "USE_TS_FLUCT", offsetof(FlagOptions, USE_TS_FLUCT), + sizeof(((FlagOptions *)0)->USE_TS_FLUCT), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "M_MIN_in_Mass", offsetof(FlagOptions, M_MIN_in_Mass), + sizeof(((FlagOptions *)0)->M_MIN_in_Mass), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "FIX_VCB_AVG", offsetof(FlagOptions, FIX_VCB_AVG), + sizeof(((FlagOptions *)0)->FIX_VCB_AVG), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "HALO_STOCHASTICITY", offsetof(FlagOptions, HALO_STOCHASTICITY), + sizeof(((FlagOptions *)0)->HALO_STOCHASTICITY), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "USE_EXP_FILTER", offsetof(FlagOptions, USE_EXP_FILTER), + sizeof(((FlagOptions *)0)->USE_EXP_FILTER), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "FIXED_HALO_GRIDS", offsetof(FlagOptions, FIXED_HALO_GRIDS), + sizeof(((FlagOptions *)0)->FIXED_HALO_GRIDS), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "CELL_RECOMB", offsetof(FlagOptions, CELL_RECOMB), + sizeof(((FlagOptions *)0)->CELL_RECOMB), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "PHOTON_CONS_TYPE", offsetof(FlagOptions, PHOTON_CONS_TYPE), + sizeof(((FlagOptions *)0)->PHOTON_CONS_TYPE), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "USE_UPPER_STELLAR_TURNOVER", offsetof(FlagOptions, USE_UPPER_STELLAR_TURNOVER), + sizeof(((FlagOptions *)0)->USE_UPPER_STELLAR_TURNOVER), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "ALPHA_UVB", offsetof(GlobalParams, ALPHA_UVB), + sizeof(((GlobalParams *)0)->ALPHA_UVB), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "EVOLVE_DENSITY_LINEARLY", offsetof(GlobalParams, EVOLVE_DENSITY_LINEARLY), + sizeof(((GlobalParams *)0)->EVOLVE_DENSITY_LINEARLY), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { 
"SMOOTH_EVOLVED_DENSITY_FIELD", offsetof(GlobalParams, SMOOTH_EVOLVED_DENSITY_FIELD), + sizeof(((GlobalParams *)0)->SMOOTH_EVOLVED_DENSITY_FIELD), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "R_smooth_density", offsetof(GlobalParams, R_smooth_density), + sizeof(((GlobalParams *)0)->R_smooth_density), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "HII_ROUND_ERR", offsetof(GlobalParams, HII_ROUND_ERR), + sizeof(((GlobalParams *)0)->HII_ROUND_ERR), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "FIND_BUBBLE_ALGORITHM", offsetof(GlobalParams, FIND_BUBBLE_ALGORITHM), + sizeof(((GlobalParams *)0)->FIND_BUBBLE_ALGORITHM), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "N_POISSON", offsetof(GlobalParams, N_POISSON), + sizeof(((GlobalParams *)0)->N_POISSON), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "T_USE_VELOCITIES", offsetof(GlobalParams, T_USE_VELOCITIES), + sizeof(((GlobalParams *)0)->T_USE_VELOCITIES), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "MAX_DVDR", offsetof(GlobalParams, MAX_DVDR), + sizeof(((GlobalParams *)0)->MAX_DVDR), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "DELTA_R_HII_FACTOR", offsetof(GlobalParams, DELTA_R_HII_FACTOR), + sizeof(((GlobalParams *)0)->DELTA_R_HII_FACTOR), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "DELTA_R_FACTOR", offsetof(GlobalParams, DELTA_R_FACTOR), + sizeof(((GlobalParams *)0)->DELTA_R_FACTOR), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "HII_FILTER", offsetof(GlobalParams, HII_FILTER), + sizeof(((GlobalParams *)0)->HII_FILTER), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "INITIAL_REDSHIFT", offsetof(GlobalParams, INITIAL_REDSHIFT), + sizeof(((GlobalParams *)0)->INITIAL_REDSHIFT), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "R_OVERLAP_FACTOR", offsetof(GlobalParams, R_OVERLAP_FACTOR), + sizeof(((GlobalParams *)0)->R_OVERLAP_FACTOR), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "DELTA_CRIT_MODE", offsetof(GlobalParams, DELTA_CRIT_MODE), + sizeof(((GlobalParams *)0)->DELTA_CRIT_MODE), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "HALO_FILTER", offsetof(GlobalParams, HALO_FILTER), + sizeof(((GlobalParams 
*)0)->HALO_FILTER), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "OPTIMIZE", offsetof(GlobalParams, OPTIMIZE), + sizeof(((GlobalParams *)0)->OPTIMIZE), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "OPTIMIZE_MIN_MASS", offsetof(GlobalParams, OPTIMIZE_MIN_MASS), + sizeof(((GlobalParams *)0)->OPTIMIZE_MIN_MASS), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "CRIT_DENS_TRANSITION", offsetof(GlobalParams, CRIT_DENS_TRANSITION), + sizeof(((GlobalParams *)0)->CRIT_DENS_TRANSITION), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "MIN_DENSITY_LOW_LIMIT", offsetof(GlobalParams, MIN_DENSITY_LOW_LIMIT), + sizeof(((GlobalParams *)0)->MIN_DENSITY_LOW_LIMIT), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "RecombPhotonCons", offsetof(GlobalParams, RecombPhotonCons), + sizeof(((GlobalParams *)0)->RecombPhotonCons), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "PhotonConsStart", offsetof(GlobalParams, PhotonConsStart), + sizeof(((GlobalParams *)0)->PhotonConsStart), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "PhotonConsEnd", offsetof(GlobalParams, PhotonConsEnd), + sizeof(((GlobalParams *)0)->PhotonConsEnd), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "PhotonConsAsymptoteTo", offsetof(GlobalParams, PhotonConsAsymptoteTo), + sizeof(((GlobalParams *)0)->PhotonConsAsymptoteTo), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "PhotonConsEndCalibz", offsetof(GlobalParams, PhotonConsEndCalibz), + sizeof(((GlobalParams *)0)->PhotonConsEndCalibz), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "PhotonConsSmoothing", offsetof(GlobalParams, PhotonConsSmoothing), + sizeof(((GlobalParams *)0)->PhotonConsSmoothing), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "HEAT_FILTER", offsetof(GlobalParams, HEAT_FILTER), + sizeof(((GlobalParams *)0)->HEAT_FILTER), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "CLUMPING_FACTOR", offsetof(GlobalParams, CLUMPING_FACTOR), + sizeof(((GlobalParams *)0)->CLUMPING_FACTOR), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "Z_HEAT_MAX", offsetof(GlobalParams, Z_HEAT_MAX), + sizeof(((GlobalParams *)0)->Z_HEAT_MAX), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "R_XLy_MAX", 
offsetof(GlobalParams, R_XLy_MAX), + sizeof(((GlobalParams *)0)->R_XLy_MAX), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "NUM_FILTER_STEPS_FOR_Ts", offsetof(GlobalParams, NUM_FILTER_STEPS_FOR_Ts), + sizeof(((GlobalParams *)0)->NUM_FILTER_STEPS_FOR_Ts), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "ZPRIME_STEP_FACTOR", offsetof(GlobalParams, ZPRIME_STEP_FACTOR), + sizeof(((GlobalParams *)0)->ZPRIME_STEP_FACTOR), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "TK_at_Z_HEAT_MAX", offsetof(GlobalParams, TK_at_Z_HEAT_MAX), + sizeof(((GlobalParams *)0)->TK_at_Z_HEAT_MAX), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "XION_at_Z_HEAT_MAX", offsetof(GlobalParams, XION_at_Z_HEAT_MAX), + sizeof(((GlobalParams *)0)->XION_at_Z_HEAT_MAX), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "Pop", offsetof(GlobalParams, Pop), + sizeof(((GlobalParams *)0)->Pop), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "Pop2_ion", offsetof(GlobalParams, Pop2_ion), + sizeof(((GlobalParams *)0)->Pop2_ion), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "Pop3_ion", offsetof(GlobalParams, Pop3_ion), + sizeof(((GlobalParams *)0)->Pop3_ion), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "NU_X_BAND_MAX", offsetof(GlobalParams, NU_X_BAND_MAX), + sizeof(((GlobalParams *)0)->NU_X_BAND_MAX), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "NU_X_MAX", offsetof(GlobalParams, NU_X_MAX), + sizeof(((GlobalParams *)0)->NU_X_MAX), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "NBINS_LF", offsetof(GlobalParams, NBINS_LF), + sizeof(((GlobalParams *)0)->NBINS_LF), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "P_CUTOFF", offsetof(GlobalParams, P_CUTOFF), + sizeof(((GlobalParams *)0)->P_CUTOFF), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "M_WDM", offsetof(GlobalParams, M_WDM), + sizeof(((GlobalParams *)0)->M_WDM), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "g_x", offsetof(GlobalParams, g_x), + sizeof(((GlobalParams *)0)->g_x), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "OMn", offsetof(GlobalParams, OMn), + sizeof(((GlobalParams *)0)->OMn), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "OMk", offsetof(GlobalParams, OMk), + 
sizeof(((GlobalParams *)0)->OMk), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "OMr", offsetof(GlobalParams, OMr), + sizeof(((GlobalParams *)0)->OMr), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "OMtot", offsetof(GlobalParams, OMtot), + sizeof(((GlobalParams *)0)->OMtot), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "Y_He", offsetof(GlobalParams, Y_He), + sizeof(((GlobalParams *)0)->Y_He), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "wl", offsetof(GlobalParams, wl), + sizeof(((GlobalParams *)0)->wl), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "SHETH_b", offsetof(GlobalParams, SHETH_b), + sizeof(((GlobalParams *)0)->SHETH_b), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "SHETH_c", offsetof(GlobalParams, SHETH_c), + sizeof(((GlobalParams *)0)->SHETH_c), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "Zreion_HeII", offsetof(GlobalParams, Zreion_HeII), + sizeof(((GlobalParams *)0)->Zreion_HeII), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "FILTER", offsetof(GlobalParams, FILTER), + sizeof(((GlobalParams *)0)->FILTER), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "external_table_path", offsetof(GlobalParams, external_table_path), + sizeof(((GlobalParams *)0)->external_table_path), + _CFFI_OP(_CFFI_OP_NOOP, 516) }, + { "wisdoms_path", offsetof(GlobalParams, wisdoms_path), + sizeof(((GlobalParams *)0)->wisdoms_path), + _CFFI_OP(_CFFI_OP_NOOP, 516) }, + { "R_BUBBLE_MIN", offsetof(GlobalParams, R_BUBBLE_MIN), + sizeof(((GlobalParams *)0)->R_BUBBLE_MIN), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "M_MIN_INTEGRAL", offsetof(GlobalParams, M_MIN_INTEGRAL), + sizeof(((GlobalParams *)0)->M_MIN_INTEGRAL), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "M_MAX_INTEGRAL", offsetof(GlobalParams, M_MAX_INTEGRAL), + sizeof(((GlobalParams *)0)->M_MAX_INTEGRAL), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "T_RE", offsetof(GlobalParams, T_RE), + sizeof(((GlobalParams *)0)->T_RE), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "VAVG", offsetof(GlobalParams, VAVG), + sizeof(((GlobalParams *)0)->VAVG), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "USE_ADIABATIC_FLUCTUATIONS", 
offsetof(GlobalParams, USE_ADIABATIC_FLUCTUATIONS), + sizeof(((GlobalParams *)0)->USE_ADIABATIC_FLUCTUATIONS), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "halo_mass", offsetof(HaloBox, halo_mass), + sizeof(((HaloBox *)0)->halo_mass), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "halo_stars", offsetof(HaloBox, halo_stars), + sizeof(((HaloBox *)0)->halo_stars), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "halo_stars_mini", offsetof(HaloBox, halo_stars_mini), + sizeof(((HaloBox *)0)->halo_stars_mini), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "count", offsetof(HaloBox, count), + sizeof(((HaloBox *)0)->count), + _CFFI_OP(_CFFI_OP_NOOP, 217) }, + { "n_ion", offsetof(HaloBox, n_ion), + sizeof(((HaloBox *)0)->n_ion), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "halo_sfr", offsetof(HaloBox, halo_sfr), + sizeof(((HaloBox *)0)->halo_sfr), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "halo_xray", offsetof(HaloBox, halo_xray), + sizeof(((HaloBox *)0)->halo_xray), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "halo_sfr_mini", offsetof(HaloBox, halo_sfr_mini), + sizeof(((HaloBox *)0)->halo_sfr_mini), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "whalo_sfr", offsetof(HaloBox, whalo_sfr), + sizeof(((HaloBox *)0)->whalo_sfr), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "log10_Mcrit_ACG_ave", offsetof(HaloBox, log10_Mcrit_ACG_ave), + sizeof(((HaloBox *)0)->log10_Mcrit_ACG_ave), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "log10_Mcrit_MCG_ave", offsetof(HaloBox, log10_Mcrit_MCG_ave), + sizeof(((HaloBox *)0)->log10_Mcrit_MCG_ave), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "n_halos", offsetof(HaloField, n_halos), + sizeof(((HaloField *)0)->n_halos), + _CFFI_OP(_CFFI_OP_NOOP, 317) }, + { "buffer_size", offsetof(HaloField, buffer_size), + sizeof(((HaloField *)0)->buffer_size), + _CFFI_OP(_CFFI_OP_NOOP, 317) }, + { "halo_masses", offsetof(HaloField, halo_masses), + sizeof(((HaloField *)0)->halo_masses), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "halo_coords", offsetof(HaloField, halo_coords), + sizeof(((HaloField *)0)->halo_coords), + _CFFI_OP(_CFFI_OP_NOOP, 
217) }, + { "star_rng", offsetof(HaloField, star_rng), + sizeof(((HaloField *)0)->star_rng), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "sfr_rng", offsetof(HaloField, sfr_rng), + sizeof(((HaloField *)0)->sfr_rng), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "xray_rng", offsetof(HaloField, xray_rng), + sizeof(((HaloField *)0)->xray_rng), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "lowres_density", offsetof(InitialConditions, lowres_density), + sizeof(((InitialConditions *)0)->lowres_density), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "lowres_vx", offsetof(InitialConditions, lowres_vx), + sizeof(((InitialConditions *)0)->lowres_vx), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "lowres_vy", offsetof(InitialConditions, lowres_vy), + sizeof(((InitialConditions *)0)->lowres_vy), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "lowres_vz", offsetof(InitialConditions, lowres_vz), + sizeof(((InitialConditions *)0)->lowres_vz), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "lowres_vx_2LPT", offsetof(InitialConditions, lowres_vx_2LPT), + sizeof(((InitialConditions *)0)->lowres_vx_2LPT), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "lowres_vy_2LPT", offsetof(InitialConditions, lowres_vy_2LPT), + sizeof(((InitialConditions *)0)->lowres_vy_2LPT), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "lowres_vz_2LPT", offsetof(InitialConditions, lowres_vz_2LPT), + sizeof(((InitialConditions *)0)->lowres_vz_2LPT), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "hires_density", offsetof(InitialConditions, hires_density), + sizeof(((InitialConditions *)0)->hires_density), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "hires_vx", offsetof(InitialConditions, hires_vx), + sizeof(((InitialConditions *)0)->hires_vx), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "hires_vy", offsetof(InitialConditions, hires_vy), + sizeof(((InitialConditions *)0)->hires_vy), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "hires_vz", offsetof(InitialConditions, hires_vz), + sizeof(((InitialConditions *)0)->hires_vz), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "hires_vx_2LPT", offsetof(InitialConditions, 
hires_vx_2LPT), + sizeof(((InitialConditions *)0)->hires_vx_2LPT), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "hires_vy_2LPT", offsetof(InitialConditions, hires_vy_2LPT), + sizeof(((InitialConditions *)0)->hires_vy_2LPT), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "hires_vz_2LPT", offsetof(InitialConditions, hires_vz_2LPT), + sizeof(((InitialConditions *)0)->hires_vz_2LPT), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "lowres_vcb", offsetof(InitialConditions, lowres_vcb), + sizeof(((InitialConditions *)0)->lowres_vcb), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "mean_f_coll", offsetof(IonizedBox, mean_f_coll), + sizeof(((IonizedBox *)0)->mean_f_coll), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "mean_f_coll_MINI", offsetof(IonizedBox, mean_f_coll_MINI), + sizeof(((IonizedBox *)0)->mean_f_coll_MINI), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "log10_Mturnover_ave", offsetof(IonizedBox, log10_Mturnover_ave), + sizeof(((IonizedBox *)0)->log10_Mturnover_ave), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "log10_Mturnover_MINI_ave", offsetof(IonizedBox, log10_Mturnover_MINI_ave), + sizeof(((IonizedBox *)0)->log10_Mturnover_MINI_ave), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "xH_box", offsetof(IonizedBox, xH_box), + sizeof(((IonizedBox *)0)->xH_box), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "Gamma12_box", offsetof(IonizedBox, Gamma12_box), + sizeof(((IonizedBox *)0)->Gamma12_box), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "MFP_box", offsetof(IonizedBox, MFP_box), + sizeof(((IonizedBox *)0)->MFP_box), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "z_re_box", offsetof(IonizedBox, z_re_box), + sizeof(((IonizedBox *)0)->z_re_box), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "dNrec_box", offsetof(IonizedBox, dNrec_box), + sizeof(((IonizedBox *)0)->dNrec_box), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "temp_kinetic_all_gas", offsetof(IonizedBox, temp_kinetic_all_gas), + sizeof(((IonizedBox *)0)->temp_kinetic_all_gas), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "Fcoll", offsetof(IonizedBox, Fcoll), + sizeof(((IonizedBox *)0)->Fcoll), + 
_CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "Fcoll_MINI", offsetof(IonizedBox, Fcoll_MINI), + sizeof(((IonizedBox *)0)->Fcoll_MINI), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "n_halos", offsetof(PerturbHaloField, n_halos), + sizeof(((PerturbHaloField *)0)->n_halos), + _CFFI_OP(_CFFI_OP_NOOP, 317) }, + { "buffer_size", offsetof(PerturbHaloField, buffer_size), + sizeof(((PerturbHaloField *)0)->buffer_size), + _CFFI_OP(_CFFI_OP_NOOP, 317) }, + { "halo_masses", offsetof(PerturbHaloField, halo_masses), + sizeof(((PerturbHaloField *)0)->halo_masses), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "halo_coords", offsetof(PerturbHaloField, halo_coords), + sizeof(((PerturbHaloField *)0)->halo_coords), + _CFFI_OP(_CFFI_OP_NOOP, 217) }, + { "star_rng", offsetof(PerturbHaloField, star_rng), + sizeof(((PerturbHaloField *)0)->star_rng), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "sfr_rng", offsetof(PerturbHaloField, sfr_rng), + sizeof(((PerturbHaloField *)0)->sfr_rng), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "xray_rng", offsetof(PerturbHaloField, xray_rng), + sizeof(((PerturbHaloField *)0)->xray_rng), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "density", offsetof(PerturbedField, density), + sizeof(((PerturbedField *)0)->density), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "velocity_x", offsetof(PerturbedField, velocity_x), + sizeof(((PerturbedField *)0)->velocity_x), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "velocity_y", offsetof(PerturbedField, velocity_y), + sizeof(((PerturbedField *)0)->velocity_y), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "velocity_z", offsetof(PerturbedField, velocity_z), + sizeof(((PerturbedField *)0)->velocity_z), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "Ts_box", offsetof(TsBox, Ts_box), + sizeof(((TsBox *)0)->Ts_box), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "x_e_box", offsetof(TsBox, x_e_box), + sizeof(((TsBox *)0)->x_e_box), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "Tk_box", offsetof(TsBox, Tk_box), + sizeof(((TsBox *)0)->Tk_box), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "J_21_LW_box", offsetof(TsBox, 
J_21_LW_box), + sizeof(((TsBox *)0)->J_21_LW_box), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "HII_DIM", offsetof(UserParams, HII_DIM), + sizeof(((UserParams *)0)->HII_DIM), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "DIM", offsetof(UserParams, DIM), + sizeof(((UserParams *)0)->DIM), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "BOX_LEN", offsetof(UserParams, BOX_LEN), + sizeof(((UserParams *)0)->BOX_LEN), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "NON_CUBIC_FACTOR", offsetof(UserParams, NON_CUBIC_FACTOR), + sizeof(((UserParams *)0)->NON_CUBIC_FACTOR), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "USE_FFTW_WISDOM", offsetof(UserParams, USE_FFTW_WISDOM), + sizeof(((UserParams *)0)->USE_FFTW_WISDOM), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "HMF", offsetof(UserParams, HMF), + sizeof(((UserParams *)0)->HMF), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "USE_RELATIVE_VELOCITIES", offsetof(UserParams, USE_RELATIVE_VELOCITIES), + sizeof(((UserParams *)0)->USE_RELATIVE_VELOCITIES), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "POWER_SPECTRUM", offsetof(UserParams, POWER_SPECTRUM), + sizeof(((UserParams *)0)->POWER_SPECTRUM), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "N_THREADS", offsetof(UserParams, N_THREADS), + sizeof(((UserParams *)0)->N_THREADS), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "PERTURB_ON_HIGH_RES", offsetof(UserParams, PERTURB_ON_HIGH_RES), + sizeof(((UserParams *)0)->PERTURB_ON_HIGH_RES), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "NO_RNG", offsetof(UserParams, NO_RNG), + sizeof(((UserParams *)0)->NO_RNG), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "USE_INTERPOLATION_TABLES", offsetof(UserParams, USE_INTERPOLATION_TABLES), + sizeof(((UserParams *)0)->USE_INTERPOLATION_TABLES), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "INTEGRATION_METHOD_ATOMIC", offsetof(UserParams, INTEGRATION_METHOD_ATOMIC), + sizeof(((UserParams *)0)->INTEGRATION_METHOD_ATOMIC), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "INTEGRATION_METHOD_MINI", offsetof(UserParams, INTEGRATION_METHOD_MINI), + sizeof(((UserParams *)0)->INTEGRATION_METHOD_MINI), + 
_CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "USE_2LPT", offsetof(UserParams, USE_2LPT), + sizeof(((UserParams *)0)->USE_2LPT), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "MINIMIZE_MEMORY", offsetof(UserParams, MINIMIZE_MEMORY), + sizeof(((UserParams *)0)->MINIMIZE_MEMORY), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "KEEP_3D_VELOCITIES", offsetof(UserParams, KEEP_3D_VELOCITIES), + sizeof(((UserParams *)0)->KEEP_3D_VELOCITIES), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "SAMPLER_MIN_MASS", offsetof(UserParams, SAMPLER_MIN_MASS), + sizeof(((UserParams *)0)->SAMPLER_MIN_MASS), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "SAMPLER_BUFFER_FACTOR", offsetof(UserParams, SAMPLER_BUFFER_FACTOR), + sizeof(((UserParams *)0)->SAMPLER_BUFFER_FACTOR), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "MAXHALO_FACTOR", offsetof(UserParams, MAXHALO_FACTOR), + sizeof(((UserParams *)0)->MAXHALO_FACTOR), + _CFFI_OP(_CFFI_OP_NOOP, 161) }, + { "N_COND_INTERP", offsetof(UserParams, N_COND_INTERP), + sizeof(((UserParams *)0)->N_COND_INTERP), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "N_PROB_INTERP", offsetof(UserParams, N_PROB_INTERP), + sizeof(((UserParams *)0)->N_PROB_INTERP), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "MIN_LOGPROB", offsetof(UserParams, MIN_LOGPROB), + sizeof(((UserParams *)0)->MIN_LOGPROB), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "SAMPLE_METHOD", offsetof(UserParams, SAMPLE_METHOD), + sizeof(((UserParams *)0)->SAMPLE_METHOD), + _CFFI_OP(_CFFI_OP_NOOP, 120) }, + { "AVG_BELOW_SAMPLER", offsetof(UserParams, AVG_BELOW_SAMPLER), + sizeof(((UserParams *)0)->AVG_BELOW_SAMPLER), + _CFFI_OP(_CFFI_OP_NOOP, 65) }, + { "HALOMASS_CORRECTION", offsetof(UserParams, HALOMASS_CORRECTION), + sizeof(((UserParams *)0)->HALOMASS_CORRECTION), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "PARKINSON_G0", offsetof(UserParams, PARKINSON_G0), + sizeof(((UserParams *)0)->PARKINSON_G0), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "PARKINSON_y1", offsetof(UserParams, PARKINSON_y1), + sizeof(((UserParams *)0)->PARKINSON_y1), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { 
"PARKINSON_y2", offsetof(UserParams, PARKINSON_y2), + sizeof(((UserParams *)0)->PARKINSON_y2), + _CFFI_OP(_CFFI_OP_NOOP, 1) }, + { "filtered_sfr", offsetof(XraySourceBox, filtered_sfr), + sizeof(((XraySourceBox *)0)->filtered_sfr), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "filtered_xray", offsetof(XraySourceBox, filtered_xray), + sizeof(((XraySourceBox *)0)->filtered_xray), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "filtered_sfr_mini", offsetof(XraySourceBox, filtered_sfr_mini), + sizeof(((XraySourceBox *)0)->filtered_sfr_mini), + _CFFI_OP(_CFFI_OP_NOOP, 174) }, + { "mean_log10_Mcrit_LW", offsetof(XraySourceBox, mean_log10_Mcrit_LW), + sizeof(((XraySourceBox *)0)->mean_log10_Mcrit_LW), + _CFFI_OP(_CFFI_OP_NOOP, 207) }, + { "mean_sfr", offsetof(XraySourceBox, mean_sfr), + sizeof(((XraySourceBox *)0)->mean_sfr), + _CFFI_OP(_CFFI_OP_NOOP, 207) }, + { "mean_sfr_mini", offsetof(XraySourceBox, mean_sfr_mini), + sizeof(((XraySourceBox *)0)->mean_sfr_mini), + _CFFI_OP(_CFFI_OP_NOOP, 207) }, +}; + +static const struct _cffi_struct_union_s _cffi_struct_unions[] = { + { "AstroParams", 502, _CFFI_F_CHECK_FIELDS, + sizeof(AstroParams), offsetof(struct _cffi_align__AstroParams, y), 0, 32 }, + { "BrightnessTemp", 503, _CFFI_F_CHECK_FIELDS, + sizeof(BrightnessTemp), offsetof(struct _cffi_align__BrightnessTemp, y), 32, 1 }, + { "CosmoParams", 504, _CFFI_F_CHECK_FIELDS, + sizeof(CosmoParams), offsetof(struct _cffi_align__CosmoParams, y), 33, 6 }, + { "FlagOptions", 505, _CFFI_F_CHECK_FIELDS, + sizeof(FlagOptions), offsetof(struct _cffi_align__FlagOptions, y), 39, 17 }, + { "GlobalParams", 506, _CFFI_F_CHECK_FIELDS, + sizeof(GlobalParams), offsetof(struct _cffi_align__GlobalParams, y), 56, 61 }, + { "HaloBox", 507, _CFFI_F_CHECK_FIELDS, + sizeof(HaloBox), offsetof(struct _cffi_align__HaloBox, y), 117, 11 }, + { "HaloField", 508, _CFFI_F_CHECK_FIELDS, + sizeof(HaloField), offsetof(struct _cffi_align__HaloField, y), 128, 7 }, + { "InitialConditions", 509, _CFFI_F_CHECK_FIELDS, + 
sizeof(InitialConditions), offsetof(struct _cffi_align__InitialConditions, y), 135, 15 }, + { "IonizedBox", 510, _CFFI_F_CHECK_FIELDS, + sizeof(IonizedBox), offsetof(struct _cffi_align__IonizedBox, y), 150, 12 }, + { "PerturbHaloField", 511, _CFFI_F_CHECK_FIELDS, + sizeof(PerturbHaloField), offsetof(struct _cffi_align__PerturbHaloField, y), 162, 7 }, + { "PerturbedField", 512, _CFFI_F_CHECK_FIELDS, + sizeof(PerturbedField), offsetof(struct _cffi_align__PerturbedField, y), 169, 4 }, + { "TsBox", 513, _CFFI_F_CHECK_FIELDS, + sizeof(TsBox), offsetof(struct _cffi_align__TsBox, y), 173, 4 }, + { "UserParams", 514, _CFFI_F_CHECK_FIELDS, + sizeof(UserParams), offsetof(struct _cffi_align__UserParams, y), 177, 29 }, + { "XraySourceBox", 515, _CFFI_F_CHECK_FIELDS, + sizeof(XraySourceBox), offsetof(struct _cffi_align__XraySourceBox, y), 206, 6 }, +}; + +static const struct _cffi_typename_s _cffi_typenames[] = { + { "AstroParams", 502 }, + { "BrightnessTemp", 503 }, + { "CosmoParams", 504 }, + { "FlagOptions", 505 }, + { "GlobalParams", 506 }, + { "HaloBox", 507 }, + { "HaloField", 508 }, + { "InitialConditions", 509 }, + { "IonizedBox", 510 }, + { "PerturbHaloField", 511 }, + { "PerturbedField", 512 }, + { "TsBox", 513 }, + { "UserParams", 514 }, + { "XraySourceBox", 515 }, +}; + +static const struct _cffi_type_context_s _cffi_type_context = { + _cffi_types, + _cffi_globals, + _cffi_fields, + _cffi_struct_unions, + NULL, /* no enums */ + _cffi_typenames, + 77, /* num_globals */ + 14, /* num_struct_unions */ + 0, /* num_enums */ + 14, /* num_typenames */ + NULL, /* no includes */ + 519, /* num_types */ + 0, /* flags */ +}; + +#ifdef __GNUC__ +# pragma GCC visibility push(default) /* for -fvisibility= */ +#endif + +#ifdef PYPY_VERSION +PyMODINIT_FUNC +_cffi_pypyinit_c_21cmfast(const void *p[]) +{ + p[0] = (const void *)0x2601; + p[1] = &_cffi_type_context; +#if PY_MAJOR_VERSION >= 3 + return NULL; +#endif +} +# ifdef _MSC_VER + PyMODINIT_FUNC +# if PY_MAJOR_VERSION >= 3 + 
PyInit_c_21cmfast(void) { return NULL; } +# else + initc_21cmfast(void) { } +# endif +# endif +#elif PY_MAJOR_VERSION >= 3 +PyMODINIT_FUNC +PyInit_c_21cmfast(void) +{ + return _cffi_init("py21cmfast.c_21cmfast", 0x2601, &_cffi_type_context); +} +#else +PyMODINIT_FUNC +initc_21cmfast(void) +{ + _cffi_init("py21cmfast.c_21cmfast", 0x2601, &_cffi_type_context); +} +#endif + +#ifdef __GNUC__ +# pragma GCC visibility pop +#endif diff --git a/py21cmfast/c_21cmfast.o b/py21cmfast/c_21cmfast.o new file mode 100644 index 0000000000000000000000000000000000000000..fa989aa195457ec2dc4fe4f2c8253ee9d4898ef2 GIT binary patch literal 430416 zcmeFa2Ut``_c*+Ev8~u3Q6nlwVg+Ntm?&kzML`Hfv93rFlonVF7*Ui3cf~?XRANlT zZt|+ZB$g12sHj+COEYRLNh~WGj6Ih4oipX`y?2|wZ{G6zf8lx9yLZl+nKNfjn=^Co z&hQKLuUVspg8$V}3{;rzc&S2BXs9dPc#&^jigt<@6zYOHc(+H{PDe`r>U$%$8W-1t zx9>edZMgSEZ73|o5AWgXd3_KiB21lo%~L&3?3e!OyydiqNokktgI=N z(dxMso};qUmTt8gQT~ za6phxIlUl2hI=06U(|$22`j}_)_)2Sw#tCI6G2j9~hso4TovWw;BXk?>J#J5&vK`DX-obcK(Bf-pc`pf&?^8|h;_3SLvuGn+bc$^hVdaqo)R?Rb!}HMS|vmClAy&Y-L5Eg zUjYm%BvNHo3lazgx^lx_Ey^u?CpAHx5D(-wOH?YBTL1%+t1HaOW1QHZ@Z($jIQn$+ z90{5{#+xg94iG2O_dS0SeUBvi&an!C6qC9z0+l*9LEkOQ2ip2l{4kdfHOhx4kq_x7 zl?+dm3_39&iY_P_BiM9Fbsk16wYcsKO{O+fGU_m(x!6b~;~J2RQ*JIm63N)h&}0In zbUUxqeF-pNS`3|XPhrZT0H%BonR58fryTKM%BRpOq*N_3=m1u3eTcJbbF!&bmnyYd zG}$KqM|(iFUZ?Y|wl&|BI;F0wc)stN+XDv}bSZwA&vz=$_joej_CBcuj7JHGq;%$1 zcp<5G_|#Hv8uoxERO*h)=Cu0BP35Tj%AT(Q9f~+_<@7(rYPy+W^Qo;q0N9nvo{c&7 zQ89Kd!{#$weFCtLDSHxcjBS6X)V;xQxP7QT4LB#2J=3pb=TEeEMo=1mjo(fQz#2`Ft`y_H*U8_rQ|_ymf8 zG0ETI$5FV40pV8~gwP&&_#qY4c#)tM(tdE#+f(_Bm&&IJJ`>&$E0p;SFj=ei!QAhrMYFDUeLNpFGBSuylVhnQP29=Wgq zLJFKSE;d%e%{+$2#l?gMZZVPpxo}V*wd`Uc@?v;Y`9S8xLJy1%ckL!dcVy_}3Blak zl4hHV@r+EA&|9I<_X8q-{R zi);9>8hTL;GadC5*J#RW6pCt?sp+dqoioEL0z8QYQ!+rY39y%#wz>?kUI2I-_Ug*D zq6Nk~03xgiFjO$@%Dq+}bn-jHZv`q>6TKbs~3qlJmE9O z)g8*mxCX-aaOV8b`_gs;4VOtrBCV}@p0B4U1SNIp<0ntz3O&lzGb>PsL`sc%Xu|7< zdVA8m{wXXkH{tz)_a&zLYj|G_@rDKe(+D~&#Se3R)RloiOJWKkuU#4FT(MOgjw;nLL 
zNLc^T_1*K(K`DNi>ysz)f1b|O4X(sL{y+4E`eGaq`eGDEI_onl%7GWC^huEIL2(j9 z)MgYph+zw|YUWTIDRm7Pk|3e7FmjBmL`kY@Qwjt>Xy&6@h*hwRpiBLrnU9ia%p6t& zg^Uazl*UZW+lc#!WHrQo&`i_8g{v7J`a#~Z8J;9){<9Viw#>CyRKtuGH!&@TS&j9g z8fN2_gfUB5jS^7}GoDIfi+EN8-JI2NNMeh=tVX%0hS@kIu|;iGqf%7EY#gtOM}C5M zhcee=q8etRVQE-<7%67u7K9V~kR_n6-=%AvNp<8pwUX z02l%a&V__{?=T2j8CGSF4whV_SvguEUSprs9SK|w5V@l<;Es+cw|=8CTryOnE835) zXgq$z;72@uB;rRZerWL{6F;Vf6zE)YfQfk3v8$e4=dvrRHhQ%(*LUF?eEHlJi{Q%! zSHyuYt6i}Yd|5_XfiLg6=FpNr*J*^`5IUvOF$J3Yy{cByIPuB@A)Z8^W7Xe%{Lr$$ zb4EW{PWwgEJw_H!E+O%x2_Av>g{FHA-sh0Tqj>+|8S}lXfdl9;nv#YsM0nq4n9{ye z5!0Dtid{=aD_F1unilBr71KrmqEaDK2%HBniAG3Ml;diH)yyyQ9j1ooeNDlKvYlDJ zp2i0j@_RJ<>-Egpy(}W{CI*>Su9E$fP=?02V?u)kZf6E`VXR|7wQ{;7ZP$dM@z~td zMuk}Ec{J!QZ4I(}8P2e}t?f$6VX5TmN|Geo*c8J`%F&eqO^abb!c=gE3jvbFaeY&L zvH~LszzhJ9{4Q@om`;jAy2Cu+nK(5DoKK(h^&*;MzQ1z_}~QLRZ*5!u7DUJ+enaB|ymYvIhCijgTeFt*Eb# zemHt8OJJcfRhZ@5aelslU_nue9}#T#N!ZPF51|WKcU-_y!KeF}KLNJb+?bR>qIX*v znoPA|GfJt;0T^&0kfyp#Ta{2Cq`Kid7k9)1n>CSEA*BKffN=n3sc!ozLwAtrab&-e z&VsZHr7jNEHZTkBM7CjT%Vg3l+Vg*ge_t@hI8zB=%l|E`RK_m(UIpe=k@<8&xU~R7 zQK=nfojuRCt4n!gi9gz9kfj0cFec^zF$y{z=qFzT-g%T)ou&JMa(>yn|6kjm{cqDF z*Z(jk_iQ(sslL>Th>`)L#j=ZjTF!s!k6{U^D8&!cdY$@XvfJiGQeXU&+5<1N2cDD| zw$J8d)x}CI0!HX4yXGPeo_H?<$^=`!1G|BtnYq}LO5I9;fz*znR>ib|1_kT@M6rOu zm}xoy0Z8Z?zZGXc2@=%<&Sy)fAguYx?$nmZ{G?Q`kXY0m73Ng!u=8c zsmL4^%I8xeuds(x&JLK601$i&X%QkkbTeQ~)er#kQo%t{u!Uh;KH|Xl{hNZd+#{z) zelPbw5#WB%*{}m923W&rO#y9xLWE%D!M>9Md!9}O5(dr>t+ze-m(wS|r~1(lB&HNU zO!`63E6lux!i4!OvcmbRzR*MxHi?NOJzOaxSn7B%S?tPB7J%SNceGs?nir`ocnP|r zUB$FjEd|PB-ZKm>M2aR?$Sn*l(hN-!LC|iU3_>CxhqIY;is-0|IJ7^)Q1x=ujOPim8|jKfac(5G{{Rq05JAN- zDnvtb5kJn53ZACGoG=6-`<`fB-SHnVrjbvrs~dbzWY&{X2(s?NEt8<4Y?)EzLj5oy zjM$iZ5VxD+8M5Dk2{o(hj$#xS@%zUxW)lB|CRhFW3~)lQ1w^D&fkOnl!b1g4Yn75;z*j9l*RM|Ab%hi|BM97K#6fDyxYqdl6M;Cjkg^ z$ppFMBGIa2aw2gf!6tqw-LyY>5=gVE&y6JdY^v{sv2nTS-UaVVEc+`P=XWZRU!ZWv zd!q0E<}Xyuu@h8tK{$YQX=`L}(VXyN9h60U2m~fFQx7gLl7of(kTN||?_#-`zLKdR_TV1Ei!JMK zJ?QW+K0nS+W73F&&k8y@iJ}c5vS1~9(T)M#Bm#wPNED5p&VqQ}3{9eF=ZGZ8S9vf~ 
z(!~-A3I=gr($>iN@l61O3W+oqA~`>fxsbhZK2X4*oaakxA|dtR6ZRj_A((`VN*v9x z5@M`bc#tOXfpPgd?TRzQpK9-mJGRnABO5PUT?Aw;JXhoGY|dO8E- ziBBS-U4RBn8?TMX#;YzEtOi&hd_qllUP;bRNj{Z(1^IJSFczf!Kk~eomFEvJhw6?q zZ0GSqx(czoT~4DA(_sSB?jFRG$E*p#G1&%p3%bFLUWf}u3p2rJfr4?82u2H;VBkH= zeP<-X3_=f{bv$pw5hwsYA=5}&Sq`n3 zVF^~nLWbT=oYe$kT%a+nEA#=2six3UZDM}}#{?-`Guy)-?f|Zj&dU82mDl>vQYn7O^O?EV;!$2BiM(3>q>>tml1ir*s3lL? ze#7QU=KtlZqzw$sT%bWx`IEXbF_ueeJZnQH&~i!jW@zRDEm=%G&(Nd-O{QOxwYUjP zLtiD8Xz)-sl)4)X775j~!ihYd3II`P3V8R4=)rd?T3mEV0muv$Jv{OVaJUsUC5Rr8 zR1_<05j@&OB4H%EZt?UM>4|NiO0wtmskG!)h#!%YufyJ;awdIF4v?rklC}i}1cHd{ zk+u6li|egtJn!{id!F4s)5ufpAo%=TBgt2aV)-1HME1PH|JeSp6Z0qLsw43yH>|3T z$e&0R2ErlHV;n=1x&k^?Vu#X~;l&dH=V3XORi)0IK~v44 zk}UzQ&za_UlB{AS%(L|kVmGDk2Zo@BE6>?Uk6h;gg!CdPIi%*?P`PPlh2I?o`Ff1S z`-l;Mr4$$e^fQUX&%`jRLq}@EyI9ySLw~~d%P{9D@jYIQIP@#K(XR;my_9GyZ;d&Y za?A4npW6Sm9sx-Ndp{EZbNL6sUdVvv4nvZp`&T0%2yMg?+ne=jrT`ETi18XRJPR5C zsc@b-v>^8qiQPv8()83C_fZgVfw-PzQ1-Z*{K-&*nXF=>St6_ML^|EEj7TgIQzFhN zw{SWEKH+gSB0NM^ZSL*1g>{ul&c>sh3h&#ozrRbK>$D)R&l7pI93Rz&Axhuh^Y{8$ z`CnN-Lx3tifBbj8>R-%P5q|z(c%Kk;>K0s>5VTs1zbxzXzahU*cO5Ndz$C6_4zt%T z#DMWg1Pmh{HPSGkn-nNt>G(4=cPW}Y)Z)8K=3Y&*(z(y9yUf4gLnOG`Fl9lz(d=9$ z!&d{an^|_Q8)oNjWME>hn=Ctr_Yj?6>Qk9F3t9mjxHUB+#5xoYKA|;S&)|o(l}v3{ zGnp`@b8PJ=y+hYAvi+pL_K(wj4;5D$PY%NJLF4y2TuzX$0i^QO5f)w!2%s14aCJ?;h?ekML8&~P=-a93^EPi-q0 zZ2canN?@kVnn;O`Uf-K@ zJ5=xY!{vqo29XKDT_#N>fh<9R)&~pJ`w_MHI9c_;`u>*e#7+m8{cgPnI3_uXTQq8c|qmg z5rOmkwo}pe3So~d(WeNTlh7x7JLL~~af&hzpOy#amGeS)bCEKSoMW69QKBx$sFbRQ zVK>nb+}R8)$gX5s!d2uD%9b@1De<^VCL+-AK&l^^*=&OLQ6kg@5hd_|{2lnGQzx3! 
zdlXeVr*OJ42YR+NuP06O>aBqXRAlEM@f;&?x#~Mv`BluzTmpTXo%&aTODZs?iHt!TDy(3(4C`sTG6SJ_&PujLtQUi7|IX zGY%QY*u;UcW0Q-v40g^Fz{_@FtaQFLFa)xM4 z3IWz7jwZtU^U4&H$DHj{jn}LSk#75LG7&wBef_uf}m6vGv zatJvfau#fIrH$q?w=kLG@8bd$KxrqXlplhcg4JkA~T4v4}E|) zAzxyXmAY=M&}5g(jd&K1%IQBh|7W2~7|K6A|5Z>QCtf;p`M84z3L(t@RLT4&%I4ny z*p}v>JpW_OLfD^w!7d`dO{$HOprV5Hta9)flLNtzpvB8BC#530W675&byHyap3R?> z17FadDE0j;$pbkLA+jlk>~iB}i9A@e|0JgI18Wcj)&OimB?dO0V(YI>7uj+!3Csyl zo!2JAe1k0~I?pwgtf#Hb)OH)HmDPd!E3m8<=ZP2~Yx1d$GB+Ff&CpBj_v75lTq6;_ zB@*GYDMk8tWIIGS+}~Be*-UMmnGk?6$lk7E5uIH*X!Ue*Kzf16*A^Ntz@!$!h_(yz za1=^RqaraIJU_(>P4;2C5#Oar^#Bd21$i$wJ^&PuOA;>#%PEWT!gdlblu#d{hMj7^ za?Vv4`YWQ zqFfS0OnC*kQY>)g05&2-vgqEnu@bd`Ih*3DcDDUMu~=ATv4>JmpMMnM93XSOl*}2F zOlhn$xR_1HHP5LlmXW}WuM?vc$MkPyiA#HxIo>|rIbhvjGw@tJzRV`AynP6qy2lTTuYKsF^taTJQZ zHR>gDCJSG(TAGC}UJ7Cdd4Me!JnWwbVU;`xBhPc01TXVr(^P<2Stb=YF*9Z4DD_Fi z4vM1$LB&yhwF#!)kMZ8iP^L{Wu~Lt(78g^X)S)GmPytcDX<=+e1^PCa15q$4aZE}b zraH}nktuzF3X|nr$&4g7uS#7ZD-mqO_NvxQ?kw0#Th2_WS3z4$|4gxBvAQ4#Y#&)6 z(G>x;8RD4HvCvH4?8l46Ov#k`!yxS@-ZFo%jTM^wL9sC(b<0-#v)*_UP^^!?#25)< zwB;|JR-ck>=@Up)hFqT}{nPq{`hDnGCTTpmC!&ug9ZCiVhn+?a{a0U z6p+`ySd7O?NIbTl#$$NK72+}Y*v{P+9FcMX>zo0T3o!r>tn}iDMVktWwqEAA-nOM^ zdK)%6%hhx~fa;qpWPouIKL*JnPEMhO9FZR@;hw~Y>RTb34V&?Np$+PkJf0VlTJ(8a zqR$fFUL>IjY>_PPROTO{TFAp3rM|J57Cw~Azs-O}e;CnCDKzs4SO?=bOQTKkl*s99 z)~87eGvrhhSAjLOzW$ZOC1&w&XK0P-ArS^@X5{dV&D2ugRMRJdwIO zc!3Qmy#?>T;q5@86@K!;q&G6XG;U@3vfocH=;LxIGxK+ISfR<^>5ajrb}g@e3XKO@ z`MH$Solk*DAVWP{G(18#YOM9Ok$0ePKyoP5!W)#FyJKzQhy05MRParA%8f z%oMGZr0OvNv=yRlNtpSIEp4@>4_U3u!c340qMxETSE>uR-j@00{FPKcxh`aWWSSYU zKJOC~$=6>J9We8UiKs-@bpYc`dFmlwMSCw6WTUg0CSsCRLWM-g&8WD9DkI5>Nfvn) zqusOA9;_FrSi$tpBsL3JiQo@y#|C)X3aAi8yONk&siXElmU<)hfajr=`qpMtILN6e zv271*hvZ*^7LXA_Zis3+7ywA+`GJ9_>rNN~U35wf0q(9mTb>l`9QOc9y)X1@k}~Fx zd$2;2{ude_KeFSGi;R(ol)!jOLy|FKF6yR3%-?ZQIbpBeYdmTl(Ae*ig zWG7cT5m<)Ef$HVClr9v-su2;b*0ysW`ytgL6NkS>ffknTX!gkObU=w9zqV|PQa=|P zu>#evJ13Y%8O4}iNFin z79|JJ*jLkz|Ad+@k*n!GGX?pa 
zl?b9~TTSWte{o_<>hLWr#PiAfNK83CrQXYoiXL(*Y^&)Llr|fVKh}cdC%+dWUZj;` zBNCIfo|Oo^u%)KL&a!ERNmUeIxh_#rJIhRMKTG-3s`hfdM8PygkpDIPH2zEa`HEfr z%%=KhMSqKozlikH0Va$*&S`PITS(TsdR*_)*E@<-6e8&oyj_nU#rRPq3asH_PVE$! zL%p!>ToftTSkb;3P@B)_ znFulup_Dk*&%ay?=-Ho#xWFl5pqGn9Ui&t&&MeyAfp|73RO&T=0Bk8Tk%1{G2^lFs zMv;t+97+b_GBT9<7TANrftAgB(eY;irh1F>upJr~<8$SxuNp9hZ^RH&h*_<@#ain!ej5l@rA`;gFrL*j#S zA50$E%KMX)isPvqK0W){PJr18jXc-0-!DgCh7p2tDKmaxC7c=AIXK1#gtS0|*$Qnv zpc9zzI~lN~uKy$>M_sT8g$$ylc{mC`!vzeSaO8mnWJ1&h^KjbL1zS)z)CEL|NmI6r zdgLgQr07i-6S(jK-pKP5tpJK~$o{-YXj&NTBlr~y(_qz1t-j`08)_ z4fFoD{r=m2gMIkhe*bO1%~rc4B>UgB-}S+Kk=IjLjK7OW{9Q=nZ#|$|kH6VjV*YlD zb7ZZXT}Ye?{8p0q7m(ohjHVw|>Q>X`uqYHoH!9Cz# zlsILOG;UVvhhkHb2(dpa;ZX>gEWU2DqR-g9&3Nor8Zn82NQ_XV6ysYb%!A0+fMUc2 zAUy^69yTSRcs45$C`Nod;^bw`{vd))dg=4fq6Fh23>hT!=H<{anKF!Kfw2~s4YHkz z{P$>_jv=ftroA{g$(TN4CBm5OhP*Seh9uP}1f^6W@Wm!TPq}Q}Kz_N~6qshdDkenR z1MfHy5Er$EMkI03Fo=JiYW*%>KGi@ulZ1(xyG&ivG+PPjo$oBu@bD5)Agm8yJkq?K zGT)K5i}NM$Hx_P8ZDSrcjFs@A?Ld56B4KcZh|t&uu&m1!4cPfJEoK4xIl7Xn6as?? z=`OUTjh&7#T2Urasb7kXNJK@?O88K+ZRQg7BLSUQAlb43vI1n;5LghDLw;cIuUITp zgLGKR0~x<30HU9k@}c(93QEHx`29TNJkkop`X^0SzHnGhi=M-dm zG&=ycTdihiumu}ZozhE|$=Q05m>G7wiV1W%ElvFpoNe#W%(^QKV#QbvHXn44WTKMlVj$FPFP46Jw<-To|)8t3MwgaLzcaw+zUlp@QM?$DT%3z zU?tqt*-r#@q~wTfuEZ-^-1@plJcwV8AT?)6CV`*eMfY)-m2g7rSpE+XU9y>OZuyVN z{4zOzWZHvL>Lbml@Rw6z+wwb6Do&dGkMvvx`k#|BDhjCoF>Q&SU`ZR|LP%z$oRtW? 
zu)WvvHlSPbKak~}hd~awA)?+HdPrq~4?>B9gca%mB&^U_K*9>`8l?bEciBo>k(5L@ET<*l%e@m*4EDW60G7iYpJi*OvUakh0h z4&HdzDm=T;Xt3|+{D`zz>>v4V{M;%W;yyg-W7Us>pUtIktNzin|Nk%TUnq4$P&xVe zD$9!~`hGFoP38%7HH=TbhQisoEfl^3aEu|3J204jo%ShjwWdgCd$enMP9q#YP(CIu$H=OHE*@olc3 z5WyzBtD!sFE?b#*gW}lmqe|U)Al5>R-x$XrSr=yZF{vp8QuO%>z5_(m2VWsgm)^u` zod$umc^*K%>l8pe6p9k#rQP)k;u?EEJ<)oF#|7*8p_Bf!T}hbo6zdgYGMu5VjdP6l z_nIDo*?@Vn5j-e$Os=*|JOh!dR1+_h`pwvsLq|e zR1xa|2?fg~v?}x4(lsICvbj_0YnaiCixr8|BEC&>c3=(3j=dzhQ0nraJCNT>2|ZK# z!-_6PAS9D@Yf{KK86nv@RWC6AWpRCCC;D)L=A<5%ZW5suvy~p%4|=i%2MjJ3^#xom ziWshPJQJ6_9w%je-Bf_mIi|2BT^9jWIf(3iA#d1RcuKUoH*^+yt+-55}@TJBl z@L}^gI1*kO>2Y}-&KEu3CcIA2Vt!Ch@`E`vKe!(1;A+oo-3MjG2q?mjLg^;GQnv`# z3I*9Ygx|7XRfjXhi(ZBCiFYw$DLy@P4EU$vr!`M1d$vWA0sjL}z=z*wBUsk`;yyHt zc^(w2$fROozlb=fVoBf=J7M$OjLpGnGQW5PO`F6dl(7=QBw*V(Q#Z*_$%{ZPiJxKD z&%qj2J)b}hcs&U@9Vs~|;)F^VnSPKNIo;*tAihm<9@3~ml*mL`;|53|2iBqza=yim zBu5sM`VwqP!X0w{Q0$1g99!j1A|=O)4;9FP9GQfiSV|7+xxgLre5!;TB_mLnEX23T zouAMz!=sR#9~Jb`*S^@DI5jEBrjjjl%#_NlHp!RoCkp_9%D1ow36%+~M4*y85vv<7 z{Row!?R&`*RUk)dw3YefeYCO@4fx(0DLJQDi9in8RAib}a?nN)=@$}m)DQ<5F2Gs{ zC6bxsCtGl6Qo{_-63p|2QhcTWt->=IYVFJN^$^eV6M9ITbO?;()8u!PAYKy5*pxmX z?r6-}%w_I1v5Dq(KgR;o5(;sv+FDIyA=sAa83NFvj|D7SegUnGaUL=IIwTC{&fo3F=$Q{XuGpX2MIk|{$GfqYX zoAlnMenGSZ_JS&w=q2y$0xlO)M^55;#1-iM$jnWV^S7wHe0_!ZHt9tKoAi?BR|>M( zsatOYeI{*O9hg~otdYyuK63g>kEQb#p+x|v!aXs?QPa0A<;4)97llw9?j9vL)6H*L@Df26nC82I=qts0yuJL84$JS%?`E-yUc$1Y>exhjO zD5kV=m=*L-7RIa8sR)3olz7Zo+vcnoT7j1^)&Vr;_k1k8--I23wIzmoh%+NQ88Ep# zT`rwmeEWz=VJhVVR@2Fi2U>&yCz56@h+!B3m=73h+Z?d|IjBiS62RXV_+$&#@-rAA zPC`oq5t1ido{-axD#K-vA~f(%Sn_y_8@WN-@MMb`KD>uh&cKg|h}O2O6Ulqil)4Xr z-RMW;=N|Eu0?fZy+|R^5ZCOA=^?jJf*GMZWVDmhS^N8a#{sas#osYDLhodr9Kx0?Zp}u*1BK-V>rT8Q$hXf~?&@v59 z%0YOZIyfJzThJbx`C$l4MRTvrH&8E(62<$pcb)}tjmc0?lpmID zHYoG6XuHTCVUC_6KaBXsNT_RTJ%j_dIZrG<%!4clyF#FfbPkwYLCQnK4TXD)Yx;1TNbrd5Zinwr!IfmLKL(9gs8L z5;@Y;x-$PR4IL0yU|$-s=Hfl*@vKBJC5Ug69P(_iBo7mBiJWp7cV<&^5LY1Q7qWO2 zlXIMv2;?BXO>&lF4O6fI+)0&@qtq>c9?cWzUNBjRE07azMvjl19K^4J7wzScN|x9C 
zh+~uFYusmway3{9_O3(VLLOv=C@gH&|G~UYxFUSjumZlNBVzfzv}%9LE6nsZ$(e|3y8IJh&^`hZ&Da8?w$VAjf=IzQb(mzYX*% zJ}N-+zXfFLhFAF&P79n@_9VM^a9UuEGgMLPR^WgNWLr5d05Wam;Qq2Qub3dpvnqVe zMR0WZZeKVw0BdH_3ty-2z^c*&>}%qDiMA)nzVdwr;=j<93-PZh(0Lswv$bB|mm^r{Z!VuX=dW( z=-ZIFiUir6gNJ z=p8Sn*9fa{DZM|i5+PHA_%`W91e^5gph=?fqPz%Jg5STYfWX5GzQPHWMX0zu`@!~T zaBhVS8k9TmIc{*lgDuYVn%9OD?{I+&H7ieZ=>Ik(G76>pG)mn$D3q3EoG~7RLfd%? zPw3f*Crqs7RggC1kTr{MICUSI7)yFmf!(s~H_%WULrL01hXN;h|3owB-Po4bH(6Il zEU&}MN&e%Rr;lg*n#b}Ylh>*=gSP#na! zHS797N{-0SqoA0PBlq(c%>olRuWUw6HRff-=P>LEp?(M5+7fLOp{kjYBR$Hc%%4KZ zL0o}5X{>V-cgD-fL42Fs=|Ra69W_M-kdwp1jv^j4)}!Pgu0YN&W;<=iE$y_~BxgC+ zkQ_Djv_MW$u`DPnqCpwr3gnReIEj<+k&}b?HpwBshhaM4A`4^$QHFgN!XeHb@;vB% zvJozjvlqTeeSq2X2U850z?Q=BqVLcs?y_teZ}Nx-6UeH22zY4 zK%F?z^BRAGoqOi^9#v+49@+_L?Wy8>#5FdDdN$*Gq|oNvvG`uJ@e=}7#FEXQt4Mb+FD)Sqo!4aQ7g4FOwW-kp@-v_K{Y?Ib;k0)Y_m4_iUNryLHzSE6pG>>i|1FcYg;p7n0+4^Wx*VvSVxfb_}a6*hTtiy4_jSYy{TC9)LV>|-&NxbCi9&RNDE{6@YCcdlqOc2b^oaO2iRy_p zXtt4(!B2h9#|~*ukpt)f-gyX^w@85;k)#qS8a=J6%?_kTwq&O>6?7O_Ss2C!!8GTc z1Zqs9c}D=?p;3g<1Gre5ohP(#ta~h}wTR4Efv{+h5I2=kg$L1ji1i{}Enqc)vStDS zGRk;91{V!t%CNR|%J!h4Ha+ItvkrEIFR;X+iI-4IDI$NVlnD`EQ=!zI##nhCJ&|lW zZzUfg9y+2@#(ui=8V#|Vibv>N;PHUgP4&eswEQu?vBefkm0I{oX2<0of&L2|a+;y(n}a{ArH^p7D; zS>V5~0Fcs;we8c7Sk~#kd><8wY}x_+Jm$8@e+aha^m~fwUq+g;KtH6y1?w)Be5`Gs ze#Ej)e`nG*Nvn%uaEtUq04}HBOHBWndz?&}{Ol0`QvPFY`}8B0b^0el+r)DO82Il) z!i#hL4CkM)xO*PD!Y5mh3vj9&-=mAh^a8qm3uz z=?Ut;F9rOygQo-C}%x6{w$_dhj`|JZo= zxU5}arGkI|?s?9Y^9s6beUJXW?HB z)=n)B-$cuWdYSZIUe6xfk+ths|LNM3oFZ@Ed`4Mp0Im-%P;)Z!k^55KgV)%G1()|CuYCw^~2}W z>a`y0$BO>cV0vt~pE`_w`~eX7b7bwB@#ksVtIyiO9WrbWf07OUYTcvPr*4pYx-fbf<9S<6`iCwO>B|8iJi=?+qEel?TNqpY6nYStg02YG*tPC34CUzh5cTrbV&kn07nC+p9w zT^U^7j~t)d4Sux;=l0Pq9b8|=+FOZxs`ES9Y4qaXUAi0UjNm0t$*nI52zWz#4MEciq9C13puSS7x6a7~Qq%&<>cJS1__E zGJIwvN2esmBxn;-lA{teNpXtwSgj^1F*QC4%F{KeQE5?0>57D8z=%prNRQRT42ucU zXrm@1#+nx=Bu|M=0~`VtR|@kFRcX>w6BCkS#l^#*S50`j1<2q)F`oZKZCa2fCN@zU zW!83B!o;Mgpnzm;Y+7nKl0_d{dHAGqkY* 
zR5XQwr7L{WQles_qtdmSbZuHjv{o}QF=YZwZd78Tf?`v?VpGmq&C`9I&fLw1$6GXrkld5;PN{q9?~D z$0&jo-oVO~5NZs}h)qx8ApgXuiNUEjJ?V?j7pkuJ8^FQG1gc(N`+kVF81K7|1s` zG?XGmgoOCvSHHkO?@MCh>K;ZP9d9Tujr0t*F5RlcqG-2uhzd)5@SOoMEt`7AJQ!64hq241kVIh9Lj3!zj)brK^j!^jcg_End zX1K2py~8iVG@)TCWKu|&VuB#YAY&lNbZDN6_{0RQ7KAN0C<*=(;eUc6BzOeylCmHo z%ul2C4h+`#6VmV&4Gt*-R>Q4tkdH>~=N%p}Y%sYG9O*5&$2rlc{6hSI|HHyHqkvXL zScH!+OdZmv@D2$H9Esn;6amB3!9l^AP(R;b(o}eu#y>E^H(Wt%yCxx76BL!6uJ8|t z#DQwOhYwZ|+QPzvebwG!;Q_t@;Uh^+zsL}vI56A~WGo=kPesQtI5Yryp*=$%K|XMl zC^#IZ!8dqVm?nH=h##qJS%8>eMhy2+_ze#Z9PS5$8Wsi}Y61g>`FV#1j#PvN1qX+# zH59B8;FNWt8qmfRZ9I6qmI8GlG&o|IO5+!bWWpCF9o~V1gF^$t)j=?YA;AG* zVZp-`;TmN2aKFG{7-WE7m;yvdqZ+OXRjB*|VfsTg*dEg>Uc_z*gmMGsEOG~-pCga zZv{dHE(MPOP6Y)(m$A_)NfS)=R^&&*v{7kV(Y+toI1{3GdR9`ZHbom7o+2s*9O8YZ zips(WtHGu!P&BzH_y$GSAQ;wE^+>tLn0l+|TCLtQ% zi11*4e~|nUsvyNgO(tk&GW<`5|1|gqE+|H7)Uk>;6BS`>8~Yl@<$~0Wcr{ z#EK|EY$iBGj3Y?^2*IN^UhyXN;`GL3nZOuPbqg34?l(BpI}mR{)`Z(|5FW*FR6dkQ zZ&iS|4`{zHRn-Xi3d*D)o-YZtm3&Kwc@=-vBthT`0bfS4R*_6NLeb*VD1Ih5ZdNlc zjUvA(U?iqY^l;Y%Va%xU_8qK=ni8uJmj$u1WDQnHS4_x=i;GRuq=Tg4VLzDVfkE0ZhpfzD*UrWomi_hJ*{ZS<>67 z^fo?0+%Ua^cG(vgKY>q~%D=EiIb7Vx6xJlCDSCn;DK;uu6Q{vYSNxs2FUBs2osv^v zWgupuP)6)L@-`EM3nGNUQAtTr9=*u-Apa0@ovKNTC0}BOrNu^*Z|Gh$lVLrfO^C+G zPBSqIOg15dMwr;Hf*Zjpv56_sU=2aQg|AtruTvEe;bP~R8ro0zWO6$M0wVByF1o{nA3`3(H;e%31ts06dhOU|s0b#1(AcZ=}pNMa$Um!><7;}+jrT!0n zZg^+}+F-rzYP=73PcM+1$k0|u)>55k})O(^qxs3D;t!Pp>RgnNUdN1sNth!h2Z zo(8Kl-r>PP0lt5@fsiSKO;1(6XF_( zd&eLMxP>M^#5;5-7%^bLV0Y0?mWTMJmx6}r5YB_XnQn;Mi$PTNi71OT9AX|)hL!mj zips+5QK-*ajmdQgs~oZ_74BllZTcHANech8SkclWC^ji2ElaWv3Xe}qc{5!RmXZ{! 
zjR)5vUK>G>o(7RV3B%c{MiHe=NdgrB0|Tq;i5hJ@XnTALtW%;>AQF%v$0R^NuZT&< zH8YZ2XIPkl2Bt*gS{9ZV z&^ET40nxdvd&oC{LFSnRvqdfO8Z0v*!jmsedAfjLT3hTTrx0c-qGBdxz+xvYHYPnj zAx@jFiAw>h2<1q)0>VA8?_{;(p>fv;9xN#(CIbRiupr>lz>PrS1Je&v<@Fi4Pkjjj zOqPliu2{Jq%L7?!h#l7uz-(JWxM#;YgnM?ZL%4q+OZ>2HLjlgVQG|Q8T_oJI?IGcw zZ668uY+FgV=lLdHz>c5rTLs&e6z*|+$bxcK!qQv{7k1AtIBvw_aJ;&}KNL&;kROQW z%t|q>)*k*@I*pYo6v!tzUT1*f(*oi@p72i&8DRnYL%t$jB)dnv4ESg1 zIaaDrAioh0+rU4>%YlEE=7Vqa59a~#GTA-i&4z!TPh`akoFBv+&k7I^59hLUAS+cU zaNgwot^}yOUqnJY7giJ7`w0G7dXkj_-!0&6w19{6kL~5Kn%LfE_;1VZ`32Y~98h`b6>d&U73%*V zvk!a}+$=bH^AdXH<@_p#qfNy>YjC#xWEIZ&X01NkuCiL6+iz?2x&N^S=ksa}&iB=< z!Pz#MRr|$$!Y?oBI!I^;vaO{S~}~Uitj-tD~%a z&R=VAK7Ii%Yu}818JyEE)R*<|Y-jv!?7&^*a9RJZcHnMy;N9eK8U5~ZxUB!)cHo|J zxUBtG?ZCb4z*Ta%j6Ss;c%U74h#W4XKU@x%^&e>mKE@7wyd8Lq94@0T-VQvK!MUEu z;~(CfAm6g_J*|Hi>hlI!zdAX6vi9|M=qt1XFOs(}>wlRXF6+P84t%v8_!QkwokjG!NwkdDH&;7~E z1^&w7*QN~78r)e9XXlD}`z~_0tbI2-aCbX!PdjigJ8-oe&Kb$dL+rpASjY=Gz9wye z^NnBS@c(uGnvG93e|%j0DwnTk9-pjz7diiBa5p>f|G50f=rfZa89dSs{p0PxB)?Me70QMAmBG#CR|Yq;4>Gum9s2%XpItAiZc>jX`&g3QZ%FFrH?)rFzw0W0-^7=CRIDduqW$-q3 z>buy1yE3>n`a0Qxzi0=}?T0n`yV;@7-A?-+cHq72z8O6%I?9^Au z;j;Nt+kuBLxV8Qx?bIK`;GAr^{EfHMe!LxcsvS5Vt~L5ub-~Zc^`F<{S2>*Pmo>Pv zoW7&HM|yR!19!6nXJCa>4(Bj0kzSpq*@1Vo11GPHkhUqlbLwNKev%!yw;i~~4!nmQ z_;YsPAB#t0{@p3WcKz1Z%9e>T^SG16@F6X6A+>!7C+1069Rv^1|dLS#1-M0Q!Rv^1|`chUPyLHO2 zJ%63~Mw6@_zinO2zgfOIy<@qYmv-hGUA#bcwI?6;ghy##QzU}&U6Pp-6x5kCN2E>?zQ}59ctHC2lTu>>`M3BFZy*0Z|T^(j$`kf zcSCx&XzvygQZg|7_U|RXZ_v+jbX>KvV*b3@*Z)|4IxgkCpee0Bt$sYP`siifxFbKl zHz57DGq+y5xOKzB#tQ>_MRvNV>33t@ii*c?-QQ9EN5t@suR32?QR9B|?1RgdGd34% z&ae3LPM7dsBVN}hxpzERyUA;F*S3y38kX$w(HiFk-|zJ8@4Bhsm}QNQH19X_v!q{p zedBd>!hQF5T4cuk+~wwHS&5IQr6qr{;_SH8dCkXO{e1n=Iakk~?{~4V-jUWb2OU}8 zdRg~I3tQBh@xcXM&5je+)_XL5QL~+4)gQKe?35WlIqzKaUUOd`=vA)SIIdcKvFwM{ zS=-!>Hu`qqiZ7#Gv&JlpD!Aj4wd_KTANt;TBY)%O^If`j*qXJ@ZA5>^*K5oOC|o$P zb*hJNqm+>o{ofvNZvMGe8(XE99he`#rRdE~r&_~Jhl{W0sl6(`3ah2YEHn?Twur%cgZ$|B33xBUS2~KR3B} zaB*?evXJ$Ci?9E%Wc*v_#+S5L{Dvmw_P4=}7Qa5^ABi>A2RHukc(Z{|4xj%( 
zb>!%CXL1kxak*~r!2VBm7p4|1Ti&lC?(JKXayNGG|K!B^=hu~O__kieAN}f9b-4J( z^%{3JuBhQTxZ=(Hwciy-3^AO~2>JS8tIH!hZmKgf|6+89y3O{*<<>j0aKeQzBKj|X z>BsY51odCuVehW=+dst&a4l2ciFbx4?K8h_s5Hu-nlj)WX|l%{z+%}yPwQbO;+`uxupGf@4oNTp|RtELiJ!(!}}-H2^U-3pILa~VD}3>)xj5s zmt6T!zkO-ws4x1IO|0MHRJq2#)7-XtkGs+)x#P;S!}^^Z5byfZ z>bbMVsQR6pP`YdJv5-&o`3namxQ%R7y=&vW9_sOKBYpZ^T+@5}nV^0b--?fPTa)-j z+1yVr-b_=EpBq`DdYAL&2FRKctKZt_}sRICuizEC@dfKY3o)c?Hu1~KQrP|ROXNGe7))Y&%I97JF0vs zf6~^^+kMz>_>qa~1?9Io9{l6$6RU1K>ZH9@L*2%=!dqQ-{My#{PDJ0h?fLVdCWd=W zf3NM@#qoF77MmM52JhPTv46$62{X4%tKm4?H27lMN z-|FRO^wZO#9~=m{{m%O*f`_eLex}i&=U)3f>$dZN=~u24wYc1U#r8hTieSOgn zOQ*-Tez4-B+rHminZ5S(`b%$n_gfIy`ny-t-1fV?=+wXNeZN~P&vodz^!aag{jur6 z%C<(Q>#Cjc6Ce0}xo_s^8wFExYfnG^TcP^xW~V=1zH;#Gq$XPe?xgj7bK%XmzEM{B z4(hgidZecHPpcpJE$_HyZ1b1Pv;4a^?(bRTT-#yHsqyObC-Ot)M;;C-oj?0{#;eOV z{Zg}Z{@`O7ub%M#=)7C&HM3T=_jrEiouVt+&eJDPKI}R1^MbM~nc?;Bf9i1jyJ2(t zY))&F)5|$>%kUo64FkXG-@tG9*p=7rv>fO1=_H4hpGEa(GvlS#li$49X51+^k1Xf6 zUeaHfQ@N&k$bru)*L>Fe{Z^Zo%u=T)zvw+|NRQ4-rk^hU?uEOdO&feQbnJr;Uwr$0 z$sb=HUQ>9wB&uuE+r5EBAEuaIZm56)6rk?yhpksq8eSqdxS)^5IuzZFjga z%lmxIkdgYyDGk5)=FI&GA35Cr#d*TG2A<{3oNGL2UE}B0H4gsLuviYTM9;a80`LS=L z;l_x_$a>0KIh$Sxxf>eN!E383r}z4k&XI{9%`S^Swk|&1zY9M$U|!tHpy9 z+L|*`qlQPGY&-7vufLtMcf_d+i-ISOIS}Ez;U7(AK7S>;#t+Ny^txA4o&Sk;&*0Vj zei$|4#_W;JK61~jx1l8QMBUXDL+-q_Zr6l|`#)Q#oOQy2lf7(3b zhTp9VelwJ5Z{K}w+wQlfpU60xbmN6DgMV4~+>LcxzZ$>oh;FCSX~w#qr4Hk?n!4Ypc(Nf>wKGf6}tuktgptWnLI?;k8e$lwR(U8N6p|)RraZ_nvKB zFgDX~&c#FH>K+={X84cW^({;4M^89eoqH%jIVkp?%8;);-{ObBpT7*d@Ke~yFJJQh zVb?h)*H4%JSTk+;uy2zGRu0>Ir>^&N0~$Sd>1D&=J2h6RRy=tja?qezT|cW?{n_(r zt-t-m*nHQa^suY-mOgBfIbvd()1}{LovAthOQ*Dg?>fgD>)kF}wsGap2g_q?I33*F z`s|~~><$^{quU?f|K7P6P21sJu6$8gclRr=H9Yrxue#-*el@$1_FnzZzL=?c&iC*O zlZQw$ZUkp}0n02O|b7*as zgWuF${MF2NbI<-X`scHaJb-y1k*}JJM<+8)QI@1e$ z2Hj3@Zr-OLuIFmQnie$%7R8Jl`bM|W2|q{8x$@+5AJw9awyEz)y%{GtP{^8d|kuTh3cYL+-82d61C%QOz?CTizKu z@quI9?c#Qg(ym_|I(&B>@4M$dnYFWahkNBmUDPw$J>KwbP_g^YQAN8#nr=Tn-g!v# 
zJ>&gy5|`fmWZcS6GMe9UxZ1JN=w{vz^Os~cZ=1WN=gbe@Ie+HuD_@R$xbVX2cZ*a{ z>RunZb>UEWKfCX#B`p>XU9EaD`_vLer`t1cML1~pbnji*HSudNr`}~Q?|oJCRrY;VVZbgg$q4$cQ#$=Pzo%acIfmRgvYlhgSV~$R)+sr_JNQqD@~Ye_wg1 z$zrbu(|y|cX#1~R^huKg^K^&ry))tFjW=R?O+UQK;ds-C8-upAzm^iX#VzN#pB8#o z6}8#%&DrnA7d6}Q&5AZ_mma$Nu3Haf^2$j=Yu)e*AJV;c`SGcdmHI2Q8`YWCx_3;M zB;}*$8dtpRyif5_&FWjB(N`}vIbJdJ)fEFBi+}Hb`_z5E+!pPdhQ5|MeChe(K?R1g zYm2joM0<3dSKNC=+w7Sm6KbzF%q0Kke1xd#_T*if>wOEZb;Yapi*^Azz=b zXx!!bWtpc|e%yNI`!}nnm(D!AW#P7}59hysz536!K4E|Uw0u{;i-R5qR;9lb`sj3Z z?FKK59X{?_sO$Yh)m?Nkc|LJ&cPq9uTliLsUSSTM#*S{E71I8*)$4VUq+9W-aW$t6rlC+L3E~)~*@S>SlU`N8-lsYY*sCyYZIs z{o6Y>d2vOS$MZv4{bNN)-<4k9?Jb<&@pc{6X@_iw<4t^e`ZT!gs(oqYSlzhX_e)`otLfT&}H3L!?GifH)*!(`bIVE_oDaw$)oS>?z4CQ!M>$=4?9h} zu-3Iw;}^1h-Kndsep8(;$$S58mz}42r0w3Av~KkFTGw~CN**_>tmBs7@+Qw)kRF=R z@XeG-@w>kpG-SuHngh?iccXLRZ=QF;_PpG>V*luYD~mpU|8|Y%m0s$mYqjczFRaxn zqK4FSUK{ST$Ni#@N9!4x^*w5|dbvgB@fw%D){XS`sBVAwwHE0Oqh45Dw7~JwiV1yV zUjHO>$(T2Er5l&JFL?d%ActF?d$sGL<2vn4^e&3nJ99x+%Lhf@`y5#MZeq}+`K#7y z9`!wSuZb#XXs?iOZx=k7G;q>`z31M@pBJ~L>jv+UfJ`{iC!(5MO?Z)%jH+6Cx@== zuias8`5tvIzHvNu;mH1@Rvp@K z{>t1Q>9eAJv?FIdS8vLi!14E%8N$z>obRdcdZ%BjnqP*_7_jtmXqTz|4L^HrSoGpX zm)Fm{;r#7SRr_Bb)2Y$b;Sb(uF``axX~gt4SrJZ$hOF6iH{CEdF`@C%gKr;v6nL^u z&9W`iTUYL?*?s1@gby@x&wu}LZ~f$4`10GW0>?9tr}y>gdhavGQ~rZmuSmKHU*hn~ z@9?D-fKFeX*zxnE=^j4s&s{L%(C(gp<_*%-Zya;9-QnFo&hg#rzOmQd4Q_*8ZCxwG z1o@0!QXX<@^DYB>0-5&PTxw0qtGhtb<7KE7Fe zrt*vSchWK%F8QGK*3_O4F8El4@r6Z%T_gZ_70l*{;@SV!OX?I_LboR6(-{=O;x}tQ*ZghNN z{p>c!+c}mtJ3i52Y~Gax*@>U17A?5G=zhii2W872L@vMga=WTqAKuzLu=?`A?=L^z z4aJwUHyj;(Jv#erPiX(k7uvO}n$z{S#UazixzCRoI-+>!s+I+{%g%K8^~noeJN#Mg znEK+>uM~&-H2QAAz^sUEm04d!mm0V3zV&5ZQr?Vv2VO26eB{>Gzik@5{lZUsS{=Ev z%l4?gy}{2j=Y>ma z%W}R`c@6)3g2RBhOIL2Waj2bt$mfrD>~3~_@3<)a;;JRfe{0+S_;1Fs|A?R6TXp)> z=&;WhKHoZ~-_9}LulvpK(7ydIW$$j56t&!4RdspE#7jTyh;RG*=CJ9JD_j0LyZx!u zooCji{HhvyrS;ksnVpUT{MX0_&5)x(!wJwD+Jf#x!}@>yKrJqckZ4JEx!QxAZ80eyY3-#mQ^i)lKU)JA2z_KQ2zp%h-0x zW67$J$zE>lBI5e=tc+Oyb>deq21i|=bm-l}y5k5xvLF(ZRnTbHtF{0?Cw_@e!Khmek-SD 
zzY**7@}jmy^?yI;K4$aYuUf45bkY0w500x?cjUzC0VDrEzRoecvL5XB?X*+dQ)_D5 z#?-cL+s4$kZQHhOb85RiyU%;xkLP??|NL?%JJ;3SwUVs+PO{NFrw4hs3Opc)uLuuJ z8y`pUBb57XFRCF6AA2`)p7mt8xcb%=lko0|n4A>iQS%lU?JqvM5XI#p<9mc7{*qy# zZ3q>BAjiTX@f!*95tdjsT=s4wEXVb%{=ieP*g@1#hZWUv@uQ8z^RDSxmb(kkIEAmG zp!&SMNTsz2C!Ds69MEa)tWLZ>Tz^};)9K28KIFWqa4cn<9*b$au8=IHoF0p7yQ#1& zZI{V1fUB7i$k3dsAx&mHA3TKdaA5D}(VU{o9{=^YIP&XpVAuzJr^wxaObT_v^{vj= zy3Cb9;9ff2J_W6lQJP)Tf~MWqeX2NpoF=y(vFIi4p?aWZ?Ov)YiHK?a-igS+`4oGm z$RTH4A*_h00@6lYFf{V3&KprHkk~eV!xx}SBeLT8lXK%h0DuW&rY2mH@) z_tjG8_PHn}-2F|ABxG(YuusXe5-GWHXwA>I_g1Yt1aB-^w-|3K#JpLu7B7$Y)Gm0i zr_G=4ZYrF+5dg5gFMNR;8qGOma8VTn&)?n~@w~YIms*%3E69-x`k!lteTp$nImMRI zgul5rTqb!_D`uva3C$Z=CTa>)zw*b`{x)uP>8l;GBiu-fJ;w)L{Wj)Gtpo_CWITfN zd6Z_$fSul-{$*U@;DmV3|jn>(tEO{m+me-(}`A<>9 zmbPH&$)NRSQTcPLfqK+IJQ^a7!(-yG-2|`{=g9E`MRG8jIufTTE?$;mxN!`dK$eyd_6*6FI;w2*Fc_{U3#Y3x|dO<+0{(m z2UvH$5cmhJAZ7`$YXHQ~jvYb(WaKbE3;`1^IDNv5EA{}Lz`h+qKRXch4v-@#cn%Re z5Y8a*i76c4uq_Zr63C1lCJ+Y}9<9_6kUB1uuYaQ$PkO~JZtD1$+<9Kku^uWn<-&+D zJV(1U4yk8n>p4UloXow7t^zQ-LM%%Wns}rBzozFR1txl@+k~BzwO_jCCZVOnh(*f5 zT#r0)bHzjO5Xr*3iuxdX{5r_@hIZb$wp{?#LC{mwsy;m7J7 zyzi&tEjG_a9CQ?GY&IA$LII@pgA`Itz+QzOO%T=U$6pPFRL}&e*gi#LVH0VER0nj` zQt-4mMu6YVQAuj_f>pQPZfISE?zc%trmKSe5+z#`p2yGOrZ5wuKiPoZbU!f_S1P zVW_TGx1FaA&%w(UgkLRZ0YpBczVA2$w<-h&@LmfyK8Rd8kAhBuujI75KRxfRY=mOG z$I^S>j*0<)%=UXiGyGM0_AT5Y!AF7%_BU#ef0sFYm$?vp8@2}XJhS@LOY=N2UGLuL zCj>Ql54~iOtNG8m<)=EJ)(6U$f0)BePyTWh;S}@>)3Id?-aFc|1kOtL@VL9qAlyk` zkPkdoCgjqg?sKp}%vuk<-cSxab|&O14!h1M0-ip_yv`uFo-33DL|}{{J0!_9c8tW> zf5_NjgK9@5kY3vtKK`JU`G>z?Q??+tQz(`Rs@=Ab+FW${OCH@XncT@pDxq&YuFWi^ z^0G{gLZFkpOijR1W|4}3aZ!RSDqg99<~8e65O~pgnzzv+y((+fOb0{ z8YvVjnB1h_9CoxfltMx62Zsbh;_uvg6a_UX4hiPO!n}G-1vMHrkVH%6&-^=su@`zz zZ%uaIsA+UGH)-Dxc%s z2Z-({+DB@qbCg_^g6@eXM&iS+%1QS!(QF6j%#HTr`dCbF=d(>?=j*K90U7N zbNo07-s8v|Zh(;$f;_XFz61Ph8!ejUc8X%4t!Z#xdZwE?i>Bk#>H%V;eUgaKCMbk| z>mn;>KIJ~!tH9_)a(i+FUdKZG(>dkjcb!B z7JWfn_CRaG(~ZG>w8qE+@pCMk`yq(7d&Pm7u?6{EZ4WVohgKMmLhaWHKu 
zlCA>SWVG`B%%K20qvL9x_+8TNb@^#s?DaBZ-*IVjGO7W8^%E0HVE5L%2*Rfbd_1;a z-*ir3cR7X*8zsu__5uNswxaOmNa%)SV}k{n-VKNQ zfKn`{dmuoX6BY8LhuhLJ~` znFvYr;&K!D#t0MVd=}E;qoWfwka1&?u%H;?J*gpEcyn=w4i>^-^PxxvSMcG484V350UYn`dMBNo$pQt3sm7*YJ z0KUNkXev_OHO<08MV7KBc*J7-AS-iuae2|3yt2;8CPERi=V1}AsZ3+0S2Id;Y4rxA z0O#A-q$CZnMUUrx6mWYI*&;aaX|uK6Mf}yq#2P6=&oON;@8>@_;GSM(5p%DAs|}OC z99hKAIBAd4278Co)^5*pZP~735u*8pugkpJVWe8-l}Wxwn*kZroDXwi(p+!J+Lz`E zjlM0tcc1b<_An{g6Ald=gVJgHeEdkV-B`_Uk2h2z6MlRx3~6GUPPCj_8>U@?{KE)2 zebUcmZ4(aNI6=d=uh&=!)eXn0Hg$xoQhV_QKtl;Am0rVQqzV%SHL!)UwZlGY+2IvZob(4yIae8EgOY7>0Ycl|zS_ZWt_hN9n!FitzVPA`z z_-lGs9G+sdAz>$e?zb^9o&9Wn|6_KrWTzoprcuw5u#}_5TrGH8rlH+MW#1R6TBOmE zo*YEIVb|=rc{>7`+@;xU8;~4Al5qs(iPsuU)vYPHP|{euaSKBEuWL#E`sNk2&BE$5 zY{op&E{8`Gal>!y2b?0BBu8R?fx^#ZN2rP~m~i0v-P4Ect3gS3r9QOp*HccUqco(Y z7fdf&=dQBwpU2QI8fP2)@0G8Vu{UbR*8S~|XB(L>%z4YLA=ef{)ebVd%c0uGS)ww+ zDzIw6SU{lJy^)g}I1#DP6XlmRQ0Wbv8~Tr*aLqBDv~zT)d~5ikEn4H`*kwb*)sZ7})PE>EbmyqP~_iw`o|k4QHOh`%!ub#Efm#W2&5k zx^U;#k+x{uCUT4A*HzD!oR9KTybUcr7o7=|TGsHNgf6~Pyj3mkDn6FzCVq^YE3W2m zjHV@K=E<#gri@7|jJtcnR;_4X!sOO@w0g{Nh5C&z@2ri%{|+-npduM5?9_O$e^?{1 zK3FBRSQLKH18L2TUXpFi1v9rMONzY{V;;h6fg49D-z^ymi!#s97=c}NQ*JxhQTr7a z59n`cnp;i^H1cQNnXu()k8Ot>;EJawod=&cB~>K-HBQ0^E=_0E-&Y!B)3{gUo?aW% zwCGuwTyiy+S?s1OVMn_z_#ZEJ5-%0?BGl@gR#Fnrw)v(|B*iMvV&l?;7)|VtZ_R$j zw}WqI>f6Hu;$@PTv1uM-e$mM4+mJZ(UT1k4>x2)J3M{&f&r6HyvWYfYIBrTWTZ8ve zJuFVWzxS@-X{_Eq%(=Txb*d0B30FX058P3y1Dqo0p0PR!u;00_pl;1Lrc1+*c89?*^dRKFUuuZMt3*;@}i+>g(t>~DguP2vIz zl?AC5UOytA`fh{)BRvhxe-BPV~@o^s1jcp9MN>$47KX7{sM- zwP#b4P8kc8_7~d~tBg;J#+jFKn0;T3YD6nf*0T_g8c0W&VXLWh;}R!pm{1OIC7G2D zh~7OypB~U~w_4mQs#=h@T0~8^{9P6d?FrpT2kpkSRKG`Ds0-A}TD>@8|G7hUabTsw z^gCabr7ODjOCoas4f8=VK8f;@KeGLzR@0>!qh z+?1mhrhcDyB>cNA7?Oxrf=|;nnzO^-IJkMzxB`i8sdvbtOe;R*7Z-w=%0Woa3h0lx#wkZ~_e|*iYv{|A_ zWr2GvHftTe=~SdOe7!IDx|inV3<5&5Yk3~EEy$yp&~jI#{lH)y5Sb`?Zn18+-i5P< z*Jv4~aXSu(MYg^SXbaMK(Ov1=Jc&c+Rs41g<2=V$?^=73S?-E!-WK4*-a%?y%yiz# zXXAXkh2Oj`op9FdTCI`yp|v&;H_*%6jtf^ZS6p&kw8aS}-H!{Y`U+gM|B0sDPf5f> 
zFh*o1v&4J6WF}|%@t&->k&mJpCT?H3>_W0sQF$kB6RY5T>oUfF3iF@#RpF)qd=yF7 zlp@5f%6H#-c?@XX{sM#srt@yjf9Aba)F;$W#H&@O`gp@*cF#wtpVGa(^u_z zBG=7cy^%Q_JxC3PY<+bWKxEi&wUeuYzlIF^xK#Tv^Df^k7=$3CwVl9fxoxgQZEwpM z@3cgBe&~8=J2@MuokEpY8y|~2=^g>?%lLr#V3wV6j)d4gJh_BDS8WXJPxJj?6V>J8 z4UHKF=*nN#3{E(ZSAv6Oaf})G-^8I;^#ytdd;j!n^0#b@+xQt)CBD30{9MLir=68+C-5V`ioC|t(jIG@hwX-juTYyA~Yj-9&-RUB_~Fc zt+2G)kwGl9fdlS5wPvq@9<)od!sVOmLEs$^pi5~O?nkq3RIvUOItYB@@3ws;?L({D zAQ$ER#lE}cF3@bN6GFS;T&#%-yc>6Iw;kUW#QA8r(#L(CknKZZ-MEM)@CNJicH(M((SW_aWk^NGE?w za%49o(4BhDk@OYZiGf4Zr|EYIrJ$1?fd`63UwlN-p(lcNO({jP)8evUcKWklLQdlk zcP2Atx@>r`w_itj(3_2BS)zIssbN;6J-%=yvi=XaL+kt% zLN=jAb+BiULQb;+X>yQ4ghrX(t~P9y+?i~uZ@6<>uh(8+ zbQfEh$@IgaMzFT1kbI^HeSR@(=xkw2v?0u)vOfgaJky7U&D;s7WDz(|nOWRb=lQFg z4sTD?RXuB1s~ddq6bkcK3h!9T@>7Zy7iyt{--zZn7@v3mTy+? z@1oLQEMMJ35hs5jxNXTnsk*k7-9yPu0`c-w!nw%cSi>GR8S0T(Xw=am za|J*%s*Td!!@>+*%Eb@9h|*_S+Y&=!wD-o28JvF*4kd+Sw95^wND&TMqtQEuht|S} zVzes_tXL5aDVypZJHu`TV7k5{gk*EFy>E=}9@Bt_XSd@}d0*PqJhv|Q?j85Tq47Sk ztG;hsA>7=ld%oX7pdEmSLOi$1nz>!!K;rO5mp;2`4o;6D+|&>9`tO@~_-0?szJ@~Z z(tgnHk_NGSp&^M^!3D>SYYUOJuGuk1$Ccx(a*0RV{#5;)ephxi58R# z(ODAu>nR|@#0%3|lKAU6Ai`*b>ZSl}5&$w|%TR&tU@8STo|4xfI$%mGd&U#i*arq& z%fNPu%z&&pfZ00Ct-*3{%34zchsuwP5izUN(kIz_*s*<&?SZm}fn7b|dN^%?wqil9 zrLlcoj(|F#!FRNnf!;|xJ@-&G>0N?Qd{eOL0WX2g5r z<){PR4kYZ+b|B)bAF=Nk0=N5L5`VNqy6HspSLiK?cU)kd>JoxqO3)`f@7-WiquryZ zwmcUc91kZ&xkY`xG2#G#!-n5D@gG`syp$ZPPNW0sCME!otT^-BJu0Lo9(Fmf@|ex{ zPXmc1_pn2#kVW5;Lr2&M3L{XJd(?g!cMfcI0ejzS9A>6dT^I#dKzT_YV@p_>c_-Y8 z**#Z@I_>!SiabtD_-P!7GlAA^idKzw^c~>Yg9JQ#+@pYJulW1fV*)&TpF{Qb1%h0Z zD(=C6XKxy+I*o#x^m?wK{|Bv>yY`OJAKDdRi3gW<)6%GKfakPGjapM1zt78Tl%!n+ zKpg<8R{&53fZ_8Bqu1hZ%Xd*50BpXCioRhrK%>adChkT#SG7n535D~`7$T`uanrye z5>qRoJgHP5*+~7y*mnH>%VlitLw4w(;~kBKy|8iMciZ{2wgg+QE1wbq~8^G0Z~tWIZw(fVZ?VzrJF@4fPF zGtsll1ig6O{(UudZQpu7zi!c`bXAuLy>7`aJO$@hU`t26=(*HmP2j{$zJPqB=WsqCTJe@|)|W!^D7=))1Q9~m((eSMwX-oAJS z)%@&AgtP?r^mW^Vqs7vq)*QW$^KHX$owgk3esW)7@!8LG_VbWoybIJDNAUe<(skn& 
zQ$p4Y%Pow6S)?tA<*s2n*I7Pbs{V`T$h$zJ1rbkcD265bBQd(_B!*>wb?1lyIb~eZ zlm?#A!7UVXPWg%omqH}k!9pVC5TzbmPg!F9lX`x6XB)cZlsn zUlW3DQv|wLP!;X!fz4g5!(=fD9l*!YrQ|+4^na!fBQnro4sL&D>!l_Q|1H<1iXPwX z{O7UXvdJ)CkVplsx~^pB8Vs#%YTg*^rj@U!Jr z*YnENy!s46MXZVS7}4H=3_YQFWt{epKT=elgqH)wDk#vI**}K`%;htNDA*@?pz*O8 zfvbj1uycO8>{@{I{-%P6D`T*Hxnl(Nm)KjIO9SPz71N{{7gtg3Jn?Iw*c&7m=#O_O zt&B}wwPytf?l76&y|S+lZueq5vlh%$_kcTFsLTb*B9QE%)CaOD0^9NOj{3nB0OiX4 zi+#P^_h}bhwJ6JOPrQqJKQPhLV;IsbkcW#>Yb+YM|Gf*d!y8j~Nyq7vfZaF7R3_Rl z5a#?eHZnR@XGfki*xx*x?1}Z$KWcq446LWe(|pE@J8;MI5}m+Q^_g>Efs5yKl6Hhl z+AdSNKtFTee_(n!LzH5PU3x{P$RVR9Tk^Q&l1V~|UN(ezpu;eT{0IwtiCJ4wN;!V+ znmnzZ=jVp4W3z}rF>@~2|&!q{E~ zU_JE{U)y69fk%VDO{$} z%K$5H)zf7$0@r*9ruqEIa*gixA8QM0w+S-!iU&)tf99NN#MQbGMrngt|45z9wUmh! z0aF$8*vDPhEI1LoH!@sA2kJS9d$%y&HFf%<`n?5{u3@s&x>o25uF5%0`pQ|&ILST7 zIij9nH3Sj7g=d@>X2%C+$_)fGQgj`@9Umy<9*yKjR0Ij+A?K9RCEoq!VH@;%*Bg8r zw2!9>7MLQag+O_?s8mTGNx?d$r9fL}-;zds?j-D}7MX6EcF@}H;tA=-1e9v#P=~eW zJQg`ir=0#g7PWc}Jr|_{=6HQwfgKm62Ie$i#DfHkcxffGHKb}K4 z0dYl0m@es3L`;_~?MX8&>y{k3cFn6!EG)6Mosm++8@dX}kbOTA@ZR|tP;yTc+U&n1 zcuk$t&8A$~s-pgsDT)#+1+i`Sei_(S&THad@NNevhg9+1%V2;-g)o)t$)A~WKC17W;62;@ zqyhd}8GQ@0!eC|O7okPT>c9m^1&iLBF9u?>zVJrf-xgHTy(Fyx>X1+kX~bqtOy7g0 z9XBSvv2j0nE^u)kxP*6Xvfs8jiqRm2LU0NHEPUtaJTtryyG@s6`A$_11*2LqILMUB za~%197D^EK&wYmX@{^T6Swez+7X_t51xkVlv$|=yU94s331euGL ze+;W#io`T!E2k*GNZ#6p!?tesRC_hF30eLY0{WD;bU^g;>3c0jf{BsXI3NPu00?~k zp>$QQa-^Lei&y@h9#w{+q7!LV&U!AJGHutXI+OIM3+$%?o$0F;dw5YCQ5=wiL-8=_Dz)`>l_-$>EUBV|$k@f42UzcL2anu9OanujPNy9hDc zxnABt%)jq=xtw1v`+zMEeck-T!gyqn5z6RO?UkX8Kz+ae z|H58z!0ua6?K*-HGe{>*#AbQx?WLE>OpAPGVDB+Wv%W1$OkxXy6<^94h4THS#vs&| zmj$B9dMMkv^1!My;Z#u=TRO>}Wm{L16>@%&;wm8!~jO{IRr$oYyc6gS8kEOJYFVOpP|s} z`zws1{2N+8Uwqo{XY;sozv1 zT8u7hG4#eamIsnP+E(heXH4sC%&)iROowdhFCJZC=B>B?BCb)NzNHcGJHMGpLR>6W zDN69BS~u_Bds!s~u&!`iy|oc9FHuEF%hq#Y5?N|`Fo~?RJeo?vPqLU+x5Dlf zsq`{+^7)gF2d8TbN3gl)6N+GT-sG&@ijR7-)`b)HFmqmkZ{i;g=u z61*y0Z{P3CV8wMO7=s13g&trs9tgeRW&A+vhE8XImOulfaV`uIyzHo#BT 
z14?=|fc4>P4$^nm6)qhoIOh&Db_x}Ak)3p|)gMET$$&qCF0B^7i1UgaWx%$}v#4*+ z^MxvC@a1+D%A%=A{7Xxr(20Fl^{cb@emme{yFB+xTkgv%@AFOgLk8iYJFDdDZ{FuJ z`a`#0a24yq4)fg1*HFeMsU4t?EB=yV`>+{1^Q{dgk$Xbyk1L4Vk$`PZ$r;L~36wXb!CL<}pbo>YJT+)0Uf=PB#5ZhY=)xc`8(FL1_duSwX z^SPJgc$6HqhPKy6-P7nE#=I8;;}FV(qmR%K<0gDx>SgqpYtb74)9R%!m2?v)S$O;f zZv5b=bF@XnDd`H!N5NXrqOwF2|5~cA#>JUEgo6L;a0rje2dbL3=Cthb{7%}=KHs65 z`;$+lNlahH&3gxxjCh5|uKJ!qxzJ|cgBXky+oYQJ)mVtHql$IX_VizZpgz#t1$J+H zuK*#IR)y3^h&$Pf`V^Njeq(x@%4h`R;*8#D{VnpRJQMozm08{_N5uG-QHoUJEuu}ios74;`x4BudZ}O(RLgups8kd^r#`eSJ#F4PWNlff zGG2l?XJ=R#ptRfo7ApA)VJ&3wnWWJ0vzvZcCHh#o zwn7y&xyTTK6gGyr)75Lu?^G-Hqbu&ADgPGtFCF-Zj1f;5Sxu`~s{e6;$NFAf=}4rZ z_xtz94AggKPq(_dvQM|#rqVu1sD7^CN);_7@3S+J3LWIhp+oog_+ZZmwogsmq5h6{ z6tDWIJ^dj5%-dfdfvEd>rK~|N0+rXnf858x#_HrmGHs?0zL8O=Il$`@fV;SEC@Yw^ z|AJ(`KtkE4Ni=T8JKpScuKM6ft-PeCTSNHgXFmnSb+&RO4FP2o#EpBd55NI} zPBD5;MgWp)2k(ONEldDumNO<&kDm}}Fiwp$rDnjIRX1YEO9Whzv4f#apm?O3LA_?y zusbLi&c}%b@8vZfZZ1jG<`6zB*w9@>p_~S~EQXuxo)}TuL3VXxQcf z8hxIDX%_>Y3l$H2B#4?Fo-5sTr?>MmO@GBi11wsP(+{HwdBRR5Z`MkQAlh2t<2{_( zPP09D4V$3})i?>g-l#;Sfj~u3w%Lhvf_soZizQOiE~d?jKo2)Br5qxDOZ1MQui$Bx zODJVHd=>vtk&@<5sTI)1u`zdv1K?2=0M$w`<{W$Z zoz!sV(rSVb=2?nechy!f0}4>giMK6EpKVr~Ana-=_oq#53glN!u$;568A?1$RhL0T z;r>+?fc+%_fc+(xDN0+Z>Q8;YclS3GH~T6jE14mck%U@YDsJ~0{tLswNzP|~$v{i7 z4R!zN8LnS~pRY0v>(F+$eIkTzL06mUBooy-xi8DIny+0fWKF!}PYFMH;DxMD(4Qil zVE%3SwF&jyZ)mZ~il161S`Tj1xhS1)iJ!`b53+jt0QgG0y*e8yyTwo3EqJWJd^5Fe zs8vx!>j7(5nVBS*V`SW420D(C=2mJ7>4K?8CAXgWL@ih3D^#F^sR!5mWYnAX7!%Cq z76+F60(ILK(pK8`>GW2E4aq3=`s%1hWtDpajX2{ZE0jkp$rt~TJ>+a;O58BbRdm7O z8Vc(z7FwwG!oX6dl*X!0%+>g>Dzl)b7#Jz|wSL2}+T>%#;(3qSWE3DN&|?&73OP8+~Ps4(QJF% zk8a(h#1oI)sOn70Q*0-zJoUx?mM1+v zMXO??wcM?09BXcf)~r9@&~CQfUkjn$AS~x95IoRae8q8uPF^R70EQn(*D%{$-wktj zIOYU>Ix^Sc_)Yhjn=g`7C^yWPBy41i09fTdK{`PZ#_mT~P7c`p<3^yGX67~Qo2<58 z5>B#&uQd_vETFB2Y{8M;X{LWcA8-eit}2RRznV_6bf)}Y5~R_3E`@BN))ex+hv%XY zWxV`ZQK}+@!*yjb`BGPby0I-mQ~ak#%9V!c*FRupr&}}lbGD zH%yG&1Z;#^*K&e>l6vl2t-K{gM6vc^-R%3QUpK-icwKtjq+P~0TUe7pTiwpTw4nZp 
z$9qxLzV!JISG|&c$yo?KvlZFO!b1Z-QfRD`n+b`0-NTevKI<|!_75=HZ6_}CgExNs zOVsa`l8USm!!84}@$?B7DlJ!l&4|o~VY_yt2eV%?NJ-K~zU0WpwB!gLs|`0ISED%U ztIZ37#gUr|_NU--|6DMuV<(j+EnnI~I!2ql>B%dCpbD@8`3qWx_$r73xfeBCqJ0g9 zZ1c6mTkg2(t zO-?DnRyb998qiFZXAt^WH!D@KfkOuEB8ZsL7no9}Tul}cK~g6XjWv*&c4#cQj9K-E z$ZQ%svGs@XY#QE?!PMP|cxO+&P46YYK{c!uF>_m^fnD5u5qs4$3Gf-qfo<=oZwq;1 zL0Zt?7Qk$Uxq6fya)GM;sEBtC5N1zAYbqBL>Hr3tW58fjD(u~1a}Y@R4{)nXdG(`R z@`rb{=^P;8A-^pWX0%$$)wYn%RDRU>MwkxO&FMQx^TZAfrO?DsweMpyy{Eg7Xzdk#RN3lGG@9dgk;cy8YK@kN+F$ffMdo>)5&$oL0 zfK`FNvguCgRb-HlaKm0W^yd02${^x#jUS-DQ9sy}7zhjb=tg(6&H5lr+!}wWfPe#B z-w6sWxq}&H@OLJa)JoMlQ&3{>Fn~LsTMFWhP_oC)3|swV4xQ>YgwU^)JYS@ff_#4t zD;$z7$nP}w?<%%JDFu^}Vz@1i)}cz3i2D*%bErKMFhE9i{4;a|izPP)GK8rlO%;Fp zHD2&Aq$^mOU?A_PhQwARlw26|zQe+JaXFnRmWZ1g*k5F2Y^Qw6QeWdR>ReA}b;SQI z1dLGyXuUILA-_cB3+Wu!%!XnYFjt^E$0LYJ?E&d}1S4R7H@Z7K!BbC+S@r4*J+s!4 z9%I(0fARP^;bRj@2L03K4*U~+L;>*tNK=;{d; zl zzJg^gZkQr+76UJpNr=JV1=MSricC#WQ{>oCPd`cre}U zyTAkN*CYw=@RB?TOlKEqwjCb|q|VqRM31iY*iG9v*sg60O6Os*+}&as-yagDF}388 zZnotf3Zl-~COnRc)1 zP9bw4ivefRtTX8DC1y@HcI4&5KIPjrU08h9$m*zI9qL@ldU!6}-v z#manzQ7b9!Fm;}=c}{7HX|oSi655~$T5a68^5xLXi4Nrut^R$dG4`x9)2Z@OUBlbT zv~fsvvCKkHH5{IdrlHKEWS6L0vz)$&*w|fHQVjnK5QEs@iQe4nEEoCnan|&2Tj&yJ-y|dTO4hLl7TWX#| zNc8k>q9p~C?<>Sc!n`f}fV?4$h07%|dZK$OPDn{IP`SbC^Rm-ymG9Xm!n}?9uC>)* z(2zJx_9g_Y?pqu?E)p8~QgeOB?rKeH(Z3Mdw9W*WB!n ze*(N7GpiUmNx4OI7sjg&1_wU^>se3W5WKqy-dh%_bV74%r#DM9TE?*YqctXP(-{d# zHD6#h`?#5jzRP*J814HuITV_0Elh&D&12QNOq34-u~;OY2#FkwgF5}i(d)UaoDTvm zSOHTV5aWZXPjp*lKKua>CI(zM$ZoCsLRJLS83CW%$qlgW6-BJ8j~x= z#oXJeMGVFn`zn#9ygSWE%fahbKg*!8jx4%UF+t%#F&RvCiIl|+U;h9HCpa=TGpIeE zPdnsWUYWG2BLxCdDK$@ugMw1lY$$d!%*kQ_=``zuZ*$PM*^HZ0fJ0R8BHxQ622k!^ zwUA!~sG%*Ip!DG2p|yJs_85NmtJ}4TW5ivH9>7w2+A=RBdez0=B*y%XF@q;I`e17C zbo2og;AtuRO97Jq@GW+b;Yp;g2~LpV=>bQTiSGRI%O&B``M|&?r<*9Kihz9I-6h$$ z)A=g`g_3gp$B*IwFM8%WTGae`IG^c#e-`ue1_OD0o;T?(dD!mr)^$oJtLJ4Lncc?? 
z>6lih|7AQLfKbP@GK<&A$2KRb zpWNE=Zvr*?vnU%@h=(VmH6KfFl>xIHq%PYtsaq99gs7GI;-fRP%Yk>mi<(88PA9c} zKlbt#U|A|iGoV|9itjMsKXA03`mx!!*^Y0yi%U1^LKBfO1*;0UC$P|hsW)iixo#du z+&-yomZ>$b)&YaJuh^xn=s_Ukb}nEmyh|YU20?s=EC%~7&pd60B4F9h7u{&L5FcBe z%Sf!iW}g>Io^2ZNsYVgMu!^JXbEj~xK@%Ll@fnaolQ~Mg7DQ9rQNA!H&LS-m$)=o^ zSc;?QLvx^*i=rs%@{(MpW(DHwA*w~7cR(1kKzHdNX70BC9^Z+IMzZshuHD8c63Va> zYvbP2<*D_*o+lzlR@j3##I|&j8<76?bn89vU>8>E{bkn^>$R~*onx|!Vvd*`YL(m{ z5;jNV3y=%^7lECtn^H<8teiEsB-cfyoBmabtjuud+N^Va;)QFwvo7r4nI@DU?aPsF zp1My(a;hYI)I^Kzu{51l1@5d21^XgpE<_6M!+##E4aGEOyFa-u8(0Dl3%!yFd7>gxIaD2<(J6o`~+C0AB@EBzs%sL3!^O&ztN+6No%6!5wq z-ruAO7T@t!HRAS{bsI4FExG_ktzlwXHKf)N%WI^~zDZ4_CfjYS!%+z$^wdV}GL4ol ziGDPiA!3&!4=)#RLERnRgT*)N=+_16w+G^3v$11|rn`2a5M>+~x>wHX(j`z68W^I!vc5wU9^0w{Wdt5;0Dmo)mFTo~Fj^x&f)Mf$1Jr zCaU*1W)T2$z9L@Xc+fugi`C9HMp z9x}R>6=^=d-XR(;D45@5HDGx?bpAgMZ3~`lkvvp-V)uR$KW+uk=?8*|L@e}#%4ueB za3B=t;mV*14uxR|aGh;h&;@UxLIq3C`w=Ka-p199SV_{iNcy}hps@r`DDGu9#;&L2 zJ*a=&Os~}hwJ(ynXrs3dq8>WE?q@scL919MoK;6_s(uD}@8&J*fW6YPPHE^2pLF-N zSaq_zR>PZJzwt9Ye;_ayd<;Ke?r%_I{n+17G6w0Z*r6W8)hB;7SA~*Q>XE-^c0QsO z*4an!<_+r_BBx~kYO1!|Ql|D`kg6elKXGQ8AHo;?1(QSHw}s430mA2=Jkc6QJw}g& zwt%`6&`Ty#g+&XmT)2>Z4MIh$ zHZ4M|8{LFy5XZ3K&2WC*Jbt{gCbjVN5(%jyl`SsJ$ z{-&ogRP66YW`eU(5MOmc<5u)|=LBx3G&zLddY62$gUhasm<$kTpX5yGAtLiZeerJ zOjUyYn&?aLWOje7Ht0cZV>ot)Nu*@Q1dt3vwPXNRI`i?O!fN2a2aHY12Z6nJyC}yl&-8=KVF-%54yaZ_IcFE zbgt2e{E-rLHSw~Z@d+EP=Z`SalCk9FN-m$Z!z-x1i z@i@>Rmfj#P>7Mr3FzK+CwUt$17EdzSDYdsP0R>fRwz}fm%wm?-AeJ_-+^+=8f9db* zU!%k)rznde6p<5JQiAU)`cqg)_@^-cNB4Ldvjx~RJNMTh_%`>OSGU?-(9J`Xe0mm9?7kx^C?;WYOaTphL7>=ZLZSg=an+(epqE55)z5$P4)Yny& z!53qEI9HG`?yk)bfj|0$V03@L`-QV!23>oAL;vP)d`%b$XNhxQ*jb z;p~=t{y%WpB1C*GQBKIp`a>c|h%jJ#I^aO$A!nBTV0Ogy|8ZU`Cfa{peWkjIM&uEt zsQYT{;;(>W1HR)kG6vtS{1G5+A~4%UdPAV}C<2fb0hpb-{Slxf98oY^QhHxGoDm@X zhyZ%;muHw02a0C7#n=y}Lg4C9D&(;5M? 
zJ`(`5-8C}=(f~wuTJ~TVd^1gAfFn*s!02+Ze)aq|i9O}8xX%&>vvq@K@IB-PG)wqj zvlR{r!N)M$?Be|nBGmN0P6I&tB;e6)HVb3~=U-DcmtXlJk+@QYunbeKOz?NKz_-SE zkSW5gKUFX^@#(1hZF(MD)0M z{_lLT@F%fT#`NYe{&Yp@iU3CCE0D}HC`ysF5PFQ1@?u{SCu0}6UFo5|)yv@;sbU|+ z_)RhF&U?I*h_#{@{5$P~x8I#1ifIW(q|-pe!SgmrF&D|jof@x6rB)G1r_!vuDi9i=j7lOA0Y`thF1Q(Ugw~_7n8Jqm!oiYw z?0`Ic?OAXv>>cI&iuQXk>6qIqVjQK?k2z1!jAg{-qaol6nz_bu7MR^G3a)v_HH6%s ztI);vy2jD|K-fmnS6d>dHEK**#L~slLQvGO)#laQCOGWOh6b>1Wp$hVCQ?K;lcEy| z#NbNW4HEfRGlri2tr<=7xDdsZxDX%hu;C)lN1;hqyfd^ccBq`GMB2ZY-wcCHDOtf? zH1gpXaKq+>W+?mh^II@{#_`cVf+x1be*ac&&q|AP?)yhgRh3a&Jn+8;P`)QCwf4tJ&#}n;7eWs6_ za(ig@)q3(3SQFt4eRT$J@Mbt+|J}9($r^qaH+=Q&;36opsD zAsdN3gL!^C5f0Dr9)9Vvfs@XnO%Yg+U5@Z#u>F?KsRyg%`{$#Ot` z&Y8R4f`(%}&H;9uMA}#i(%=5YXIk+BA4)bJ)R^5MDW5zUhI>+xW!npB@UQM`}exI@8C$RHUr;=mcxK!5LPGE{_2d~&rz zDT~ikg?ucYtcfq&cQY_G*cp5b)m#~SCaIM1LAHokeRO#MV=4>)+H(xp=!q~S<2|TU zX)0Ynvc8xZCq)SVS8-E*P%S~XtU$*6P||U0*<)o?AFO4kHs-DM9aO1+_bVEG>0!bm3XojiW(qG#4G5s7f$eby@vzf0f#q6 z@*?QeN8tpejn=L-nU>$SBg&w(sy(uh_mxum>u3F@>i`XJ#TMTbso*m5(^;ugBkG0_ zQ?Tj44Q_0jVh*`IyA$ zz|j1#;^9%#DZiyt?v&^6UD>Ob37~RG&w-`MqWz{< zzyQ#YuD`C05R0;AS`2x7&Fjz?&5uj_rq0?W!d7Mg7pY%p4RK2(gPpzuvbu?(6^fxM z<#In!+m-CaNuy07K6|s?NUw*RtOO6({G}Yq0{7;Yjr?LT;a}~ExM@}oa~aKUEK?C8 zV4c$sd1US14|#C%IXp9`E}qskZX76Co}Vq`U9vU6@$ju&p`1WeJg%4p=9$eOVI%{| zS`bg7XANOv&V==o3F!c-7$a}&=HXD+K(W6EpEuwOa5laZ9qD_rXXPZ=#)VQn*iQwW zZ@$-J!4%*5hW(R?psO%`F2MF!jqC17>Ibv?WB**>dlg99m%w!W3)pm)m+Fpq0CvEi~{3vnwzl zF)Q$_RgMKKml1`$AYt;qwHyQl7n6d!Q+~nef*&Rj7^F79oVd@(n}v)R+F|`Ybn|yq z1d)*;yyXQVYa1>GJYo>C(y^$8X~l^bkFzU?bF{md0gAKhH~!Jfp8NbB3HV~th}y-H zB>sj1y;dT_jgDq0yultxV}SfR39MCY@pqJb1mIM%0iYiffUJ&y zMKpN#HhC&-2Y_=hz~Kl`(mlj{8<}YWr1DYIJk+zTpk)WbZwOHn_`9?W&MKAyAQ?ST znEg`|PW@dxOoUq^**&rj`c3MvoiJ@My^df9gB?+9V0pB?2L_LlyU(2=&K$g9vAR|YlwbrD>5 zi(1ALdD104l>$7+_{<^z+HVoOFy7L=2o5yIy>fCg<+>X^>GZ?Ba+rx9S}BxyJuHWB zdsQ-Yn>ORM^5euKhYdB4Zi#aCAuH>I2Bi7|Fi0>g*v&aXEAii^O_qO5gi_i~hdib- z;+9cbF^}#ep9sdp?z3pcBTez1oy5F6FZ4mnkMr`$8q^xtpArzuU7Cp9G!RYafn@pT 
z7n{<_K=DD?enAm)cUKe2^|BH{`$xpDzjN0F5nK|jzukNk4g9C*VO$anqzD-Ne=H2z zhlMc~4!-dLmOtV5jX!2($h3_r%PP3NghihWhk{otCZOqg=yOS`4Hcxi3PY z=zrImVr0-bQ?pu9r=R7PWE_xnQ?tTocYP4q#_7-7&g`#8Vk?>nLQ|Qc*ZwJ1%)VBo zJY-)AIyPYC8ICOf{-TCV+S4ywK5orUe`9D=ZOyJQ{pZN@savzGUBlT|Eg~~14nFs< zA9e-7HB{xqAsPLvboZ$b%g_O@_vld4n%m%Bb|8mf=4aI-Cmt`SjJ0$N<~(6V@@EuJ zP+D5PxkY%)Ly4xBKAFFs6i-Z9F+#)Et}5}=N}6}@OdBFH+j z8s$bvw6rhPB88Ne6*`*4nxB)ni6DX?S~YSnV^1QU#6H22QZnVR6jJ9XHt?LELrT~< zvExBm5u)g^@Mnmha}Se5nMq!~fr+HE{0f|LPzyM*q#mSRQNp#oB~U$R75G0vXW*0U zL1!AvLQxvlqMyZ(dSuP>jH9q42+f3g(}L*gZHWOm}ONm zOL=s|ScS+g7hPl9r=Q&Wn`q~>Kgx%;V3fk?rpbHZJrg4MFiI{5Wp)0$6Ar($ljhlW zAU<@t4RH;k62JWze|TS`QOV)#AYlqwWB8!CZC8q^&iMwifIXs;(54{5-1hz8ZfZm9 z`GC_++csfUHEJzsUDYAE7hy{n!>)K{6~nFfmgcMyxWIYq=+?Wsq1U^*nf;0LMVaha zdc`}N`Vn2HWJg*3{70PEYnI1-kN8^0+&x!vr5#j`TjKvtHouCWp6jPUe)13 z)VHbyzB@A%iW6;3pBZ$8j)=ppq zwmsO2r=b%5eqci6leskh@`B#~%=u6n=GWCNul&LRMJUp))|h^7(}A@(@1_3td~j&b ztL;1};{^EanT5#QFW7{a!irD%9w+1WX|(j22u^<84URwVOy8lCg`$nm32|)Pr|6h( zux;l4c0EKn>|>*@DktX4epi>%?|b*)6BA!hnnC+^LOrYIbJzE;kAJV1)Mn$(CV+%| zwSxJ2c$PldU*2f>%%4wUpUbUN{q)CuII26(4}8@2=GIpHQ~%nZM|ou?B0nnhM>|{? zh7lfb6jQEEbjz#0fFtsSy3>h&Kd2X-y zXZJ%a&f&w_h8lB-({Ck2YATAD)uf9h{oTe&Q+UEuElv^ibk5 zOKr&6`cX&`A^8RTFqRAQR!;g{#d9u&&N%0@9dXCXbX%gYv&;lG)0YD={s#-P2KD)| zC1d3Qp#tTBKTnzybi;qth*X(e8?zI)P4Cr>aMjm3H+?%Y#qVT|O7UM5_JuL{|8$2? 
z`IZM}=ehGk@vB}UaXeUt5$6%-eIrTuInI#8ds5y-G2oKF##?z6&H;7V-^yTA@xEpIA~yf~ zQwTrDv*4r|zs95JLG=V^@u2mY3&cPYa#W4cvpJlTvh%v2%<@aP@s^%Ye>#OSCg`D< zMatDPITXEfZfiNoRjQ{X4`;4F5Vi9fQWzUamqj}Df}VCEOr0BNhLWsOeX0n_>jWf{ zKEahNXnJN&j{vLB;9{bWCG4tiAje|k^$*@Y?O1%%WX%=5>Am26GGw)$hKK4zzGQfC zztAUgp-3A-D&~)1!yK)jyD}T@G|5+F$L+A)<3AB5CBz-tYJbt6&+FD=-O#d_f0rgz zij)}goZNK#gQT<_q2H99pGwN7a%qh3ow7B+^*|M--k<)@+%V*qz1+rsaqj0o*f6Jj zBNl%U?6XQtS`-LJm|X&dH*h61AT97m$R77@MsuXkhpZ6(DZ1U9)Boggn=IkVUcuct zdqplWk;E|ej%zaeQuPQ7_PA(Be5g3H+KgV5J>F$N@P&zULZ%beP?oiNOHTI1iE}@KYWG+_m=%clRfTm$a4y`eyA$)3X6@x2 z;}%j{U3BK}bCu0ItmT@RXMAB9s%5aB>76>!sktA-t(mmlywu6>DwP~j2+jH&(fx%D zFAAK-5`QhA;{lC*uCU2G-^dARhb|oDQJI52g!(ox3{!kE@7?O6BO5+?aF6G@%tOB) z>d3$ZV|z|tH)NKppnUITQ)+Z!I0SS+*0=pZkhfmPDgGZi^4{q}8ROkQFuUl%0nxX0 zi_rOk1^qoH5UJj%Fjy+ugy)mPm^;}!lQ@o?Z@~ddHN9uS#k&3GLT9Y?w`;HWiqmzX zX`A=P14T-Y-RM33J@=t*`1hxiD&ZY-Ki*A6d8soQmd&Xf@lCI%{O||vtRGNGuW?lj z?_6ZVjApn|Lu2FzW&xgVW&&dXJH5b^=CoF~FW_ z^~ISevx#p{K(L{CaZFPS#aBx2s;|Aw7z#(<>VPoL^WUb*y^EFvJX@xZflCjvC!#TW zmWPQ_olj$%b^-al1?t+K4b$OoKD(X5BI zub4Xgx7sD&G+NU#*{p!7_jJv^Uk+uH)(Xf))|)DQ;cB<2HCgjTNAedVVhrnGDUVme zfe2CbPwZ(8qpaK#c8Z?cP3fHoQLOQ6wuh%ugm11pZbT%p>w>SLg@lSv!{-TpQ1sez zo*{*9lpWjL#hu_NoU)$9g>K6@x2xsnMq)l#x3<#f^CVjLtAEagpd(GD!Z9zYJF1DZP&ely?BehxclE-BPSE`4`LQbEK-#3GMtL{HX8H zta6vpqdt9894P<&io~ybiNy7Q6XrMA4TVz0a4En(X$0=V-em;t>>ALoCGR#&Q05Mw zP0N$?)O|oSMc-x$~^SG}6>?1MIu)hA{L%NGK zdiwMRJQrp^dG_tw{vPv|a?a%acC{EcfWP$NgPeNxZDT~|dqq?59Xg+11`k|z4{Qfg zBGp zPTu}>DNeuc?IR3GdFW-4*SPPUCR8I)ee6BuFMxUz$$H!*iVNhA@CqQvs>70}_5HNf z`S5F;Lgj_Y z+7U;7mG^Z%6j!^6&zA8+%|_x&y(}Po{)MF4KK%Al9F`q$jK%LF;}pFl5Z=)K+voQi z6?7Yc#_i|on^HFUA8=M#Tn`DmLhH)k7zgp>G59O0io`(A=rkbV!TD&0qurBL9_S3@=N`12>px9G%onM5! 
zlCRD!Jo@~RIU=vIo58oYX3T~obKLA7(PEPaEi9HHe%^Zn8{Mgd>U_qc^joP*6C15z z=IlKAPv7O$`6Byo4-w?K3E4>gHMT+QTnl{V$$o8LjX(EjJlC{uIA7YH5EP0jlnUM1 zSIP#j*lr_t>FOc!v8#x!VCy^l5DgnCLwNsfEod z$U=)X5hY*nfAT{*q*Fc*+c$`{G8=H{78|D?kQ~$323zNGqCz(rm1;?8iU_tQWmm&| zR@`ybL+XiV=9ABg4r%>t+5f4sm0*nZMKE^*c~x3oK)V*5#Rw}~bcPzw#QO39URUM;MrV9NXZF9ABd*&)&w7#v!rCC>qcuetPJbHbkNSnse9gh^-vZ)9 z&Q3wFz{Y20e;W!A3dr>fUZ>#^l#sVL2JD0alCcI2r|U*eYhaBk?H8CeG2{gglm9D- z$Y2d{@PpV#Wg4*R5TEj)O}KE_;V^^Tpt>TwFo9zBHy`rrI{P44$lk%^Gesb$QR0xm z15%u?6e+~}3G)M(QbL(156LQiuEw95o=PhEp*n#f3n?M-ju}%>dD{nnK3+qzY>ct! zGNw@dvF5G=PpIyamp-N+iVF9y-MS124*a*& z&%i*Fn=gFyZW8PFLnPgT*4)HK^26GJCCOS`W?krLEs7*{Hjgs3!j<;~ zqCj{-kT*_7A_WE)foRo2E%X#k?vWu~k^%@eIJ>uP(?k^Vuqlqv4KH^-v0l2~o!I`o ze~mH0?e%34WU=`D-x|lF z1N~?r=%=k`;nTfj>EW;;nvv!1a4G$?3jbm3l%Af#n-~|h8TL90w(gZ)r5BTZO5aV* z_3sMrk}sxCrDOTQIIk$kj;|A>{c5);HYUQd3)rq)J=y~R6a;|GbyZ4ppgDYxa%vAfu3=MWyVs??Xd76`+oZk}lpB3L$w2Ae{ znZ9IIzo){hnmCLv~6Vm^@v=VLIn&MX#bOwZT>q+{~b?*T`rN)kw>y~47D!suz zJxHC>Xq4ya$@;2drCFG$0xA4{=u?avK5kzCYFoLMcJ`ht`^cc$F^^=Q0 zT;xU31TYh=gmq9tNe)?^f!(Fs2WSM5p>LEfKhE0I){e@BL8nILuYC44prshrAb}tp zI+Xx3Q(^_%Cj@bi#GqrD?~yy0JS5OTU%?@gDtFaf4=iF32ttxzI3xrFN4q<+W_?N2 zM0DtsDh99U{BK|3PNqMv+1A+0B*%u+)O{K@gC3Q#kF5fd~S7 zw5e-l^QSNe^IUgN;x+rB%}ab%M*84O+dBidayI#9lSQ=X)ECsV8E2!$edD@Oio=&)72;=<_A zKxA#!@?lBRhe4L)+DH(SX$cALp;y$1>pbygQ1qumi0GxEBQWKI^Ex-g30Kivzr;TB z+C`ab`DsT}3I@<|5{Yz;*O&FfIH#;K<2a)ngw(Rm8Hd!c_8*t4UWDP|9x|G%{InqQ z&em$9gEo?{gBPZY#uVthf{o4%IRzaPd7`a#Oy}@`8JT1ED-1gD4(TA2wQID)ud70v z|Bg?Q`-kzeOi+67djnH|Ptx08h~?D$I&2~c%lF-u2x3v- z@KduDG)@=06jm2$?+uxuMn;h{7>^rTs#6C3TMB+)Q!H4!Y$+fG=SRAO0QHU7q$@#3 z2K-wJe9F|-8+5-RW5$6!DY4LLA4JG669f&z8PQEKDot<_x_c!5W*cDTa&m<;`NC>~ ztUWlP8}LBz;tdSnztIm6dJ)2)GC6ej2KKQ^_z(cg6vX^%XS+x-AE%Wq&KEy%$V{l3wnupkWGhk@c_Fh>y| zK7Z9L*cHw)bnsgAh06Y({(KlWkNUT$akIKS;osBZwX}jJ(Hg%1C~bs$s|d5r0~0Mh zqPBdp$?i6ht3toB>0T0L=Poew413C~CUut^s{0wLI`5RQizF#k`_tPAmU_VlefxX} zOgGy2>cQVtBxrHeMxO7W(TIK0`(uTKM#n~oRbnii1{bQ{48uD25-*`5hv4%$VOLnA 
zUl*8dB#3Qu`vb}(fE#x}vWPC!Hhj@u)27W7#CD~E*e)QzTU!dmc1iwR_Mm)MCA@?1 zuA-lM{?zyXP6Ud>lk`5WeSugc)fa52_h5{$+uwqxWiAI!t7YK>$Kf9OtC4)?W@_3| z*GJvz#CC{4&r{VJQ z#e)59&x=AM`UY75NS=ZK!?G1AN+M-Kslr$6-=3sy zt>@Rnt)e`yLw>e*L}1hnZ-UXsi~}^IK%iB-=6fEvI|gzHV?mU!l{B7!@%tp+0g|?k zTX$<=5Div`@}LZRCRybn1qQs$AyD4z0n!vsyL10=Dms(XG?svo!!B?QV5PO2nH_Qi z-U%2jd3@O_7+PXOLqdb7+MA_d3M1>-KfY2Ol?e5BB65$aoz<~%oj4}A&3x-XXI|Ys zP6A;>Ra+HdLh*WhHm)1ac|dOS=_$j?Z?&RLpE~+vA;<(@kt6oUQjamQ$;huiZFr$?hhh$_1$LWg183YuKf|No7-X;JnvuXOy)IUI(j^V*0z zS`$Dx77@6+H5jx1>e67sF19RZy}51FRB|A^0Y8)fPzxxBeG+gDF?a(`CjYU2t({re zUA{(ucq}CjxX7Cz^l_p52<$FLBY+#oY=Hzyk3sKaDVuWJP=n_O=trHsPV1fN)AYfb>iZn2!7sKqE5>euI4&vMUo*(MBb906|(bya9zN_(rLW3BU_<=Y$zZ z^CZ3maCZPzP@M`7uk>e@P&zx5v;tTt1j|-NdjDKXE9$64cHgYYXD

0dfP}S!Dl=2VX(mp5%D@`zZ*H$ ztWjP=S&%7P#SKVG5n4ICYABJkOtK`YO8;&xsaiQP)~|H5_J?S$(p0-7sV3Di(}IAz zCeMZtcg&PjgxXbH>2mR1Nt| z=&%y-n*z=Gl~1_qdRX4GT3|a4sMevNTKCllX|EucE|Pxzr{s>rZML2Jm%Bc6Y}k>Ds*y_v2NuHa zrPfZLz~ZJ{9-2D@T77la5H7K8jYK)WXTYxS&77hza*IbQTMaB2hSb=4cNmX`{MRZ2 z!)3mIPXk%S@)kOTK`44|V&v4-vdtdHi`evpA{kNn#AxCMG6R;yK< z`@IYA)@VoRzG@d~Db|?njazIPB7Qbsd`YdckiKb8ZnzYcv)%4ZPLTLUjX?V%YB5^< zPFJ|NCYcRWxUMZ)vt7Qt<}EpW^tiFQvBp;&0C%{ao~X+0XX8jGXlZJ9{rrVnDEO+S zE%hcmLVv?+y;FenQ7gOU4J$nc^p+)W;$fc0a9gsrUkP@csZD;dchW>NzM2oxw}0Yr zUbRT{C@&j6jgYbsMpcZD9qCs-lcR6CbbcPzf@T7m^3eapUgui7)cEo6UlA$3XvFT&r``}F%?V4(s#sbQV@_#af~t8xrXUw^~x(kGn;VC-k_L{}Li6a7_+>)wXP&dG(YuJw7S|;vs8=+rU30sSWl96)&i%mUfj>Sv#9;R` zEh50i0brpwuhPAYTMgdu1xyz0uz^D*)6O~zuK6cs@`knNB^@v0H)M7Hm|Runt*>OV zUCpurq-7@MS=9tXw<5#I8Dp7fQKTN;J^e6uB*3CyUQD%K6j_;+47RS_HLAh3a-lzs zSx~9093HG*HIAP2@!ZE+QXv0J7U@4UzMWdQ;m7Td=I<1Y{EIE$DXMvq)lWa)LHziO zB~sh?>Gk$x!2#mwxXmy8oa~*Qe^E2|x88cdQ_@12Z}@Y=OXFEn^UYyX+a;c6SwGil z6`}Bmu?5a(8Z~2+e)J<7Q2xbgvc0_YQ{Bx1je_>&aZu+iNZCjBL9U!vtti+9I`L8k zCh?VYl(W7Z4@w%KaKg~_nEw!E@LZA(-aO(R3W|rFDvud8GEWUG7@+PjoVJ~Hi_fw? 
z9Fpo}KIA%DkesM>el9!~wR>3W3Q4!i$+A1Vlb+*VTMT?6m=34a?NG6~>vRCW#VbhG zY+WPUe>EGj>oOX$o6JL{%L{biV!=jeC{?K2`o`Az<4PAi_`yoA`>>ni6BNs_=3dV_ zeLtDj)5`Dl;d=IOT{T)SH|d$~Z*n~A%S~j}_*Rx42BJAmmuOeHVT9Woz3BY&-SUd< z*@4-=AE5AGSo@=$E@m*^a{{w-A42fGkuRUniZ{XxyoV^8Nk<>!6zzo>fhOpG_aIuv zq-%Apg41l@%g&7gY1b;dUQW=j`7nyMwB z3)EC@*mIU(_})?C0s7Z$ZRcj_fop6>H&-P^aSrafb!e_cUH%NAuxHe>R!vM!hA$A) ze2@eV6r)v*^HsP_Mf{Ea&aGK}@O%mfF6rGR;QE>X{wh zA%ZE|{po=5{F`2)uGOlSq7KxI!rV1c49DC(u;9d*Uk)~C-5*n9wINq-ew+`RnC#ZXGH}N);u`5AcW` z4_R0ImYJGOv}keszUiDXCLpVt%B!rcXqDM&!li;Ut&-Tn<$~R?S*kc=Nl2n{>l5l& zj3Lu|=kY|`ev^CHF*C;}Qm`OXmL42rybQ`%#j|)l1##VWgWN>%R~kXM`5!rkRQg^v zr;SBw>gS?{%}1M=t72`@Leg@ya8q*)^%N&6rvG6?3HBY={a{HN?fM2`uF`qxt!yv0 z3-4{^D@Hi%Lwl^5{q}saZoI#BZ(}EzMd*3_$@i~s*^`Yr;r6}ur1i)-5)dNQ=b&P| ziBWdalUaMkCu03$(sr)DW18?BKk3mG9Bhw5uwu( zop&F=R>*92mDHT1ToWqPPaj-1=(gq_t?>S)b_x+?EfjqN-x1UPPR;~?Hl*40^Q#Gf zWl*yVqRnF1Ik~w${8MY++EaehK^!d~a8`;bSZwoFcS#= zu9?I00R8YRf-i}dJqx@Y!aLun7wRY(bdi8^R;HBM!wN~Gj=a!%j>BP%JLn>#-5?)k z0~~w~{vS$OVrtY}(u$w{Hchuc_Md3ckI#7cn--?v+3TUDescim)m5Q041L?2x@u3s z$a9C#0<#Sc1k{IV|AIFdlG6HP+GEb?2s)7in~EdIm&r6pLm>>Of1Ctjjb3NSYK98G zJ+Qp*m|M>!m@+s%-v1C?I#d%+6Ff`1w2FhO&`N+?ONt4eeL~T__6vDB8cm;N7y{9< z0Gl=yyP|#iq6Ux?0kNr4!Ns&1oma)hl(pZsE|DmZ6io!_Ck+=AG0l@ju3R>)e3N>f z1hEM0*&Rmx6NZKWrc>rArZ1t-%G25Q*!O2gZkxtz7S1yy zHgnnE70X^{*7%paP((WMLXsTLpJ=67u%asP?Dn(A-(b|SUM(-%e#3U6MJ7l4QtW*Z zCye>uURzV!`?)JH+^61cWqn3F&dqDR{~J>3+!~Mfw=G_8_e;GkubaJ1uha7-I|5)6 zLBs7i+ltP(*W>w*?Z@*|Z+TvOzb1(Meho~$d60+*49gc=Alo&|XKeLD%`bKYNn+-Gh_A+hlF;ocZ- zi;v^pY}O*xK>9R8?9$O%u^sf*5gH*pRbP;n3lh%x7R=>zetpHTniMbN@4@|?rWW|7 z6|QxvfB&XhCP>(yYx-9l7_EX3V5}PW=6P485%|{nK&l$Z7x8CA+zZ$Mn+7!Q!3d z6w1AC_sg%Bv!c@`8^*NR&}>AeobN+dWvY1hYI7}vZHQV}WaRp0!;sZ}Z>Me6V-w2i zlHU5#BsdHC-stl6ucM@L{u+ee#51>rho3%c{8udnmovuZ-Sd?xqvv%zv+4OCSmt;6 z#;+z8W3BkF&zh`U&-cfEP3F2=Z;CHSMB(o>kA!#SBYCMo?w*^Y!KqhVd!8JPS9b6G z%kMvQBmHJaRS<^6=~A}o2pfENGFpWH|Jsi#&`TF5hEswX%;xP2>Y$Q%$}BL?P&kfR zkBSQ`lQSNoJcT? 
z1pE=7F2b1>TW4*i(7B1lkl@`hkFJlE*BPCK-V7H2vp4=(eg7OR5PtyQ#YU}C7X#GX zwQ&2nNce-7tSe7~jSMjxgBNZkK$EF1!v^esa7F&jxpi-&75x=Kc@hS8C(;(CoIn}X zgUD-ufupJ^rdD-epTL0@yFp4TW<;Xb}zA9}yUHqdLsrmaYK$_Te_r4*s&OaD4%6gB0-3g z$-4EhS-4BxWsg72c1`VUe|~ITZT%Rg_zxmn6dLy-5DrET_(^g7Cf2;DL1dvsUR@B>2&3>%W{K4e4ta6o6+2E9 za#Ho!1wAnCyvv$B1)!!I>u9AEMY>ojvyM#^%Ffk8=`6F#;_XitSuF?B+XNT!?o0<1 z6y3k z!@vi19=fZDj9OdLdClaQM^)UF9265Y?#oB9SgYVtz%TfhC>+oFqkFy#$)ZF2at@b7 z7nRhJSrTI?a`}}PTMjsmZ%)xt^JF@eMNb|&tF5=9Xfq5OsOAy0`Dy)U0&VD8;S)#tJaWq$dSv=DN4EFA4ZLK1Z;TfjVpW(`Zz zKw@hOn zFO*%Vhtge!m-QMfgpicYj*`d37%Bl^7PR86QINNQ;@%k+$7T$f8hDr8xVe zSZX}w&-A^pcHE>}DT$CT0mKRoC85%PvYMbSE*^;$h6b8IFJ_XQbW2=X#pYyncKddG#eAi8s_?s6qi6DBftkq?dQBx{uvMLJj-rm@c# z%1*I97c)-x^4nC7)UMm=_lDo0fEr zGo3+hwD@xhLzMY4=vxx{__MlH5>n;k4AU^KEii6o)%^%y$+%__3zj~7luYTfy*MT{-a5x0%?bHw)7x+&Sec*Q$;E93 zzj14mnJ5>IVS;nc^ZwC^rJF}XXr|M~bIKUUT#x|B3{L&Qe|8e_LWK4I=2*@$>U1O9 zp%hQ*&B@A=Li*%*QpBZvqbvr7XwsVYeO`#oCPEr+roM*WCambOrM@)n&R;sc$Z}g} z*;%$uU1m2dOy|3^Q-7AXdOnB{aV(u0!`OuTho#H7r$Z-)V?qHW@6J>@KIVO0kG#|XYW)1_AM{AQzd(mNCjw4Cq|WJQ63ZdX zx-loJH9j$cHgcr^C?B73d#BJ1}tD2jR(rQ5bqM=jjWtj z^pjNj~pzLx)S(h!y{hkJFXe2M|-6yo1H)Q5USx2x=CR+2wMu z|1!2))k`CU(H6eHGCPXr35Aub`*R_z$5Cs)EBRtTI)jqp^s~M(i5{u(7XpFF&oi33X+sg(kJDUy)Me?I(t#%4MK_#3s;TI4034 zef3^TJt3 z7lrPxL7CYp>ZZrDFGSwjBB@9k)pBD4cFCYM%M=dW5wi!p+ zT3!MxKcsT{&(~(s1a7(;A`2QjzV)?X?X083fQCdjhC)<1QRUFHJji8$>4Oju5u<@4 z)!m(zls+8SdqtKCp}9%}qX8&1B`=C#-=#S_j+426!Dig$)I0tKTmX+Fgc}k-a3D%5 zz+QXqS~r0QvyWi02F9Sn$U_BFH<}V|5gg+AW7&Rf5k5&-wu`+!A``Sf+lSZzFi4DZ zajSVf*&ozSIV}07a~fPChfdXv4jbm%Vsi>`5o_HUjO(*k>i+rmbcci|9*j^y`_lPf zPQY*HoIS7smQTMi;^(i56$55X3b|E%Nzx(gHO1kKg3~2u1X#ES0SProI9)c$F<|X3 zFgRVaG=l(kJrL(Lit$$zsyZ9H>-2RHuyzI_y4(Pu2zNlo5ubt1N1(haC{Vr<6zBF4 zC~pOlymf-KZ;VX-Q-SrTOF2{Qpm4Wo7f?97j`)WdL|_od05B2~io={1!iecl2Vy4? 
zNWoLMWzJod*8~?1oSwLP-aY-m*O+7|4L`{JH0Unef+N@*~DofE?=L0>cE+srA{ol>&(fJVp8!oQ4-hs+OFxpUe!< zYVgJZ_Q@D_Phb3N_RI+3%<&z9Nf@WgSEHj0LK3V^a5Q1pKT&Ipa%)}6fT}eGywdJX zn>;Y4Zt_*X)n+-Y=J7|OXD}Kn$=P!Oeb(vczYJZK_==uO@XVrc0e%J<7^hHwG=Hig zpg-hK1<-$%HZ&18IE64K+@Iq6m}3WgkIdE$qtbV|3Ct5=kOea*hHk7<;wqf%b$Q)_ z7f8UmJl_kSdURskeJYj%$s$k=-5s}XG$VqP`d{KI7Y5`0Ebz{oqw6VER()Zo7H{^I zv4VjuB{%qp91rlr$;7xFIUqBlx|0fWYo7UOK-KKD0Y8-Tb`k3;`Ls4bA-O)G&pfZw z=*E?UdVKZTK5zDe?o5lQtb^e5J2n^1(ZHl+-_u@VSVw;*dH{%{dZiwkjc&F^*GU4d z@5L9Kn2BMi5Yj^w{ z`wv=$*4%195$i?wBNitKvWU<2KX6?d_=o(bm_cFWK`7q<|1@1-|I0lEaZuEfHl~AL z92odp`Pf%bU~JEzh0gJ}S}01~rN3m5xt6Q`5n|2O6iXqZvtn9P9By-=z`ZEQl;$%< zYctWnzzrKUjo6j)oF-Z!R)HV61u%x@kOUFx4x1ePmtzEvu++I~de zKS7$Za_xNi<PD(sRfU(iusYSuwk0=VrpC$vz2*~y(eNBbC29>T>cdQ)r90%#;89W&z&U9n5eWuE)}y=Eh0Nw+Qazg)ykKNJ>YXYvmZp8W%} z$%k^BZ?40KvcJ8Lf0Vqd@h?wE;`5g3U*ZqsNbZ2neM2~H6@EIMEJ``V0nm2JBM!S0WSAetc-LW#3Ws7rR zQ2ulkHyqU%@-7JE?{$7=n*=p3NLi5XE=V1@)3mkemgV!b^Q8xAjU^% zLJ@9y6`p6-V%sD0di96-+!@pT7CmbcsTTqUz;dRSHi?Ap`8;S&-EBglgfE(uF5=-x zf{-rT^{N;dhF>RWEzjj{!VhGJUh90ca>V5iDdY|syyVqL1`dQCH zG#9Fh?+<4Un4Pr%8wUBikPCg3x$1Q43UG6PTrNTBUD94rAN<|=uXt;}+82KRVg_e>)eSpa|oBy)i1 zAW&^AfaF90k0oFcaMM;4P-Y!M#ZK&DT-zHog|0*nOzgfi01ASD7^0plur*wcDUSSa z?hj}QNfKKc?wRYUB1xLN7?nE!u$UZzi6XK#)r5=;zySkbKmY(H()<1tU4G1P*fjU< z4#1D8qO^sL^e1fot5qE<0rVE1jNkOc7VC9M8|-i^;d@RW}i z+l$S)acNGTA0(U~742I_F5@y@!_l%mBW%ChxOHZ4jb)rtQ%0@C4n{j)R`!lGPzasaMO)|T|YAOym z#EIAs!2*ane)!aHQFml29W#q0I%{VzPsDZ+pU6L-%HnN8nb#;0i+Y50=qwR~Dv{#( zKbis~u)3TcM4oz&%Ppdx=Xpjry^GXo(BeIv%wg zsk0E6q7o&D9$~enu@?Lf?`1>bMY~4dAv*xiU>EoeE5MIEB%4?1)-0k|f7NG@6@+Yv zlIc10-qs~$0MKq!B2R&5Y4tcGb?p<-T8;3lMD6fWU7I5={s+vWql#zE?4aRj0tiU` zJldiLX?3l~f{?Rvbog>d$fJ%TC6>ZiV^&%mJg|5!EhW+H5n?ftwi-o%HMX1>G z*nk&u+v>UG7uzVj`%hw;y>&*d7E`(C#0g(c)wnAMXJ2I14X}vyldCPf980O7Pjy=ozaED#-e4^lDYeoIL2ihAm-oKV1x2D5Cp3o&E+onnzp;8V zKcc&16CkSk?Zfh0Q5hWzC0QQ#f$h5mwK(V76yIRsSlTF>ZbS4;6{+Jgx#o5cbSpkv z0Kv5nvIC`QCoJ5%V`@P+rwWy!7|;SMm(uIUC`1;gP*7XQY-AflEH6%;~iH3Ex|b*i^F0MfeFz5%q-DEwv?b` 
z@Pcz;%70*>HvoahH`E@me(v>os(uyTY`?$Km2Oy`aSuo?V4uT)4kxz-IBt#tQ2?&e z<^ZxH|1=DwZ1jP_V&z*yA*AFJXBRNSn*%+*5d)Ip{6zL=p(hhakhvYSP)Lu9YMf5} z{l5iS&S{OZb86<|^s_B;69|Km^M%(jAGqb+=_IvL-XarOBICFmDzXp}M~=j_Ywbv4 za_NARn>M;{^hs?yKiT~LBP+>83|@-vZn8;{^HB`dW$Q?DiaW#hj73R#x{Oj{>X0_l z?bE8%>~JWN5e5KuNPsExU-Z21cl3NizRs&(j$&ge%JX)=RQO_S^WbB;19bO08Xqr9&_@)>L58X0X1M;snD&3t*+&4JhYY>!X=KT;nsi) z^AG6>IGWf^+`j$drrO*goq z2Ug;vFr58l96n&A;OorrRvd#UR10{CPXs=o`-}XBF$9?Y53czQ72{?Vq6<8!2z0<7 z6y7=0+9^+mfgQ@ma4I||b{uK8!=Qc14oC(d63aNv4+m-5 z#`g440nj=>z;ru8hn#g{3h1doYG1&_g``wah#5KUO1L*`Ac~pHm#~e1V9);I)z>P7 zshLmKNY)l?S=5!iS8&d?0_1l?Yz zH4Eb+WTns#`aUrkm%c`;S4&U@Yk#?aaq^=s4&$%JZ?wlXw$ ziT2DK{y~#wGK6?@Dv4#pAL_I8_6A}_`wH8rAlEhz%&O|D_~xbU~s^Z~#&n{jYt z(ng>;h=iIFhxphu%H-te2J>$0Zs7y!g}^iG_cyi@l%cQ@&DqpE3Wq;z*Y!;o)H*un zMkG_TIGXIv@W2!UnhGiUTBMLho4BzZYG@Z7**d}&+WO?)i@%xk_f@Q&h5f6^8?$t*eb{P3@ttrr(J6=WB#Ex+F`*xj&i zQPU>k8#WhzBSu7OfCsQpteP*1K57t;XcNaj7wEu*NKEO(+|skqL^gWv!O|C&N~70$ z8B2Gw(okf^SRDOHShM9v^79)JwwSrn9NZH>9%ZN|XzYdIUV`y7(V~4Hv^Xkl$^eBE znNRV;u4-wFG!2)jkZ&;pr$3krLynG@*eG9_tYpCht1B=PuPMNx7mq-tx-O#^F#3KB z7=+)N<{iTNCb0|n+I9b5jIrzEZ5;%ukJu_mGBT$!M>9(Z=A++1`aIVEoBa z{3ibBPSxm*yrOfXKuw?mIR8vkVGeVm8QE0r&sWVfk|BzG#Tei=ckO1v)K$5V{L06p z9y@7?_I$seACl<*^ZaDkunkmqimb989Hh6LZZx;p4n077@Qb{ecE&Cq6?vb<@LeZ? 
z|Ka01|9=>4EMpa2hO|qf={<%;{#XsH7Zym*~S+qM`0qj4ivy;G7H~)$P5pcVH9+`j*|}qLEP)t%glMV zDcnXs3sAZbZWXz~fUOD8T|a*xQ0t$-Ug&lf5bVBrWB`j@6aarfgo52yN@W66R0IeM z!Hb16&;}TP`~c$*V^RVFNf{U`!u=7?1c-B57@{6PI5VjUkS2ow7lG`55ZCp6fR6zm zuywHG1Muy6{0G=?w_;!k7>We&>`{{Zhj}nG0%|%U20h7=+?~8zK>1H90jx)WS1pVZ zkj<~0k|$D`bSzmBEKNQZu2@s@)Jqv(RDAj_z8UawU(j74ERvauFQg4>LvdFbsX;&*h^THh4VXX54sU)jCfdKz zP9x(}dh!wIwD}ui%LjnE^UqXuSIDJ#GbN=rKI4|{50hkfsuK^k#ANlyTt#n zxekJ9=(hN5(I;;{1Lo z|D%^Zf_d!LZ6@X(JVm$64MpJ4R7k&ThMdPS=(#ha-%k%1^TzI&aCI4Fnp1xGH**WW zf<%$l7W-;A?81antY23d`4)aGEUA~+|F}H*@@kYA_j#kQwYB&IbxRa#fr!EPjlPcIW%mZd!us#Dtv-0-{5NvH*@NHcUs( ze!%Q~Sw#K+FTE@(t7TF&X3^wQBUbm;EDL28Gz#1Uj>k^AOb>E# zA}cVY7rEZRR=Th$V-)N#$Xh&sH1%a!hg%B6(qFaV4tupqZ80>W+0n=hjM?Qa)S}l1 zff!ao;PUNJ>6^F67?45Rn<23K)^J`E196l99*M%K`?r5t;w+wE5dsiJlJbLW4c34y zqMQ5H;dncZ3(N$j51D;`22UUO0qD*vj>*zp$;ZvyVpd`x?W@%(8{yA6!sL=L>h@=8 z0iH39U-%{ttq!c2Ahp}oX1TldnHGI!S#c|LW~lsGmQ@Y;#mcZ|oRQsG8?51&5@fVl zTCCxCA-YVmQLP4P;<`+o(tUYLTlGjJp}ftUizLpd;=IW!Oi=P|Hc3qpyDVCVQ^`rH z^yqcSzpOT9flxK3o4)xigZzI4pg#nIlmdK%06N3&OO0U!=~hw|s!kV)#Hw^1L6g%x1emtzPDp&PGf|nLgaFNW zDox|Ir=1IZ+4rSdH}Ly7TM(G1tDma@Bo7GKt_tASrZvmZL;v<` zld`ofNKp@!2?bdHZL~;(&Rawu1-47hc6Cei<4D_Mmw3m)^8pFd*zss^>z-{OK)j`m zU6V1QiTy2;Nm4Z^q^mOgb?1LHu59c}T{B9{{+PNKfyv}Y-qA3})c{aQ-~em(Tt^%h zS_4}g5PZGGAPV8|VFhE}(qID55+F&<6L4sKV@PgrrJX6J8x0+TI8PJP?0R4Cl%v#& zFattI32_a>9565lWL6^y@wY9PqI((lzt}L95o(TAuy1z6uB5a&HJsTLmw=Z+px1~S z$6JNrgCSLT?1$B_vzJLK>u_{B;RL&T+>H{6X&E01(Dsld#Cj8SK$K*PPXO*XAh1$q z405GDa|`mP?Rn7wXjeVpZ55Ug=*zMO?8j8%;0*l0S**){%oe4?7_aj1Xb)4L##47h zQ$P1yXY;T1sl(%`w_4-68I*Wp=QPI4 zG;jQBG@=aKf5cLs^;Gw&OqAw>6E>}so4Knsr|s(i++Gd;sTBUu?>!YqO;V5f#N2?e zNgB4~__-De&H8uK#VJx%&Ttd-zri~_Y_!@xd6H#I0iwBG^FNJ}tH|<-KqzlI_hh6s z%^guv73*Ae5&5Gs^t)*bi9z%Xsfp=ga02a=^elyVniKx5%#gkb?SCrar#MUZR__{D zmo|WY)S3)(ws}}e%la3#6{Btsr{#qnOpCi(qt2;S0#dBYZvQq263_6W*Z*lNiPS zne<)&NmLfeDs+&&?3cUs^3sBYTZp@Y(p$>7s9%PG5_+wX z$0%dNa$AULbW5<~(l8OWG?Zs>2uStvFlGYS3JNDN5~jDxAf20`#!{SP68o6?@7~J{ 
z*31T4PtE0p@Qt7U&sF~S`?#mFLzg{;IpjsHHa^_NB5zZz!{{a<$w^ z8r4t97}~Hk%vXh-6&!kZ$n^YNn*%XTOXL|0r5{uZs#nhtdUpUl4e3%?(K7tChFnNvF27H&nYwRvm6^|cX^d!CNUW1A9~BR6RXbGfR@=KMsc%;YW)eVcts zJY%%lGEq)S$m0otrt?Eha{sXir{}hqj#7&tz9=F8wje;S+F%F* zekLh3VP-(?6!W7vG%E8rjEuO~3kp^9oq|kKZ6b`J(@G9nBeEQKn;~n{w7r40(ovni zt)q?(g$U5prGfLOY)Z zE2lX+r$QX_Mf;`h2hug4W{GMi@0*Q$ZQcgd3e`kEg@t#SP_lzIK|>JOq!2RBh=Pf? zsYOa%^8VmE`3{Zj(_MV~%=}oLOC8C9$AMCOXFTR^g3|=?DOSCtzj^t6U}=OfV}|o_ ztLryqTmri^$n=5T>~~Erl7i7z*=mS*y0Qod5`A{RQ5xoX)*brrP389 zTv}N!(k$&wdFbY6$6d5at5Ug5vHg9w;GKOy|J~W8m*~_agYQiv8vAb5d$LW+gcL6D zXnFA3_~9Ihl0dyldRhr_F^!}28%mW*zEt(@m;itlgE27no zlR`n{Av46pc+z`1(yUoxnmKHyQc!hUCeVg^6tYlPA|lTiC?b#bSs$JPfZIh=%ke6g zru4H8rFcR>@6t*^5f%f4&J3F@6eJ5BG2S>)$V@sR;C8s8A-rY^!$^3s86p^pXiZ0s z7$Zg|@}MaFM_gg&+1TU6IoYcdhGdf&0BM!nrYZ6~MW2S`Wa#35TlGa*zPxD9$<^6l zVKSI4?|@2lE_sQ9k+jlAl7X`?$Il-X%qp2^$Xtt}idw(ox{y6uhC0@Xb*x7^bO!(Z zVe{&{FG3K=TLi|?yL>DW4h$1X++)A6jwi;rWT|*dot@h7d=`arSpJQ1nc?mqJ+u=h z&W~^+q3Vyl?8)w%A?*nxd%sC2b-(M;K&vQ(bhdm*siCp^Z&MAj6vCon&4wOKQ9+CT zf|molT@h~9lB7oyU>l#x;}p}CD$o~8TjNjNdNfpYf;}>s`z9;!%FrzsN_QIxWF}&m z6!pJ_rQ+(6!WeLOX?AID0`+L5ohhL?FC)GeRMHj$dX1^k0C8sV@$hOhwoLdYR}9-v zrMsU$5gOnU`oX4WkdoprRZ{pQLmSa#^;3J3EA6TZx6=YP7gyC< z!^x1-jnp@;+|y19g=HIw;u=j}%E=fLZ{+u86=`YP_-CRITdVuvuzCsI>5ap^%UR3O=bWfN0i*;Me{$0L<7sKkF_ zASS?!@9&UVJioIyG!pY+MbKat&hc%U^|#)1irea1~%1i=WI!T~8d!GxFDWi>zv z@WU3!CQWA4u>XFEu}@dpH=Yd_#|c>gv1nTKNqu!D3N@%?69T7XgA>a4_p}yT|WSA>`e&emIGdz=$QlFgo}}@ z|JTJjo7iO~-z}iEGuVWGg3g~?Qyn^6Rg<6SA8Ne!y|ChVe}A}mMSoiQMm8slyphPY zH8Yy>(C@d^ahn?;*g8#n{>I{wH=CB8*=5V%E`uqlEW1ziBD92*bXK_HQxM2-znt37 zpYtq3fZjzW3t5)sdTTo9qS~d$B|nzPAP@1UZ3=N{4l=bsKh_5?n^4VWAqz3bsGqa? 
z^I{oD99(zPu7X`mN;R@MQ|nKQQLOX|k1`mfo0SYW3*iK0A?eAo^c_!bdF@g= zY2w{BmT9LqVPbl}ZI8{ma!hJl*7uCH$pP@*x1z79d}QP;t?SNSk+lqwCrPf&xew}j z%ZzFD(oL3&8o6*&s;*~7kL#CRIzUnwkpB|HK>bg8nE$)1T=-wr|5xSz*{+}=Z zPu(M1e&JMIMIHmf;8ADM`oUfLUhoa!Rp6e0#8?C@2#y5WApi*~h#*lKn-4}Rf!Z(u zTY(5p9t>od39MyX`=cl;Rs4+G`>JwHP5SdeL-@}&%%{G=XS13G`q^~2 zmiI0C5~bNTrPE7d+9DPyr!JG{28|^z>1Bfq`h|hG8vUhDgUo zKap%`3dPYyFs<{5iwFS+va5t8bI&iW`%}yYv(ZZFNcl!L3A_Pwl}4_iHSAS(9|8Jl zHg0;2c@qkX9<56&VZP{tn`+IU;+zaors=OViQEb;RQqA(&U0Kde#fS6TiL@&x{QvM zYS7h*+j}Td^`Yzy@G~fLos${MWOnIVfcPGj8Y!btx}TWVIW4>>E9B1qjO^>(G;9XW z!{BTi-8A;Xp0Ze4halV!H~u!!KXMeUCv8v&o=*%26j6^AOO2!!7F%2;suCprOxoio zQbA#w9TOeHi@KB^D>l5<g8B?1;UZGxzYLvHz4W9a2aRH%F0*{>)4h z9oTn%ac20tHv?JUg1>(}l|af|g5yc5>ZKcBQ2QHgT<{&*C{-B<=tw$Tyk?sh|e7?C|)@ z!;kf)4ep)e*R3QD|KB$8Ycy6I8u2bP9=)g}7N-WdT&EKQZiw+QH%5i7zs`#h8p-xP zE7!v2@7MgK-ZVSvLh%~AI_O#lBQb=(i*Q<{^*zGQWGocfyTZuXEouV^;7L%~eBu;m z3On>(?BOdUvp`mr;0V7Km;VN%(J#Icjw zam_<*%bv`eW#ck*iMgU;iww)9+KBfV)r3+Kr z7@qtgpnFXCFw9PRAz0{U8OyPV5$cUL5D?*TG3^ z+cHoeCVhoV*R^X2H^A&;XgqQ>IdC15kk}^Gdq^MC$C7;|ImJs6-^aY+2WcJPO%P^n zH4$cjD9i}A>k+Z>70^e8i#N1TZs`Cwjg>r-fKN&}oQQoatw6*l?Dr9Y5e_@mpGn?? 
zV3e1yVYZ9Y>N7lSPN7)joOzxV!V9g3ElMV#E@x;TzNEZk|b|o{oEcnV?A0^GD-2M_eg1n-Nc1Y_Q?SCC}^*4Gbvt$bQh=V`+;bh#i z$$7nT``ez`LH4sJ)OV!fGT}EVzvuJaUsuoO%#84_on1uUN@exby%~UopGU*5U4l2z zQdU_Zb$UT_h*KJ++N^o%lX|O~EEu&Y6*>3E$`l(TwK7AQLW6d>zDJJR)TCoPEjjgY zoLa-HKp9^n2~|J!hUVg_qCURsnz}0JpL+QsOyf`;!iPzV)1+#y%we7C?^@-S%(YFN z)GDR-Pnxo1K95>$>y95~PVkJ@i$eIEs}+36jm7#?OU{)MPH@LKs@LGjq~q*-UIh@i zt0H1rVXTac(G(?=ovPK*avapOqigvxU}FYS9mLak%Is4IZcUBaShh-$7^t>tcC`;{_!p!hu6&%gYgq_%IEWf zCJ?Gm%?vOa%Dk?Jaq{``;|5Awj68tk0Y=Jq2?B{m07L{ml(#n{hrg+_smYv^wvgUG z#M#aL@hNNj>dkxlJhzJ-8=FdW#)FKMR8%Qt-?QU74`h2ZP#>R_?29E;KO-la^<;54 zPwu&>KM&y=GQ*!`N-*<5v2@4A3JgV8X;=H)sK**aCcQ#803++;0&g zuZf*J3elmBu0MRrKlC{pM5XlG(U6V)fGuMEqlQY)89b_UNo(VYFj_B00~Z4*+Zh^I;Ur^(_u$o^e0 zQ-y^VnaEh>eo;()AqGc2-ox?X^E@Ys^uCXLi6g;hbAO)9Gb@5CyM*OgE4EjMrpQ^% zL)1hDrCECp_JvbW6z)N`PTYM}^`7;9yByci&%UEN)uJxzgj-hmZm5*XNOl<)Zuoyr zXc$9m)xAwSUu1K2656yq+u3X>9yKa@_nGbKDi2IY4Wbk@s)D!$M5-aT?Buh+?HcBf z8WD3O%+Krgn)L`>z~mS6A4vI zK~YbT70Ub^R%>}ed3VIn{k!+$(4c{)&YSJsOH$loWWV4Va0@%eUz_E@qrGg8GD zrd&|!AYigUL8(B0Py!6dh3dhGJcfRqTCZJ5iS&}mLqPw{)D3Lai7SehqkL3Y2rC-e zF6An`v_4@t>Tyyxs?i&LUvhvW3C8ux|G~LRs=AzsET^LQ&KNjFly@l`RpHdP@DlL9 zO2i1AbwaAjkeCFwPP7qnis0RC9{KPim(H>oQ=**t@GM4ou({_wI3edbIH4z6GcYf= z;wk59-e@m`Go;IMb1gVn;c(XUCrzb1Cqg!e)Dj~k{vVn3elyaVMkzexyErAeg-SFQ zzW+{HYg-Nz?t|22Nhr&01wS(6^>6I4TWnM6Y=15OGrc@7(-Is~{P!BEk7~Sp;(Z-S z;k7DNY~QNC)St>@`@Ap!7iFHwY%|uvG1lW}MuTilj=ubFSC$6J7I zf*g!~@Dl9;i%AbE?>50p4{e#njL-tMr>+FCxMIMVfpkPmf+Hz0NjOY;3l&Y?* z+txLaL>@L0smy>_-JrD0K>jK8%lKMvz99W^8yikN{GU1NX>I2;#in`h7!hg<=4$#X zLyhJUXTW51{UqwcmYx#J@t8sWszdO1X?jg95r4yae77Z(1c+i>w@+%i&@z*wQ_rY` zryw8Aj+G+@3ywl*tU|fU32tq$NaoGYarhJe!r6ZQ17!- zaWkz!Om8)ekdf@F^onAkKP(>MZhx2+fypZfG5n9VX1XF97LiYJsxN71(WuoZ^VOg` z_p1NTacwlT>}I>&&u)ERo+ETaJr(kU#!dJN`djP9cB~&V-D*!MN?dT4Fr$C4joM3R z>OICA6c>f9G1E^KD^G*b-?!j=mI&FPm@0v>hJa$MfFzg`${`HUK$-mB)xg7g^^MZ| zfIgr_Q&dhx>Ge5E?vbGtPLZZ&*!Akfk9pt!ZIpf}<$nm||F2R<^nxr8<-uFCH@YU}PAjSL#ns)Arw7ysuFr4%|L 
zSS?sJI?xgekTrNf3tXQzumL@+|LXq2TR|P7CiZBMXuNwErp8)6rS4$fde0vb4IyTju7hPuJ;2$xoLsH&i>f17Sw@3M?P zOZDxX7NfOGij|bxR99urT865N_07{30Ovup-QJ9qt()FIkvpH_*EZ* z+5RX>v2CVhUmV&yyJF~kW>8PEDSYjJqdI*M=4(Zg=#8KcF>Ee~^}KQ$-L{S=hLRw3 zp+QTq@o&4Wg1E>!{EJ~8vv2UaxnuTS0{MQIHoS#B+PeVC9MO75)YG?{$PlGYr?w^3 zY!qT33tT$GVB?9+_aa*3O(KH8ndPbWrmk=y@=A z?T1d;+U~nheErg_f9=J)az8eqeryInx!K#TqyI|0h+=`)UXC#u&QwA<6@1xyB*rB$ zFZOLrWbqraEj`uk--LWfy{Pi@ykIr|Gl3b)kc9;&_f;kyTIIGoi8PM{RPlbjnH@um{qIASs80F?ADj^XodP{&BH0xLEhiR?ftW} z#l)zI$nK*!a7oM=U+Y<)W!6jM&&%KJd+f`sitSq$6w9nv9b5F5JV#FA&9es$Uc0;G z{3)ukP={0E<3m*To^Z64nN&~J;82!N`LesDC)Coc1<|K%&WQ@>aXW%BkfBr$gAXwL zA{YWNJBX)y80A_pT*_9-k|)yQ}Fx8Z2JG=?*6T8!>HIIVpP?@SbZ=%R8@MCkKk#X z%oB2ptit}#xF$yULal0s?h^YTZ&+kNE+d)lj~b0CpbaO<(K;QrtiUF^(Dvy8G7IF^ z-^m^{V2RWuAH6}8GK}gjMDUH*I6rWD)thotI$yFBh_U6SC@vpztd3#^n$1OaA(+}+ zw{WE*YnYW0nR4CY3gj9r*Wto!_tj3$g7v{t!fbc0oC4pfL&k)P6kcQyZPzKgxo#h@ zTY|;A2GSG!%S}vIX>ge={AIde#H%UTFyI z#X=s)xA!k8@+7fjg=o*8+Wka3gtj>D4PDF}wz!*uUkJldJ<-?6_Ro|X zVZisc{`32_-V?xhEns+_KyhpB9aCM|1oPTpvtHE5qLDle=;mNKyVSwp z!x_9KwIp_OA15aIwy@w(-qV%hEE*gW<`OqV)J34X>aJT zi^99Gb21J`5g%xPAp|ovc9Nne$4D?^`Xi;r|70ffa%H5vPoMtV(e8%W~G8ed?=w5?A56u7Pds*iC7!A2G&A(6tU*`s_1j5YqUrECzR&P2Ka?S4EfWxysMf64b$u)e=mgA0ET%g|dV+H9=a5 zHa5w3IPv24uddDki5Z{R6y{%aumA$S9-5mlopRh-IZe%94a_VMms(*&bYOdi8g+-# z$MUQ|wO4pf*$CVVk#w-o`w&h?Awx(lOIpyIa@JhBj^Vm1@NWQan0g)lcuqdu!`aAA z^x-TTAFM6VZX#zsOqLPZz(xP?$(z$xJ%UAMLW3l9Vy*2={oC?ko%OS*glgMvT%~~- zxZU^3sQL&2N(vgbn!MN0eS=c`>yx(I>0|5e%@=C#_p?0RaDHpP?BpVl2w4M07*`=7tXQ zlTS@CHX;ElNEu(y<5-wd)b=AZdpxH1M81s49uW7`3CF#ewJfS;lEw~-H3ygdK7lpC zpC|IJiv*@t^--*xIoUbt8XVKMo?bVYq1RDd7aW_q8|%Cq{2Ey3q!FI63j;25$nbs= z9#%;O7c9wUcDlm01FmH`aoAPDrO4M5dWgd9zFVWJl2;(ceO|5O_2(YN(RT{TFjW#% z%zfT1YTJF@)2`U!p;c=5eO?-q7>>+i*)o^%X*(ZS2(FkJqY#&4W-LQ?=6)ll{iajoV;re5~^^zxmOy4*$$kQgJJ z2v0e9oY1>@SOyK;)U&PwzU$fPCq)7g0(cm+WrJe)W6Vh1j6kzs6CjWgKvO@Q^3cf( z4?OMyRNDef&6`Yc&*+v{+Ttf(;ogl*ioU3$0eq97*{d-R($tgU*d8O^VgHbSfqc4y 
zqKTsGgwyuiK5IrJFA^r585YfJvoBHmNbor2m>h7Nj)sSlo;IEU?@BoC*jRA(ao=z+SHZloEAQCw&JtKs%P%m*uSVHu$0U_ zF?bR%eG6Vekj7iH=;CQFE6I=+!$*X_N`>bM%+0B>#=uQy)BMBy#Zytkyu&LftLY%O z7(#$Z;-rjj?jKyM6NyXh;lEON1(H^h+%)iR6_^VX=MmVE-a6`^({vUd zV*NB)nU-8!&AAVC@Iua*m-X&gX0FYsE-2gn zQRHG0ZOrk&THV@p<#OKc&GgrqV0MkcR#iAgE z{V|#~J0lh|F2)xE3udpyrO4Qx?2c0T_Jd|OoPA5fvZr^Ad^^w7^OvB>oHY-1x|O}XmFbZ$i#c}=n8-}8j2uphJYy=eRZ zl#jp9U!~pLG|H>>HF?oHGwg%5~=a|G0 z+?%;Eu66%c3N9QH+}R<{1;Wy3fYiohm@? znaOhS#2+^b$!kRFQ5RZKnmT*Pj4Bo@wr{03J zqtbHj-(oj!%IvE-HQ#J6vqUMD(Xg2^vFGNkBSZgwe>S$yWmhMc4~A5i4~}86tG|X1 zW4NnyQW}j!dV46vkrMOpb60zB%GkXrflW>gDTJ}yq(7pSA{d977D9_2p(Mm(l@QzG z)`;=M8+^?vQP-RGAOvGbd#22YC+<1i%j|-ScK7EGg_A=j8<1FJiB00jiJQH1Pv0KwlZ)U*RCNNbss3hyT3B#f|!Okg?dO zgCx$fv-Q?A{M8*2ad!Xp7q6}L=%C=`O53Yu;A0E5r-M|X^51nRHNxkl&{zAy zD+jcM1iaJoPy}tVHL2tu=Vj2V9C8>*W zyRd!50K+Fj+y%L%fj#~Nx!nuw0lAgMs{s64GH58Ur!A4i4_R+@nj5t(6#rpRocCnv z2@v66me<0RS<|f;J5*Mhm{@mk168b2{TtzCV!A?m52Qnt_zpwl&Evq}0$m}2n`9#3 z)fHKm9~cQ^Ifq=&2KjXX@Bi{-KyUj4Eg=tnRb!<>9+U>2f*-VO_4ym+_Y*Asf;i~R ze$N1Y$zOQI`Ce)W4w^hpwXS3~`xB`UCahHRdC8&qy23$D*kFe+7Ie;FdE1sNf0iUuZCHx%a4(t#^Vemnc$x9m2l@nd}LR(7r7^_rS-og=%$h@A|{36 z>W{kaF>&*PL**m_3*lMPYgWyeW_NW#)#HI)*U!QGSfH43tbluH5^gqbIz{x&UT z7Q$X)`&0>RB(H2;S+UXs1d{9*WS0{YhC-pStvWU z1;!cUS%0?Z&u-nqFS}#gjiRqYK8%l<1c|R>u&xHRk7MPVV7$M*6FvQkMe@%6?Htui zytmQNO?!~)AHDP|hk=XL_o7W;*UhcFW$!@UZQ7}8mA}4%%Jb8w<;BIhFDT0@z zbK~AI5CgCA*Ox&o3g%8$wsPsCx#~PwfA<9|3550HMJVA6TqUx+HgXoZM`IsuN~Cpc zGSKO6XI(^$6p`I{9FzUY3fJ9dhA&VwFTq#)_2yahvQ+KD5lQd1HMc?LCiHCNc~@co zN2)cWT%N}%*MV&6HeOaT*M*@ki_~0J{;}MF?9iwxE~KQ+FZZyA zoG?;EU=H&(k!noxRYU+K)Tfy7fK+MpyW+$@B?-5_V$~9xzF$HpXfE{@2H$|92fu>(BL&w2uy(oI^6cO(3{0`&zfY*8cmrllK_a`Hwh;)G1;i?oT0G3)v9 zQe#*FA5sV(N>cjl@g7C1VtFyK+y1+%mv8KMh-_nPsDvvvWwgN+x+5(0FDGJhY@A-} zL1n9hip=N^;c^M35Ud-OkCLhmQZVA>@l!8Eg$Zs24Y1vmZXL6xlzH^6Y=Pt8`J+dN z-=gD|O`K)xz7D_3`Liz*oqmsI&2JK9-^wSy{NXCmOLi~Lo_;#IwJz!+ez#)RJ+Yw4(<&hKQ*5gDx`RFR zl%S#8yWK>Vl)4@ku)H$>8(Z%e2)L{A6WpJd1s9*OBv_*Z+Abm9JDp8vtI?~Ay)S3X 
zz4%lrOut)n!yRmC^sXS@Rbo_Q6B%nr@I#HAU_r9F6N2~|QT1@f7#E`QiGr)kPw_J` z@)y?vQ_Q|dI{8gbv6(9&KSs+N@EZ@U5d2sxN2hSR#~TMOS=!<4rqk};_!;z|Q}oJ6 zFWMZu2e@o8vnu$QwNGMZ`rM3+0;(@WKGI9G|9#%6Mftz$9@(+lt7%zMC{v0RgbsNO zuJIRs>5ycBgKSU;`P%O&;Xz`B^g^KJx`%an5proJv@^knNnj=>bWZ^GV=?I@5w%}! zjwFd6}&3C@D&pvJLy_AQ; z*Y62VN@{rZbG1HC4nJtQ?^CHhpYw12oOko`?x`jEOsVtq_WGe8r#54|$s| zKJsB>PtUJ%R^OF$`@r>xgSB&&t2uC3rHru&Dr3*Qec@HXqTtw-<9ynVay zjY?kz2fn;kcii$HYu?PeY)R#-Jre8OOIfjo3CWbzaR+ zaeucz8FcjY^;LJLHGEpM@96KFCa#~*^gzQGC1j$5ta^SvDa{78+?2EWw7AT7UuC{vA})&n9@jT*4)>ny*>;5tBK!G}o4%X~4 zWnjdSw-LuvhqU+2H{(IhkZeOg&Hp>T^TzPI$WE#^BeUgX?^g zG~?95TfeQ$)O2^_FMb(4-DgIdy8|Bh_BiZV?pdu3r?+-nSHDW-qi0@kXfuDx{Zjc> z)?kMJui9ea*KjlSD;;&%7;2exe-SSrt?kvo=dY_#cBqbt>ehu6PQZ*!#=#ecHjX!&x$)s`2A zczlfee$kPYrAM?co^;T%ZckvQnKNqyFSIXP;`r9cO`(_TmsNav7xGXoawEHdel$A{(z$|@u1j|KHF||xmhW9zDWNpgg4ZnMZy;yo;+QB>X2OaJ|`M|MNS&9w2 znfX-6=C@<^9DK8GWW~7)jxQ)J?y`n18#!t7rA@2mRL|M;_56K<{Jc-!?O5}xh@ zUcS1Yd=~Z`m#vQPci--qxZQVGktF{yKNhNY_42FLmkXY*w9sz)vMnPW6FllZ^Kt7` zsb0BFc~c5jXjE(8z{y!lx2l}A(8vAuJ9bT3P-fJ*vzw1@FBla%rc1P6hn7y!*Mb*~ zsp>i6Qo*c8?dE@R{BTK^V^j7=@5|b;#9!|kUwhy9t5>IwY~6ctU!y`>5+*P0nyKE& z7LA(T`g2sPZdD&$PTk$?NZphYaY2QbP3&Ibx5ZUcTF=Z<;l{k3H&!$c+Er~)c>D2t z9yciKKJ@6sX2;4tFB7^VF}L5tA*&~S%(3sH&&|&p2ba!wyxNUiZ)RlaeY5x7FY8Vy z@J(Fvw^awY{+Ma`h2=E{1+N&pG4qJfsehgOc(Ty2uS52XnEqGIlY=*gULJqiukFey zeH>oZ8WFSWNZt2K1D`k9dFkfE1uHjd<-QvATsG;`pefyGyDS-P)? 
z-#0ybb#1bI>cq`Wzh5+W;pJDJ8z0;CI^DfYaFD~JxE)@L7h6W3PdVtlvubkh-!HBl zHSFn>#8!Eda&{f+nQ+PFgmcoM1i!^Qen>fe@0j!E#_zT^S?GD#wMmZ&m*(Zo_x5nZ zETb0<$TN1zqGmHjO#A-+ty`rXj=jGb-M?s{m;0eHmRF~KUz>S=XVZ59d3wJ8P-N%W z`ySo{R+pMlEOGRQ)wz!P^(ZrbPQ>knjk>m&7IS{hD~G3bSN@qjcF7-`W*z+^w}o-;G5l_1RlKZfXA8i-X>Edr-B(o89Lp7R|Ewc4+?AV|UmOI}`NR zZ0C8kehON&(W`8YgySnaIR)ffRCamW2P<~>eY)b-tleAAuc`E9i_E(&?2P~NVZkHQ zuT*%s_HxmYJr;XRua$88(ecDVV>Vwnm9N{oF{L8r4?a?MNWc4q&p9Q2yYBB9i^`v` z-15N81A&e1^}O`pVZ(e`XO~_V(I#Sg*{!o~yr{75aA^BMP4kUkwr$zoHdV$9>yu^A zILBQ>!hK)VIG)<(bWonR%TLC*ozFAue*2y$KK-%f^ZTIn*=~kjeBR6{tkBY1%{%!& z=rCqUxm}retf{fN!`J!C{^EXrVg9vKh7T(JzG|V`N0QyA)qByW=!sg_dk*Ox>)9r@ zOzqI;BddkZUpCsa-=D=Vm00&dvEVNAer@OR zi&NnF+Hq9|-~DY*-`n9|o|dN)h2auru)J1{GyT<{l@`wd&*J+5?%cUdpElxr~c z<%IiBX4$8#dfT|KYwo#CR@VOR*oFZ$n|@R5`udj>%4J&Zv;Td;235=Cp7VHPSec>y z|IRw&ZLMhsGS}(5$Nf>Z>AoJnjks`n%D0Wyv>Z`uaK5T3^?bjxTz$UwtFIsZ{728C zB|3(OUfJs1?%0*J9cPq(zi0BWGqp|+Uskt#!Q6#Aj*T-Y(4clA$bD^;h zCZ7H2H=i9f`?fkf+529N?bF{ppE@o3u66lNpZYo9ulaX5N7Z$344E@|V&toY+h_kS z_`H9I-%1|LIk-y^-}yuB+l_eK&cEELN9C8#S<&yo>$W?lbZ?PbCm_dx$)%Q_Y0@{> z`0#m_LjxA=%$4uS*_E}HF8Xl%!{f0t$B(M)zW6|d_qyn92i8YA-TYK=fv0oLm*sbT zS*xh~^(-yF8oaDbi5XeOtnPh!Md3TO3w(0FygjB=>9bxH+uT|FtXOpQ9_@Z^a5nHn z>XfU^9Upb;FwLpx&byy_wJg=Q^M~0;LD1N=&r5sNS_jCEZd#%S= zYW4N)I$`V3xOa`)*e~q6d|lHY?6S1@IR9GyS$~z*PL?Q|qu&o5?@#=c`|6qN{pwtw z_q&r@aqms*mUn)cZ%3o0mJzL|?Kt1NM9iJs$#rfO{A1?&GZD}7cP&2lx2bjW7YP0K zhW5JV`&=6y&zgR@X|F*O-(RSEZ1vKIfA*==W>1+*0hRJynAC8RmV4fhRc3YB(<>@) z@#E!{>IIdGy^wR|s_5>=u8uia{nVM;7yEZ8*1z!ARrf7vQ|sp3o!{qgQR?)zI?d+o zxa6_){@E`It|?w-?q|Q|{-Gsnc3jeQ-Rgab!^ivneyx7(Z|>Fn;oB7xzjWvvxIUj# zrR#BSuk!@gkL!JI>f+g>D%YzQcDhZc#!dJ2FLuOzldIc^U|Gino3}Hh+A@{Z$T5{&xrC-hYlV6I_1*&@(CVy z*4?vgTRLaUgkxjg^jUf|cKYG1ofZtJcKrLlPt`8|!KvBKHeYmVvaX%;;rX!#hi-5S zAJ!=M+NU2D>`8okZ05pq;r@+6OIB-ic*f+Ukq-4rE*&>}RolU<-Sf|jZ}@b==+8|D z7b}(1x5&rT@p+Ou_RBoD>fMncrDs>Uo1AsU+DX;F-t=*1fyHB{zB^ZO{junJjW)dh z`J{h>+x6x7FJ0{y(RN7Ow#k22pR=d?_x2~pJ4dO}`)Al+{E(ctz=w7pT7B-f|Iy^k+w5-Udb(lr`c)xc 
z?mbr|%bbVDB9Hqz)*qjx%-~~-vdkJ@uYc{-oqv}4yW9ET-hYkRRyD_@sNolD|M|Xj z!1WF7*7Qxiw-fZu3Bj@uqb(&bluP^g=r>n*9ms|1b_r$p)3aon2difaf zSNduOhD{&+I7>&DM&l~iQ%i?iy zC;pnaXy~g`{slXB8@24RWoiCD8n2HGh`8QB^KII+zDot)ww)`wxYwvwqnewRNo3na zoQ~oYT>SQmT{1D5Z##a@zn-Gy7uV>p8{R>)8=Bj$SdPqD5`;LnV8!(-nkIkug_xTS z=52#@v%$h`uy`A6E;DKa9ptrEf23(6sUc0AbI-JI=UjM(Lvv@BiJ82dD~xmWb`J1y z?kC<%?BLwb%Q?XQVm9XrUd}FF&V_}#iF1}_**gf*6o^lDVMT4q#L1EM5i4wiRk6W5 z6;?r%?Zj-Tc&vFj=N@P8>+E9RH=A>AFN(w4Id@ZM$|=lR-8%V+%3*zyUxxDb`D{7*_`VNt;ll|XO|{2qs=L!b(?13 zO=Mns@m!ZNQ9h!W>9PG=Oz|RyN#aZprx9Og=Xg7ZhPLBr^>6V!RN@&g;?W%Fi>594 zFF8t|8z1MRBCa#yyeQ5Tk*h>Ihnu!zYw~ZgP2xD@T%$R#gjq6u|L0u!I;YyTaDF4s zc>5O4lf;=I&V}Mk6z5uTCW&*mIFrSBRGeq*dx|m+HSI;IuA)>kQL33J6(Q(IoBiEP z_NxaiDB_U)I!Qck&dl?_#F{Mfl|FYq&UXb%73Ujq#@ma27iWTfXE6pHirEWwGm+_0 z;=7k1!T%iJIgW+mrQ@ME?=s7sSv>jIT#Ed>5of%E$fY9In~}_OD0iEGf|7ky*LxZ zxlo*m;#@1vBysK*D_pWTkBak*I4_Db#X%HGwI6Dl1X>R+@?s{}G+I|4@{=3lG35Z( zm2GM33awADuDCdn*w|0#yG5KH%&yY&iGMZ|5i<=b@rAMTbrjXM&g@ zQ4S$ue$YZ9mgcmDL@dqDZcWoOYQqwxZfZjkv;U*W0K7W#h7%5u(T*tc~q zJW=$f%eYM5&fR>R13EZ&^K$mH|0$ckMH!2Hv+RAGU7E@zpK_AKKC)k%3wDTE(SP$^ zjox0_d*RdO+4b0S^8zIOsZLiLtggZ;2wiJt|7G7BYNPzL|DpVD)^gLe!lM3PnYoL{ zMxREe?X!24`e+<~V7C2V#&L#0oXPee79(33#opN|e-<7HGN%;;8_FzRJht8=h=q+t zQOorA2;V8?$#b_2_A4{~rr)cj@RLBUXQI8B-ujre_Y`y97`0R%&3REZdFRhYV(XkI zR>1~qsIc)u=f^C|f7u5viv21@oOi`(zJK);E5E~WJE80+l-@!aD2Tn+Kd*mEZ1Ezt zhs@Ug`(E!WcA0VKeJe$rcg2}1PGcYIDfUu_p7!F7pq02I=qT<8yu}@Xx40wl7Iy@p;;Q*}qOS`T za)?01A>&>m9yer`^k3qR7qR@e`x4oPp5i{h!PQ=rY9-?9C}Q^(v3qmuq2lVl#C%1G zneK^GnHB%{G4PM~#64`^=lTBIeO^>f%Az%o6w`QS|G#3&H6n)I}vf&Z+4<6I_njB*dMQw@9>f_!v&Y=}>RWI+nnX6z{DZjoFE=oV3zn_=8f^%(QN(fEM8waEU^Es8b#hh&W{a z+{EL4%xt`--<&plz0%)%FX1wBKd1dQiCOBux6fC+F8$B@>jpcaZY%a!vDjV|eR9Um zd?TY2*Uc-wh(pE}FCMpM_8-Q`f8JleqW!fk?f0#CgAK7cR%nBzxXy44GWRrJZ!!CS zF<;&C2}5y6f0}RJ%>HlZ+jzy3#>+xx|I2v!Rq>(qBZXPXe;+UZxZf={oeQ?!0y6rd z7r(hEb4Bq)G5h}!&qF018XH;x`l4z7FUCe|_LA+T@fcu(#WIs~hjf!{=$6=ENjBIa z8|2f zC;Nlh#GB47vg@x`5?I^spSAYGsEluRt%|Tb#AQ;`%5=@?)P1JIj7-xVr#X$w?2y}z 
z)yDhRFIfLBP2WAIQ%8zN59c|Fn`gJ5$zkbnRVXAX5=Sv5&4V;YH@Wp~J| zm*O11;4-~!m*Kb^3h8A;VAO{d3X3n|SczpIF8h^KlXrGnyE?J)=x!$jHkaopVQ($ zB>ibYwH5zfYy8x|FIaz<29piF{X3(}JjG>~V&aQ9F4Grr+dhpxcbe!R-oB*w?L6%V zi`#qg=813-H}pQ)TjNvxML(6ZanRC}NqhR)#wE=m?bEvSR~^I^TZe;iQV)8GBR=~} z!<0%oXgm?8b)F8%TZ+fTT?mRp@_ynmaT>dHNFFa96ZaI~>5x28JSOf(P#lu)7mtYt z5EO^pBk8e2x@SZ*?GJI1K1zSapZkf&PxR+l+7P_ZzsVidVD$B^IPD-mDFnrlS)_*i z^*uv8myRWwi;Mapc}?cI72cM)i^BbxyD=YX*lAJBlN9}Q=E=?Qxikmk!86t;b3>(1VG#%A=L~cCSz;qSK1c0K#iuFj z<@?jpr!Vtlej`EuoK2J+!#+I~pNTr>H|pTpRQ3trOLAP1#6jaX^FhCZ^--)(=6qb! z{WI!svEGf}8<6okiE@+&Pv)NNQ$hdks6|7a^sp-HP_T*OOC^3%#)bkVEuO8$5ESxCgSm4>BD=1^tmQ|nCqYA2z@4fDRI;m zD?Zr-r+5;0J(NDxm?tuq@pNFHU5ZaP=~LXeAm3l=Cw-XfpUVizQ06I$ehTwc=JmLq zCCuGQ7(n*pcIKYU_0M61EKs?otOUyl)>z|>BE2+A`QG3QZ`Rx_!wK4{gK83_or2OP&9?u2pGEY$S0n8H> zK9YG7bJ_2Um?tw2WdEJa-CWbhbDp`U!k;nsQ+N)MK#D(rx&FC_uq>x1)={&w;|lp3 zeTGQ-sPgIS^p$hNV*74<1?S)(~I?PmD2l+VD8C0#lg6~n0;InpG4NXR7>x( zSNb#8-{TjObL>+`@wv?U6dpLTe_t?9Wv;)+EqwG3=0v+%C_eec!mf`8kM!|WWA4IS z`ZU)!0!OW<;?tJ(p0(5a3}EiZTyKt$Ok^IwT#o-0%oCZ*_qX>mPf~m?Gf!50UNhI~ zq>m@37^r%Cnd|Q%2}v1!06S_CO1mnu-mh+YA8+OX%;kLdXP%GMabH)r!kY0b4d2Z&iy{`qQdZ#J%eq_C0!}LC7#J*1P z2QW|6_jOHcq3`#O+A_tb9qYBm>3!(_K+ivO{e5F0nZ-W)6rTmGkM~ONvw?X6b9w)E zkbTZ8K1Wy|)x_}8n=c+dlK#x~_jv`+r{738YBv<0B7#%=QO(l()Mp;gT;``I`-np6 zjzHGCG&iuEzoVGDF_+_H5&INTe3r95sYQC9oy?P&%YHw}KKgqk5Yg|f*IK3bdB{9B zbD3wy9Hev98Y=N*7hE6biccx#T5H2!e?L+@tiwKS6rV3y@6y)L%lvd??#5jDgt1Rg z#V3;We*6MS`b=dWz+C2e4f}*CJ{wt|(ms7WN0_H7{4R5?gQ1t>?IZKt%w;^e#X?7U zb5Xb(b2o*zX6~u*KFs|TK9qTY!WS`*QutQp@d`i2JVD`im?tVcm$T8XB!$;xp3Gc- zuTrFn?)&Y;D-3zm&22nt&HCJ(jOY6MmEvIp^Z3pNm#;4;3r^#6l+uqgSs&Ha(7)sP zy-U|SrM-?eu4>wTsqbzO{ryPs@B;hKQv9#7KE9`+m-Flc>sKmzd-2&J^?OopL$ALN zDIS*5KT~wn_A2^{tWO9q^!odd;$a)s|E}mevOXoq(ChC(iic6Gf1>E)SnnBX==Jv> z#ly+0&t&ZyT`-gNsr?PT{vM=wxJ%bNrM;hETow70`hf<~-+vSjjn7aWwOoq-W!5J| zr`NyI{T;P06#WO*yTuuL{XIqTFt~|?UR!18_4gpf!{@A*_o>EJ@%|6%qc*13=gC9Q9km}7 z|3ZROK2vrWdi^~}@vsW(Pb>NwtdBZi=%aXHZO!^CioS#NKVsLs8|b 
zgZwZs123o$O}i@M*N2>PrLcHzi_^;iTb#BJTbz~`TYQQMUuVJ(oABo*@jHr?*{Zjo z2`_8H>zHu*839}Vxy1Oe#mk%U1}40V2_I_0{Y~Q8WumA3*;bsb#J**V&oSZjy9l=Q zwZ#0f#S={UPbNICnCG^90!(18tNw%-9+{qxwb0 zMh^53qC$b;;gLasvB4I&J~$>g+Gao&|G@)ADN7ht7#Qv!5yGWHGG3_Gx~ZEczPPg_ zcxY5)bZiViv-rmb$He;g3k(;Hi;j#^ct}`yY;bg%HYRLfXn3$uBqlH-Dm)l2g+>JW z4|ih_;vW;14p{tSf@4K1qxuEv?KSdjRol8KqOOrvLD)di+Q9Izm|%ZPTT2`N*ucKw z!7|(sf0N6?qNb@u*uc2pXyK<@rG=t8rB?|<4_k&EeL8si$3%sP4U{=a7ZY9<7PY8H zYMc5rg}yZnPPfdc6vlOI8MhTf#lKU`Kg6gggi&2nBdpiF)w-h%sj#kR%i6a!({#BL zR1|QRL(77F?GqXs-NxS%93C5J-i;YWQt`gNp1+or`pG3cW@(Ezg%Peu*7r5~vN zX%oq4iS@Zrn}5IHz}R%&NLb?&6d4|#F$#a+RI_z_hl)vLq#`If%xbS-Dq|hOA<@CI zkrsckHUtNWHOPM;EeL5nWYOnx%pgkyPH!MW!PMSbX>AFNjf@Bj5=A4$R1Niy?H3&!(=ReyF1sQA z|01yX2U_|M7E5MyuqCEnSV(M)?N(U)J4ac>dfqKMa9GF4!O=m%O(KV;MbR-bA~?36 z2%uBHz}TjNu|fS{Df@?u7JolIU1Cnf_%{`+VPEmDmc_(QGj7*6HZW|URpcEQ8<=)c zpN_u41B0W*p3$a7+ZKE!#6RN|%9?e7!s|8*=`+;2?x+*Ljg9I=8)WbCWvbJ<(u$d0viPT6(krwK&sauYpxUg9m@KDPmZ1<*YaRMlm~KF)xzM((U!hv8 z%U-(c(BJ9D=|8K;(YmmhX)`u~LV_x3@Yb+SHT1DrR@Ymnn$)Y)#db?D{JH=3qHLa5{tqe``76aolmXa5!T*eWu zFC4ZD={02>rTVgvp@c!Hj*KJDn1A{2?6QmQxSQc^~5cZ*h`Z z&iWAljN^f>%Q+kZ;~?E_0*T}V6(+{e$=DM@<{@XId1RCsQp))k;%{>kB#E4AX?4=T zqYH9IiTkd9xve6#oJ}F>ohn_FlO?T=bR(4%)@181diljIU9l$C)?FJZtf}R!Wtb@ z8oR=rrt8++*?4<^1a(STRvh_55=>9!6cxbGC!Cn6Eks$G>A>T(G!d3l2;p9mLxukZ^?~Y8@$FXOMs%Xc55?;*Ai2+x6`)UTf($a@J>U^tXY; zbDBy1);Fq%M9}*-{>Ha9k(R-F&}rsfBEu|xvC&p5@#-MFjd+m}DPG!$ki3HpYx+#k zI4X&6_r>|)vvJ5z3W+}eUPe5ogZM*nN_|a&;voJ=oRT*sC=TL}#VL75g5n_lM4Xb3 zCnyg3aR~Z!Q}Q(g#UVas(VmJ^@*fC_gZMLXO8zrJagY!Fh=Sy&35tXC&&4VE4T9oO zeeN4LgU@H?s-Awof)2Ib(!i;VtFF?tM$C!55T}gCSJ7Ko-v{)mpzp8f!&xtXo{Icm zf_@bEyaGN8IOc~w7o>xHkY6z2^(X=2AU*br7jrc~Z9$Lz7T}nl2=JjgEL52|hJyZ` zcrNofUeU|^OandUNB;Z|U8c{rrO#c^<2ZZ=9NTNp6O1n7ICN&N=CcHF^eJoNQycV{ zx7NThZ=Jyh^A-&H_t4$}ieBa|2K1P>;mm1Xe*m9D;8P5^{5d6x1@oDYfH>$I=AjsK zHJ{}{k9nvM9DSOY_;fYV`1YG`kK(zUUmX>_9B(~AkK?T` zb9KCp0X@dK#)RKuPI1zY_{ezZ_cZ9B*l~PT1&;kvi@BOlFVJH=oq(fHFB6|ZCi-C} z`U#-NdKUu6dJ|22Hh>;|eguv_M@)Qf0H>VFarG8_&_9>>{GJZd;<%E3r%oML^+Eqh 
zl#%`D$z1B?xM~i199Qj_tK(`A=rPU(CfxYF5#xTK6vR_R{2Txs>bP=aF5`4HWLh)O zW1c&KzO>NF_6C3+eC4H=+hoJ`UIHx3;|9t$?-oEe9-@U;5h!b zDn1rHYE8Qddb&rIlIQqO~uIAG|2la~4FX&SMIQo=euKLt8(Ki7-=Aj+vvEE?dSZ}O} z&lu37&jR4+v)aUG8|cyJ7;yBtXyWq>IE{5zJ-eEgUHtqF9h5iZt_t6vtHj^dU`|?$ zrx9>mH+&Tz3;WCiz4$Slv~^>-q7P^N8qnjqv59#lp~L(CgP_Ma@0oBr{?TT&U3pA+ zDHGm_Ipwn=#l-J#Yyv&5GlxNs`M(T$?B9E!$8q}-^w{1kv`C4AywRr+bBYsvjNd=W zIPYqk_%t)|>0zP|GttxUpU|P^-}wELjPWlt@mXc!vl}?(;fRTk{M&wNJa!;@Dt%*KMek0GSz5sJ|9jFW(>#c9% z;|qF>zc+C72{-W>272_F2^@WvnfR;+J^CC3jy`8he6E5XeVzkHANvBf^PG#hn&+~> z(Z|EY#|!irPgmgR6Jp}?m5F|aiT)eVW4&8|W4*~HJ|{tsJ~x4*&odLB&!9)20{p95 zlxOsDWv-5YchIAcFL3nfV&W47dh{6v9DOF4_{;-6`m6_zK6_1k&H<-fxEjM!dt|~h z7qX69(#!p{qzSLgoU|BcJ>W0IHM#FJQG6^$b=p19+rvKlT+z#Y_C4rdg1a9TsuY;SYKT==WenFZ(?L^w{sQ%+-EB1U@xHeKMaf z!3Xo1oqu0d%|kxsln1)SmOdpxkK?}vaLi|Y6Q6b_`mQGWAkbsIaloIVp91tDnK-Cla9n-CocaaFRTI$D|Mimn)m+ibapebk z99LbLtK(`A=rPV6;9pC`BUhh{oN_}y^C0^@m!g;T76Lui+Y$5)g_rb+1U-(Qalmo>Oa&iYuNHtF z`+Xg7^x10Sa}qeUc?0MF7Wkn5JK&g42mbp3YCa1xC-1C~&*F++=F=7Qn9s`0)p6wu zdW>@p_|u<)lKI~Zdd&YJ&{M3goQF%G$9!G~J@(5Z&|^GWNyCu5(I*RY${YF=0X^oq zH0a68V%TaPpw9+*_EhvT&&@%PdF}}OmGG8%3k8ntnry-^0mt@UXRfyQG3e1hlZ(+n zYAfa;Cv$bY6$X7y$b+k*mwBiHddx!@aEyNt_+WpH20g|<7dXbh6nwBB*Mi;|;@__5 zW&HaSKX({TE{$GI) z&c}JcG5<@!2lM|u=rRBMfTPbL6Q4^a`s*h8N1(@g^*6gkJ1L)7ZFtrQw zjfvEHgCKJ`J5@%sTspKc~T!JtQ<5x~)BoQY2Y z=+S2#aGa0Zz=v{S(X*v#??7J&=3}<m6X? 
zGX(VLGX*&M%r@~^272__0vvtznE3nxdi1#j9DS~v_`EjJe>TzQpdStpM}~d`j`fyj zuJ)q`=rMj@;ONuN#HSbN(I*Z#&d0C7huUD#Tcc?|fu8=iyBz>nTnd&m+*|d~~QxE`|^0Ejx2H|Aj!0@l*niJ~d2yd`2WT`pg54K1)q}z6U+}{0JO<$(4$WvaP$c?@reUH`b-0kK66ccR)8LTb^%A9A5DCI1wH!Q z0FFNQO?+O19({6FwcTF@n5+F&9XQQ-SEIPr7<_QOYOd(zdNl|1MPR*Jsp!Mme?91N zz1jgB*QU6boiPtfD~69OF9 zpFzMeKZ{KG788EZgr7CxnQG|G(qGR8iidJt`jR=d=_&Azz_Gu2f=@2chk_p4H5@ql zj5YCDV4`1cqF)bstoH!$XAtKvCO*G`9)11>jy{h}eBOc{eR9+kL5PF$j6V69tMj@# za4O?Ug~ibdd~iH>QuG!>t_=r$F&NJi6ulhJ(?O5pc>!=7&s%^$7j?>Ub-{$cHsRv0 zIp_!Ffnt|F5zHwEI6lV#$9zr&9~_?xK#y^*1CBmhO?-}+=ueyI{{%hO`xrRZ`^v<} zzLuVz46mmO07st^%++yK5%d^OL*VG+ZQ|n(ochMq2u~XUJ}(55{WxCHQyA;A1&-|+z+CPNvR%=j{{q@IRME?J z#e*K(^)+*PdHey|`xEFfo)e0{jORDdyFfe_6}^ln1@sut9p-90W$GCDq&!4GoQ;@g z5)9{W2jDo~dVmkk-w@DaKMn(qK4VOL7Mtk5HPP<}J=S{_IOhL0_)wc={@;VX6y)Ej zt`LbsjvtwSXXfOA`7aFomAEFye>LD3XBcqI&miXN`Y;;w=rb2Mws$G`V0+hsp8ocQ zjB~r9m+jpLdTei|dXya_Khfg5^eM-j>cnwXA2_zR3Hac+Y72U7Zy<2=2{Z8-ZK9uO zqMr|Xtalr5toH{KpFcp4KCggde|-iY?5`sADa*$FNg3!bSLW*YtO9!MuiC(Ie6|3N zaZUz~?VZJ39iPiUk3QRhV|(|554QIx=v|?`=M=phpI1PS?Jd*5$Q#8uM4WPbHe#-h z&kn$`y*X4L+FX_n@c$Ju36#L_h2$ z4s~2PGbazsb79~(uBrjYIAg(oxbTwo?g2fn8>fL|J}-a|_RCGsW4*6{qt7Q3pL}0Z z7BaprD8^hJ|K&lC_0|WD^)@l_X$yMv2?UNlVJ1FiO&+yqt9mG=(Eej z=cI}Lyovrc=&@eMMz;GUCv&x5%7GqzS_8-V(iwbkzRU%EIhZerie9cuYeA3mWgBpu zFK2;coKH=-Yhxo1)ZS6zl=CQvxw^g$1CISQ27GWHO$R;3lL#Dr)|&Y2H_;z9(Vqo9 z)_V&$*89-J=MCu5C%dPSH_8wC;^sVBgcVbo__-$9RGiT9^-ig9DQDz_&7BY!HI)%gFeN9WB$uBr*=^pi#X}1 z1^Nnh*7kZSdYS(gpvU}o0*?8g3qIpSeKMa}o9fl;v0y$+0>?a*XKpWwVLoeu9`oP> z9DUlF_yn2g2bk!GfFA3e0vzj|ZQ`>G^ysq%IQr}{@i`27^tlKeeNs$(9)TWxG;eEv zp-&d(YQKD8qAzEnuL*jr*9SP(+t$RVC+N{<5ODMvX5upkcqY;G4My{|W#EJU8-U}y z+oAYa*ykDOE5f|{r0C_mb82SIKhC>6z;WJ{0*-OEFyVnFd@gh9--+Us^Y;emasIvm zj(OI6jQG`Yl7o3BaT((*2^@XOoA@*|(R-Wd{XmcP1_8%<2blN_0X_Om0ggViO?;Mv z9(}d~N1weWK1V>0K9_-`&kYlwHzs<`*LFYVWUlt33vjHrf{9OK;N&IeT}$x6c69@e z^Da>Fu^3@%%RpZV=G{6)FX!D>(Br(@4;<&+G2j^ITN9qAxe=I}H*e-NUnYxF&btuM zODd~n~K0eXyc1#tBF-o)pCiT)Q8{U4ymdY=Qwdf%J)6l`HTZ?%ErcxcR= 
z@=3Xt_!ahsnUPy}yJ1H0a;QpvV5TZ)t5W<|i9-bzLn8 zdaSo1aP)CE@o8zI?_i>j06o?_9XR&OeDJ}3Sr7Uu&@Vd`z3i6*pvQhW3LN|8FW?ww z*;YoLDW5Z;U+OZa7_nbk0>^xI03V!peL#=(MgT{j!6rTvL61I5fMcFlfe+?+2k5Ip zo)0Q|ndf7m$2^|{j(N`C+M3VVkk2;E)qDm4$2<%GAI#?v&|@B^07swMCO&IS^qWoe zKY$+VJp~-={lmoP2I$e}C2;ilXyTK#jge={C;Aizjy|r;)pf%I^yt$bIQn!q@reaJ z`YZ&F<8uZ0;P^Za`f4yf&nxB^qFkp^Bw5X=O}O-w`aiz$L%}NSBG)y=x5C*>E*c1$($ID z+d{x`+`0qDIAg(oq41L9c0TBF+^z$T`Pm9SxxnWD=&|0jz|rTDiO*vb{VNl_Q+pAF zIMnf01~}GRnYlV{eL#=#hXBX^jRYU;-^rkNgZ`bX=w<&d0X_EbYT($vdw^q{mpT}E zpmsTka~gGyIIaPwIJfIm{I3n*s4v~o5RkSp#P80W{OQ_0L#8zXJ?6(((OX#W2m0*b z-$l{O{PY1m=4S|SPw*MXyppKz1D*VLKDUD23-p(OWBfOm%l69nAAla?f2!zZ{BJ>z z@mK6*Bt-UCtdsG)I&jMKb^~jTfm44;eKd2@VtqPK9pv7j#r{X0t0%REm2{Y&8?`*AvO%<~%HI8KfMe-1v^O}JB6 zBVcMPjw^TI*xvfg)p6Ad^w{1`ie9$27wECQ7U0<4ap1EN+O-VyIIgyUp2mr*(M;_J z;M8V0PR;-)eHGSU29Ekiz)_#Cn^92BXC3Bp9Ljh?L674w4)o+D$Kg&B{b3XRDbQm- zUQqNFZtpG7e+lFEzM_}?_zd*ekD0q$^MKvzy&zaKEspES|`0ebBBYl>d> z`$N#XL%+XN^s?VSf*$+bv8Oc;IDbn5$MMhwIOc5@bIRKmams$*272uGL!hUAk@o|q zfMY*i2aftYy{z$Lzql}`c(7mE14o~pz_H$ez)_#ToV1vSjleOUec*%foKm=}(H!kh z@WJtLOVL|c{{r*{U_5+O^m05n_BIMqEI1x=0>|-C9ypE%FW|U;g#gF=9Ar-YvQ3~6rb_6(%wz-Qh}qsXkVk88cze@7*9*)YJPeHCx5w~gaXHSMgvEE zB6BsKzkp*rkHCk@$o#wmJ3J~aTW;71qnWGm8~~2-oCF^#Bm40(=yAT>RP+|3nc92Md%%3j+TRe!_R9H^hdI@W z^QAQKSK^wSr;UN*d37pEvdDI&? 
z>ZgN0=6?z3$w%h@AaL@L>*opJSnprJQ7=C@BQ55q1a|~+jHe27*dSyGSD*s&_x=t23uKah`eue+K&2;FAk@U*ME~S8>uY z5IEH#=k)~Oq_09^am)mc`endTf0Q|CvAy?zWBNu-#!~?} z#Uu4?nXB^kpgAc|t3pn{Vq~OHy4RDNS6L8d@W3I;Y5;(?_X^>Hm9G@~jd6<(A zj{l;H-eMHjJV2iV#(xtdx#sJ5CYBO_c7v;y5<2(s^96xT+ zhJf0O`Dx5t&Cd$pKZ-JPKAr}jr@(Ike+K+H_>=~oHAXxUhZ=tY;1sKzUsZrpd*%G9 z4IK5&fTKQ=IcYIJ3BWNw%fW}r$nmokxSszk;v!uKj`5rZj{29()p!cV8h*qvp0do< z{;CUHZ!gEw1USai893@kFjwPQ0vzL64?a{z=4U(TUkEP8&wfR3F`B7e0DWzkUnz=S z&abL)Q*cJ8_@4i^m3it5Bis&KMWk#$vePtoy;@B2!L{f^Y%*h|uz3!l={JV;iju7BfMy`9qfTMl`_+!8P0DAJ-N9N+V z2b_H5y7vk=)|+*dUM%f?M(V3ECoSf!EpUvdH~3&YQNSr4x$cbuj`1V_NBth=YCKng zV>}PR2jlq+octRa&DU~_HXg`vF8ie>@yIO%0P zTY;ng0&_K;HY26oV1vqr@(PM zXyc5)?8P@KBj-y#;NE+K?ya0~+%;Sv)QXWw6#+69_FOQ zcrF3Q@tW}MT9p-Xe$@_M1;1rYWm#)C6 zU!;Bt_+y-lK#%!91{{6<0FL$E1CDy<$<}t^JgNp9QIO;nySL2BTj`2(YAB<-qaPpV^^(}CW zX9sZ9|H)j9=L2wzC;N139x$H5%!%RpUrNzijN)2v&=-XDzrUgnXMHs2as3|-9M}H^ zz;XRQ1{~KDyP4K@9TN}b{^i4*+J)6i(e>X74kHE_~b zA+b2N0Z09D;HZDXoV3_p_XOQezqiHy^<*yRySzW?2%P*qjN)1VaExaNaMUkhuEuj6 zIL4C-K2%1=<1otzfZ`;7c|VpLIL1>RxUSD)l=lFR`c}YEAH`gae-?1O&-_vGaW$&d zZh;>ACEsjAptj4CxmxdJ;F!0C;6r6(-qr)Bc6oCib^|B99EYcYqy7bRHJ*BNjQELT zJk6P_@TUW%3O_SAaIOl6!>5~Q$YVhR3+D?If~w5grS`QeM4ARFDZJtu3iT{uB-Qe39Si0f(t;MlH~%+-D$W1>$m(O(CC0`gFKzBQhwz&(LK1Ky4~#Zwx1AJ9Js9t9kI zzB2Jy2%N@`t2pUc1DtZVU8kCM0JuJ$nV$fT`oDmqKJx;jAZc+usSF(Zr9N}DU)lgC ze-DnQ2XKrh0yygDGgsr;3moG)20m0q&c`dj$zQJX_kd$OAAqC2>_VducICFnvJW%t~5jf_j zFLO0NUQ4V#SnnX_q(3Q6nYR<5$94D?=qV-{=O^IQzmn%%DxQc##v|8BSKy?V_rvbM z$zST514n%XbJF5Exdk}pCmDRG%sz3_@jGzxm+NE-aE#|QaMTxHrrYWF;TTUKaEvF4 zxy+A@XB=?yZ)n(R3BWO)mB3N|D|0nJd6rw_#CTkotMSxOxGTrg+=Pb!r+DPg1B3&| z_@@9zeFAXQuLO?zYs}U5W=^!--lELa_Idy(fB8OMec%{R8{nv)%v_CU3vkT;A@HFx zayCO+_#Jz2vt>IkjG2 z;FyOV%&A@~Bl9rBM8CpB{{wK$!$IKWFV~rKz)_!jrS16Z0>}7UF}GLZ?*{r8;+kA< z0~Ni+s7_l8`Z{)2{SHMh_o@A$$9?KBaNMV^0mpqR-*?vbV!KK+SKGA$^f-R5oA5lV z41e-DEl!z-Am*6_!+mE2aJ+9H2R^yLXAy7*!Cl2k$6DZ2hwS$sfs!^>E(TOC*Y_L1&;az=4$*~fa7?(rurDo)H1C#9;iNbm{WVPT>&Qg?}209 
zc7qSqA=j%Dz{y{}A94{m=HUr&)aU=+cKqFeV?6zstNl0vIQh%Z87Be9c)kIS`a{gs zeBK3)@w^5f%ulv;M!VATpT&4y2sp-52{^^mko6sztMLp0j`2(cAB<-aaPpV$r>zE# z@$3PP`WwvEc%0T7?IVuy6kx9QR|VkYFMqzM7I2KGHE`4qVy?!s2RO!a6nv(PWct9QvVE?MaT((#4UyVVJ`QDXXYCP2n1E)IV{#6|~>8r55 zA#l`p1djTl%t?#;#X;bh&okgdWn?~Y0w;gDf29J)crtG`0;YVTz6x_Sobz(G9NX0#IOf40IO@kUSMzWTIL31ke5i~ZR}X-r{~O>KPxh@w zdu2XlKYB1%HjAuA-jOT0M7|&whsQ;O{8qXcz7|$#4!FaN6OW$6(zZV3K z@l*s(@ko6K=4w1~z%iZ);Dhlj1di=p1svnq2ORa+nXB{)H$b z*X_27-eROtTMYWwpkJft!&$!>^tgZR0*?FF1>m@UW#3^Ks_iPuTy0l-6McY*{weU^ zpd0?Yr;P?qamv?gQ-Nc=4yX61&;0Y1CH^80!RHc=4w1=fnz)= z;6r8jsrCr;F9etS-77_J5!dMGu;2Jj9yQ_hnm=>dUb)|eg8ro_BmErA#Et?`rwpMuQQcq#)& zpZX>~U4ZN3)~HSk22ORzeQ+dj(#w5qGH}!{0gn2^%t?#;*FE6aFK@ty%E*4no-Ce- zgZhj7<^EMTSv(Pk>_-`YCJekuaj^Uv>V)do&pGC$3LqduCs8qa*-7|$y3p)xX_-N4cR zFmR0LB5>5({bbDpj-Rr?F&+=*YJPlylfQglt`l&KClom96PT;{*$y1z`5AmLKYsux zf4L9d1dj2%1CIKVKikfa7jRt1qM6%^V&vm$6xSAj9`k%1IJPSle6U>(hYUmMFW+y> z4V?6H9F_-8@ko7Z=A^}VW&y`|62XVc$Z@z0IQh%{JsCLZWjq&vqy7VP(qcSbhXoJ^ zag4{0Ipu+_$#{Z*lfN8?QNT$r;~57W^&6P0@tgsU@mvERjOQWfUx<3;{`*qVTf{Xw zI{spOCy%;zR($|-*$>;#VS90ngMBm3*J!d*F@=O#S=2}2;jJQ`~n>H@0hFcl>XIdFL8{=ow?e-O+b(Lw=F@ROL)oo z+a2_{-wgncJ~1XfbAV&J76HdRYygh>E6mkAWIAa(5BZp@d8i5;{c8fpcv=ESeH?Q& zo&&(KUrvG#m67B84si09KiBjKIL4Fdl+j)}&ZXXsxf;)C;26(z@WFW20LOSX0>^j` z0Y`l*b2Xkur)~FF8|G?%g#t(a2;dmcc;Ki{Vy?#X2{`5_=NW6Bsf_He^1#Vot{YW> zV?5r#Q6I%zjpqPxjOQfyU_5t#lfPVN9s$RAGM%-y3-xZy)p$k&$9SfL55}_?^mzZX zQqfzCFtq%?84t+gtvKcSQ;NA958RU{1I@BbfC zcc5Pa{1xymz~2L3MUzJyRPRS|%6bnw5;gcI@m=yAnRTq|KLfX9vvLjcR?FGS?SOxs z$I9)2uPAKg4#i3eHMa8dz{hx5c?IBun^<{87U1~*T#W2!v=!;^&&cOl_yO^&^qAM{+zO9(FaU2yx)13+sh~J27~j%( z06&-CNtQ#L@d|IkJVD`YnI|f|2lFI_hcZuAcqH>P3LnNiMd4GK zrz(6tbGo~-JcxO+ z!uvB%QFsh`jGxCVd<{QORQP6oo~-a){5(bB2l=_? 
zM6<}6|C7vJ6n>7mr^2r@4^a4H=J5)D$2?KtPW(Js;f45lio(nBbB$kL$oyAk?xOIT z%smxek9mN?n=p@8cw6R)3h%=_S>YkfQxrasxyBoej6a&Wi^AiWdn$Z5^8kfUWFD{Z zdCU_P{vGpVg>Pq`qHy_siV1<{G~amHBVZ+(qHtnR_Zcn0bK0fB5rzrdkb1i%N{9kA8 zqVOloJr(ZA{TiU~T+HJY?!r7#;T4%DD_njaoucqotk*Y7Sbw@OccD!bcqnsEg%4#O zpzuk|;}t%ad7{FXGEY|cX67jhKg3+inLf_T%v}`zfVro_-!l(Tcvc>-@d_`=JW=5_ znI|jUmwAf9J2Kb!{T4a?gP6N0d?0gAg%4#Opzs9d@d{tbJW=7>nI|j!2=f$$pJ1-# zN}vC8%v}_ImAR+FA21J4_zULo3jfGFQQ=NJKa&-ngL#U=b2Ha+r;k5Ba~FjdX6~u* z;>-gSUYdEl!pk#HRCp!k$qKK*JVoL4nQQ#MoE$$s%v}`ThPkK01DOXXd?539g^ysK zsPHMw{|{Yv0&i3G{&D=6GS6j}Yo5naW;Z2d$V`!LhRh)uQgI{FKqOPbjR-{~Nt9di z3lS1YhMQR`Dw$IK_qyxz_^-aNm;HLV*LmOPJbSHYt-a4a`<$~+viNK~MSLC}%k$s* zFT~@-KgHw4x8Mold+_Sbrx&?H9Sr{BgY@{;yLgH z@qBoqcu_n_ygZ&PUL8*nuZhPB{dfM<$K%9X;ql_#@C5Phc%t|yJW2coJXw4io+AD> z9^>^atN#i-PW($eUi?QqLHr7yD4v$%uq5$3c(QnDJVpEtJjUy2*8UcFoOl;JUc4`! zAU*<56d#8tiBH9o#TVi!;%o63uj^U+zrf?fcjNKmhw%jQvv{KT6+B5iE610~;>GY3 z@p8DG&$jmO;k;oc&T~6=G$p}rCk4b;w)Gwtc&Uh5dybV0+~pbE$^W62coKQy19!WU zF`PGS8o0}IezMQgS$qn45(0O5R+6Vr;Hh0*&`ZRZY58qz+IjxkNZ3Y#H*1fE^ybLw&W=ncxru~^?7QE$B`#K zaF-`Nd71|9^2GP{c{+>dB2Pl#F3(Q#^asT})AJcT^TfxA3?$+IPJM+t1e`^8)1DS^8@UC47f za7WczML(kDvUnUGi%b3U_4EHN$&)GYRBGYl3W%R??maGW*PeRhDHph-T*TKBkKyrw zyF8W2(=>2L$<3l4(b73C^;c^9ov*|b0(W@|lBZALj;b|{eniX2xYS?Q^TgqafxA4Z zJLBlr#K2vi#KzH&Xqgq4`s;e0!FW>OF3-_4K5j|isd*ZC|44i=o*cN#vzI(u0(W_) z+!g(Zmi=+5zpl3@5l;!+<=H@<(}BA@xf(`4qUCa2>aXj0VtB0h|GUdGk35+IcX<-( z`M3h&t?{_PU7nZ8Q!en-_SE%xYKh12_`qGB``O-`2JZ4C*712dix0*V0(W_yO5@}E z1n%;jujPHDcnY2vxXbg9?SBGKtq<|D#9QM@fxA2d)A;o+2|TqvcY6OwJccI+?(z(# zJzD~Id6Mt&dG?F1#8U!yd7g21ik8!XJ4)=3%CXDhN#serSU?n#Ci>^{jHf-BO8EOZ zcX<+sFCg9;j|<%8Ip^*aE#(4tl+q)WW3|NN$rB&A%hSa*AX=IR?()Rlp31S#;<@mI zz+IkZXA|+9LZ-l7o?XxTxB}ww~NW;{j>PbY(FFMxwySwJjU&mz+D}FX9cGNckPca?*DLEyc%vFkhJz3u>B0<=G^7U z<SdF19y3Hri*?=%f!G_`}LUjS>h|npA@*ula|6Q3Ebt0^J!wqfxGy8#BT}Q#V2#z zzF*vqzf%Htd5)3)bl@(JJ#V}$KA7iGvC{sIHZFx}Po}_Ko@(Te3*4=D4_+>C7az~_ z{95AG@c6)8p7N}>Y2Yr;dCqrq7Ps>t34yyjw~?n$;4Y7yXBjCzh5U(uyF6>7e{Pu= 
zcxs-)-e-wV!IJ`ad0zM7u_b}KJSz+OKYS!^=M$3ycX?hW&z8Vb^AzxZ*e`D9RZ{|Y zQ=lAqP6zJtOv&y4a9P}5_llMAf46z^IC(M!?()Rv@P8;EUJZ{6oGF=lhjM|tJcD^% zsg`&G9v`@?4-ZGNrh&UW_CA=-;wv+zt~QnsxXV-DbF5F`F3+wE{tqL??R`#(fxA51 zj7<#O<+1ZWv&56w|0D(O^3-KMFA3b`iI4udB{^^x&oIWe1n%OK-H&M5FTN5_3Ebsr z!+K8#?n3Q-gqOwR$P?qZ#^%Yd^ea=~E;QxK=tr~^5YI)PxWHX|YLKT~;Hm8yo|-0B zOFTxN_`qGBjpS(>xCQc-Xt{pU?r@zAevN{}vxCo{xDvTD$=J)p6nn8Lt`QH?jX;CVm^o2_K2am-MhvJa=&qTgA8Wc(GUf zyAmF3ZdiRX(%+vXz9akLbK>KfXn%=6k`7=yu>4=qPWKrfx0t)nez?Vb#>6e=@7?5Y zyU!rF#k?H#bnk%={Xy!}T)YDN;cnu8P@f*+!`zCZWsvwF>hrXCR^A`*f_M)bckzFT zUnpLmjp;-2IF5fdiT5DSF7Xod_mH@~@8>7+;?(()cvj|TdfH|6&z0ZT$3CxTUM0@^ ztr9<=xc7?cY)7@kzou@D#UHipUHpA^Y!8d)Xa6`t+_xqcdrsWO(auj>zv{9*&Xah{ z|BiT1_E#T?Utz_Y#e3r4h}UDjoe=NKxc@G0?f+N&2gWNK+okm@J9R54zJvUg#Cvkw zTU*@Ld#||Ft(|yo=2e1tbslhriZ38OQQX!$N!-@ENBm{xVG8c1dwQD;(eE=7Uzq2K zm&D_lhv~Sp)$^wkK3Pujo8!C}7C+7Y(>~{6c{=j=XrJ>i|Aqs#hLY!R_Rr15|D^q$ z#i!H$N5%hS`yVQ9^LB#xy=)iL#M7`pNfJNK_P$#@2an4~#4Z0>@v_v}K8IoT>CfYD zZuWoXbJ>n=6+gs$t0;b)e%&Kpl>JE?@%!*@;!jeC{^IujgpuNJv)>pm{w&+wtKti& z!+i0p%#(M;w~}X__*u4#|A{ZT#n0P4;1B6JJ-n5c|WX;(sz9I*8xKcGO)w9=FduSbN-OYuz$J z;%(lJ6TeJ7r-(0OKQLF^-dDCvJZnk6-nHUi(O>)AgSE3A+tqG~&+h`F<*@iMwy(3| z_W7>M;$_&5GGz4mY`uG#4|&A{p)@KhAucAif`;Ccct+k|h2E`~T(Q?z6sb`B?lSD^s6p;x_;7^A?uhp3gLs_+s3)&s$jha>lEN z#Ghnd4i;}sKc5lb&hhq(;`^9aGsS38wWwC7*(H@TgSd{)oC>`w}cH|6-StoT+x>@oX1hvhj<-R_e3I%WJFT8TH{ zd07|nS>)*>Zr3{)Ccd5h)mZV1>`z`1uSCDz5TD9;EfqgN{tv|$5Wh+M1LoT<@n;!d z`<#c>?LqSYB=J=kuS??j$)BF}Tb^{}&n@1G6F;|#Pl@y6Qc-*$^{*wKnf5dm?@N2y ziXUTq9~R%od>9~pmG(a=K7#o?L3|!{nVhc#(ul5#J|scYAW8` zj*G+l`VC~F>@0R#su^m+sx47Ek>lw#;#S2lN_TskxOc1yCso3Wato=<(N0TL5#!CEf zZch{6#QyLt@qHY>trQ=?@ysXU-;rmV_zLFdcj6!6$HhO#?S1;zue6NcKNA0_3y7Ai zJPup@P#y;hh&N=Omll7H_0|x7oBlQwUyHX8Kgn^(1LA$~N5waAd#HFW9*-u7Tm4@Z zpF#Y5@e}yF;#H~RI`KOA|HK>P--_FM?Q@z||9?1c{6*rwDdorOs`w0!&ofdd%TqnA zkH1-bE8|#P`~tVDh`&o6>WbUvSz3u(`yUjy*Fk%WTfd$Vw|+e~GtM-_3rzo4C7XkXvj!v-VqkMoRo>+GFp(v-pg0scB-bO8hq* z=g$|v!hZW*@m(B;uM@ZTJN-|5BK`ed+}e3U{4d)7yZ9oumw(0MX=gUpWBqO8yBjMg 
zo}2R+WyNj1_WldY^BKn{@e)6bacL<&ll_CeE^2vlkf*oAkLS4J3Gt@1^I7qA96wAJ zFG2n};>EbVMEnQpxkmgY_Jf~`ucf~`#qIpjA#tnQ8S!VChZn`;*{;*m538HS=Mo>t z<9lWCPiSWy+}$KJ`d{=ubBVX(k6uCiAs0yf$4UHf_AmB+6PMq`+v@=tP5T(Vef zz2uLlo!?9RT|6G0l=y_yEYbU6CB87n71t!5@K^@+D^|}|Y`+D>Tl08YTl^}1zxY(z z-%ETn+vfyv`#kMj@i81HpA&CFo$qCyTKi8jZzhZX&f~>u@rOB%%E0lm<*CT}+KTrl z|I^}4xV=XFW#;V>@y9stdq%_GBV%haXkXqciV>yZk4jf9IJ0Jp(@;Ic1LZ5obEYhKS!E-N!A%;!=O5 zzMsnFY36UnMvITa$BI9JKOcD5&MASr{OgH-O?(wTSNsb8ruZ3rnfQGi7p)L)gm1*H z{!_m7iMEU1kM9fexPIB|*Czva{TfRCpTzBbW9M*N??m!vDjwZATFl$vd2k!|YV03M z1n$;ruhW$gpWqTkOGVt>bn|U6`R@_8I<&%Vz4x--Zh^b?zUvZ3i#`8#@y?gv{fT!s z-FhwmFkJJG4cz6~{fL(2z+IkF#Ovqv zn&9Vx_)<~49dBF<+~qG#{+uPE8`0wWdn)?(I>#Y}@HCOR^|p@wxuu50SLOJjzQo6; zM#o|uB)&NNr38tuM*K*Lf1P%ZllVB|=Sch?{9&=g=OX@7iGMP!|HD>^Cp`9}#5bco zXC;0xcSy@|kkxY~k89Zjck}0fD{Qpn7T<^G3*3!MPxkM{19y4O5no#T41T+KPWoF* zJR{yj{0_XicqQE4$74KcZ!N;4aSx#LpIgADUrz&{ngjPDTt8$XD<;d85W*VJ~)Zxa7H z&!eve?%JPBJ7c9%?{@uha|50MxBT|FlSjNe>un@H9PcFlj$d7DXyEqP?*24AaMzTL z#4ix%a3l7f_(|e7h^M1HTgB~l>J#GAiT_=EJ^ruw4|w*{z5%vf+kUVB?!sKZ5`0*! 
zb`XC&GP~YOees=)OAGN+cx&-v_`~9V;XTAJ;g5^k=K+U{XHDlj@Dy(S>P5fa4dUJW z&&>Y+Ly5P~o9vKy+aK-^+|{Qz>pdu56#p@Bw_e*H{v5c=Q=j;0Md72>bqs{?m=Ugvqpr-8dX?_`L6M9b&mPvTz&?(*3F z@SDJ09=raqy-&gVHT5Q!J^C)5Al~J%{o%>LU7mM||4DoaelBpA$M%Q+irfA$XW1wy zT5LPA{b4b2+aFdHPfOhzif6>{7te-2EdDa{V32rG;>U=Wz+VyPW^6wHvF+ua4}3pA zka*i~Zw%bkc@y>AB)$>fBKez#fxV2?P(}p5N|482X7&M2i`%v9sYp${dl*)!#ea1+|}U; z;`@s~hChK@orhBANfK}Gznm&=*RP%-d7dWEJK~%1E#lwe$Hi}b&)4}^+{V2HK|hq}Z!6|af6!EM~jQs;gW{~+;?iFd|_N}g)unJ7LCUnKq<{+aki{43nX zy$j?1L*TBzeQ3`S@t*i;@t5$Q#qINVe~Q0D{1x%H@w65EINN;Ng5M-=*9p%l{v+{u z#Sh^H0(awXbtoRVt3%q%z8$5-uaTz;ZsYzJb#5Z@#mUoL+^#p?TJoe{>DSv!yeKE6 z#)?;@Mro zXjv?t5r0p-GQL{8JibBvK0I06u7AE&ya(|+#O?jN-vl1k;fKIo9iAi45%JOZY23#B zE9#u4QffJ_&y$73XAplA&w^W?Z^=_kyaVG~PrN7IS$sJAf$q3lo$K#Jw)e*acl}*O zdxneK`;5nk+xv#ciGNL=m&GsOQ^e2Xv&C~~^>v&po*iE-UIAYwUIu?J@URXa1@7w5 zocImmjq%O6jn^3Jd|2Z95r0&?4}MDWOeD`W@f~=<%Dx`9-h+5e@iOoGxQ4hZuj}tj z#{K@lU4KW?o_69-;9bS%;0fZ>@xJ2g@qyy@zUtxP`-vYV{w+RE{5Sl0@n7(l0}tyk zJ#bftT-l-@(K1^+8@>Ry`d6jS>m|Mn@f*cU;a^CeTIBg&yeocDyg#0&if^yAXANEi zw{c(0xK|3?_4jt#W7m^&@y@H^wI#k2USGToey{il{66uacsude@J`}W@C5PYcu(=A zc;CR?xLX|_58U;4JMqKCx8P%NtMjMSd6vW*E^hBr zuP@%0_`Ae=;r9ma#@*_0f8cK1ClcRI{5iZUZsXpPIuDchdBi^{{sul;@^mE6H1SdR zO7R!)?c#gzeYlOgeQx$-;I6;+e)^xpm(b2X#CPJC#JA#h!8Ds!=kWB^{CqJ#i)R{=&tUP&oR^yl4Dy<5Zw;k$2;=cL4sA^s=vr|@%<=Lg#JkN9`X z{rDED?ti;_x?PN2pQ>Kq?j?$I#5WT!vcliLn|M4vRQv)yQT(mpt<_llpy zn@gTs$Ya-=xBTaa`~1&H{J;2Y@tz}m{6gHueHr7vI&jzDi?nC0`0seKc)`3r{}1JZ=}zBo8<)P+ zxfpKq^DW{_iZ8&+NuI~aQ(yd5{6XyK%QV>i89i{~C z>hJ~euZe$(&&93&d7kuj{!rqN5&yCHkN8H(QcT;v5 z@vn;4;qmcp@z3xL;;q?F?iN3SpA;X@_VtJO8+fdq?}ycGg>8@G=kOBZTd7aH_~xg4 z-j?Er@GiI;U-vw^P@Io{RN_MqP`}Th!o&(<_UJn0GycB*?yaE1`cpdzTct`wi@wRxj`o6ukeLaTf67PqX#BH8r zU_U%Ohu?oxEtT+Snp!-MEsq=-Fof(+lPU>JW0fVEdDy4 zp@Cn&)xoZVnpu1!@dd=~^XEmx?epr*#9J2d>uV|A81E`R7Ecf#iSG^E)xpl!r3CKk zP@D3-E5Z<)26X{^A9`iIPRj-1yXA*FSGt8UIlH&=wz; zjJrCx+0&mMSKhkSv%I&>hukhonRpd_9qiggbyFH`?|65o$J@#1~(#*$|` zc^(u$#W;=-zlcv0@4nU7WiD>xUWsvE9=Pl8NZPYX+^*yBvG^R~KNX*je<{8m|4Mu< 
z{;l|ae82d&_>bbh;U~m@!G8+ejl0$1kHB3Wa^33dc1hg!d$C6T?`fi6ZrpoQ=YqIB zUX&qEQSnlE3CS~vu!P|?^$L)K7tv%=PQNem$f9>-KF9z=Vdpqr!EM5(tDc%X6 zBi;soOMC>rM0_Z|O8hl^jrbJ&Q}N~a=i*E8F9Q$juq$v^hwa3FE4~Fkgj=02Q|F5k zf0X#E;`aUS|4N=T+oF|6%guND-&s)s5Z=`s3{8@4PzNhixRf(T0-Wq>Zyahf-ye~dqychmf;9(t>2kz=Hk@!{O z&*2~AHZGH>^B#$xNBnoDrUR?Y| zytKGoAE%1=LgH(P&&Tf!{6y4#t3!O?u3w)Me~yK#c%~iEN}^@Hcz%4n_#^mc+{S$Z#^_)+o9B~qIcJ1L$H zKPO%h|3loq59peBQ{rPy{CaG@-GgTqe+17i{t%uga5wH&hr)roI@t9Ui-|u?o^rU2 zdq3)Ym&CtGd}HzX_a#mnHo1n$P&>Toe|R|mTe(N*!r z3w+x+ZDe4Kb6ytL%GojeW1$KoBtr{Y7zf54x@ZQSkqcwP$J^>-xg zc}4sQe3tke{0(ut&e0u2BW~lq zI+M@yXyC^qvgwp zyK%QVyc_uO$PW>}Qv5r79d6_D7Ipqc;;#|EPy8zWgXH;uJQu{%?(*Z6^*;aG)|(qI zE8YdKj@!6@&bZ$d`0*%ZmeM|NWAU5tR^pZMw&G>+F5>s&4~sX)`-l&~2Z;B%W_9};-lh~^PLLA>TxA3sO@b9|+Er)@rdv-quFc|Rb& z9zQ33?{*)5P5dD|b4&kstIr@jpZHw7qWGO0&&P}ZhIbHO#qmQw@gY3Ej1jNS{^T|B z-|(g4H5tb*#e41X{rpz^8T?1_>G(O^&BIgCzvpuMJXa+?mmBX@nzDX~%+eqRrp&$loxU*I;5h4cEjuO+@3@h2qSK96=jaJTpeyOp5n*wzLLKW`G<=?jZYPyf-e@Yk8c!ji*FZC!jI!N ze=hO<)pHUbM|`d}Q75Ct#`h}yx>@`pUJ$o>mL^a2ApTU8Z#dg^U5QU7zOBUDynQHe zSN}ZaT|ifxC9v z_Vu2)y^rWq-0Iw%$MHQ9Z}awW;I6-qvEHNN{qR$gzcu+Ui1)?wwT+TSi?wG2UiAiE z`v%@o{AcFFhxWWHZr7(>Ctip6FLB#mrZI1KNc>9H z`)d&I>ijJA{9XJRJWV^_5!=3I;~B)K<5_U4^BUG$F^G5bXEdMVs44L)=~oMhx9zJ_ z;I93vSZ^2c<#1^#T_@k~EYUBw|5=pZ-_DL({4U0~l*HTPQPsfR zcs<2>ZxbJZ*9ts*Ji0q@muDLBO~fbR9mPMu9~6HV?<@WTK2Ur&{KF7l_x#7mMGCuMvL$UnkxH|1|LM@#SlAdwe-8ZjUd&ireGMKjQZIlB2V) zQ-6rw&IMN_+Hy;MjuSVj1@FtQ!FZtVxH^8Thx4>82z(2Zy z?-eh6lkeAWfxE|%`krHXAMpRNI#eeFGMEnK3z4#mWBjPLZ$HeVA z`A>=4b>=6E53@Rp&%qbsHcwXayl<(*C$sZ(Xze9Yw2mLs@_Ph3ciDwkwgy$2t>)RF*x9j1S7r#WF%Hn_EHE^3(TdCW~ApTU; z&L)L?yT?j=GUGB^;%z_wR^YC`*(>=xOT;td%L8}owdeI81@7{cCVqpsU5EE8@utM@ z6mNhZ7Psr|9u*%z{BPou@jt~U;_18ie%rXei)Rvl3(qgU6E7^j6|W$E4zD792Cpxk zv$AjBUE-PX*5Xz1_TuI79^%dM-r`O00fD=5xAP=Vi`(PQ%i{L9^M<%R?z}5*k2@R0 z?Qv(jxIM2qC~lv(`dQpQZ*>i~?aQunp1bS+&j;6Ed;BUAxSQwQs85{ugLrA&@-NTq z^VSeQg-;Q;?+IOc17Cdu-z9FJWBNJpY*AC_ek{jBzFpSNW*pBK7Ps#yC?{_3C#)%M 
z-{0Cud=dHY7vG9^6+e#m6~BZJ7tc#Q$BCD*I*T{L=Zbg0mx)iq*NWfbb|BI6g?LST zw|H~>2<~pW`DX9iKOyleGrH{2^*_4#-!9(G!y(MKEaG;Z=z`*QednU$c0K4y;`7O0 zO?(dCP<$#n<;0@x1sv@v8U& z@rwA{xNTqSY#bl{|8aEVp0A|8oeQ`4!OWAg5^s;=H3E0@p)u>dL;NnhZs2ac_BeiT z;4aTY#NQ|034cKRY5XB^yB_oa@i&McEdDA!M*M&HIPqlsRq>Pf4Dl3vk@!tjef^h; z$MBED%itTtD`PQkQ$g^W*KsTr3 ziraC^a&bFu`9$1~TXu-samyibJ8t@QmGk zdo91kml7|K)yKsLUdmJI&+dVzjid_kgTx!*Pm6cJUl4yBpCSG{zEJ#ie5Lq@_$T69 z@onOV@$ba#`yh^s-{cM+qvbd8g7`n;cj8%l`1@J?+u;So`{Jd=$Ky4`=im*+*WxY2 zci<0*-_HDbRD83|L-AkmMDYwcd_O0N--5p`UK(G5+jcq*Um@`c91rXa;@$T3DBIB< z@$UFh@zMB6@lp7n;xq6o;;-VFdis9boA@%ku=xM*V&b3URm2bDHN+3#cZpxb z8;k#rw-?V*-S?-ncouw`co}@AcnSP%al3B&JL0v7UoYMk-zeS+{{pw|s}J*}O|SnS z7xy^WwY0z81-E#+ZvW#FZ^xO>1b#e9(TDXuE8Y{I5V%{f9cNAr+~r9me!BQ5{4Mc? z_!9Bi_*(HD_!G=m%`IM>hsxlbRV8k z{2n}?ct5<5cu%~%_=|XD@#pY*;&y%cc=5N0zh8V0-cEc6-b4IPytnu-_z3Yr9Jo9s zo)1q9JUkAZB5uclN#b@KxKiAX1Czz=IB=J^9S5d}+i~D|aXSu-_5T0);M#A;fw{!( zI51AUtiOA#ns{YA9=H9#(p>%z9VFh4>k^znbU_FMd|;_vf#S|@OKDDb$yd#r2VX(KyKd|&Y^_;B%TIensW;$`tE;&t%3 z;@$9N;-m1j;*;<%#Fyf`#W&!G#rNZ9#n0lG#WONq8T$HuTm1{+dBpAe1&WDxA-Z0S(#(@~;w|w6@m}~q@nQH=xZ4k(ivA@Q^!J?wGnLp+1{`}ob`U*iSD zx8Y^QPvRBDkK%R2|G^uGU&Z^2=eynaXOMUv{0ZFVd2Po1d=T&E$@}?yo~sf+g?W{` z|Nqy`#oO^!k-**jslq9cm?rmcop%B zcmr|!9*aie`R?%TZY$mZ?@HKP+CB_@m!eJgSdSU zL?Q8a$#bjtTX+?5`@V!4;#-NoGjO+`wDZ7C#O?UMleiuK_ZGL~{}JML{QtbT9skc1 zx8wh}#qIcit+*Zke<^Oq|KEw1^}`W6C0-f7B+kQQEaPMUKaQ?$b{;stcti4(6>p5+ zDc&4!B0k7HutrNKao%n+~RhsQ>U@5P@N{{f#V{v-ajxa+f9){6h)IrgRaANY5; z9VgVv@Bi?##M|?!OM$!na|-kJFY$wTY>>}q`RkKEyZA5oi{k&{3vb}dZ{XX+-^%0L zaWe2y5tZ{C%QX1E?fiuJTg3O{CB)C-RmF4V^?B=wSHhc#H^e)Ncfosz55WhEPr{!O zUx>dbz7d}(Zr?}qmiXVquM#hKv+wVx;#Kjl#GB*$#UH^>h(Ck>EXuQ@k{t^;(2fJdE>>)<1NJ-;}42=!F!93!=Dhh@8ft@d=2rF#dqU##82T% z#M9;T{aPbl5dU1fCcaa=Iet*QH-1L^8T_L7Y&_lL{~gEo@Lb|m*>4mTzome`e+BXS z_?_Y%@O#92;cdi+;oZc?;9`SK_ zKJhVlY4JIDdGVQe)+heEUi;pooZ{~hf2;UbyoC5>yt?=iyr%dM_&wr(;!VYW!&~C^ zxcn0Hs@t&t@0Xj0uNU#R``{M8vViw75^v`RUJTsL&#WAfO%~6HPYc|w*Uk^j3*6-? 
zN&EuwqWCKDczlg`J^TytcK8V;ci^=c|7u ze!F;P+Vj150o=ZC)B1Zmet~$a=V|u;S0zsp>n%LOH_Y}MqiXxx#l(l>Rm5M#Ylu(A z@5HU0i&$^xAl}WNt$F-6+ z-+;d({v-aL_z(D};{W2Gi(kTbi{Dzu*I}=C5&XD#1^l#l8T^uXef%%++IW_ce*HF2 z?EAEGh}-vX=={ljhI_Wb>BaeMyWPTZcqcNe$k??Z5#hc_4Y{h1)~w*Q$L zcWBdEIlYci?XS zle3WjgMI&-t+yopti<1ePZn>7&k^s9FA*PwuMwY&e=hzezEgZ1eo%ZLen$KXeo=fF z^-n*_*TeRoGjeeoxB4U%^jJ`K0JEuwDwgLt=pNMQeeRN|8umn#x)`~UP$`MTPE zDN9{{JCpcLc=o`<{eOYL-8hyYzKD1cyrOtRysCIzyn%RIypeb-yh-5U{)!ytMij} zef(_kVfYg9srb9%lkiozjmz}HKL0mCyqiBK(|JEA@kvFz{~__Vef<--YyVQ#oAw!h zKilpW;TZ#W>+PN1$K?s!<=IGlKJoQ<;lN!UyPj2C&jN7;@qt3m=KgJ&y{~CW@{4hQrw|Tgl@m(hI zQ;7ded?vnA@>I_1st}8PFY)JzKQHk%PyPzr)#n;@_*eWgo_=)dYErM470#2KfxA3; z>iM|5;yLgFfrs;?WZ*7O1>(z!m%=Lr9?p|mfxCH<-!~_ASK#41X%V*#iEpFe_ z*#WnC@>dbRg6`s3i+Udi;u!-XMM&|4KZ4{pe27vNJLDSL*)L z<*^G}92PG^{88}&_$l1#T$^_0c{a)!E!MACS%13-Zt;oK;Wml4?Y%+ZuAMblZzJ(4 zc$2`x?Y(W_F3)|$cND(|?(@@=7m9y{FA3b`vE!N5fxA3MiC-(8f^P`i<+1(GSK_w+IUsKPpEG!xD2Lnb3KWZe zM9V)CZ~LFjWBuQ4zx^xi$u9mAo(H%5Maf@GycXV6+`eb+A@SAB+b08e`;%k7d9m4n zr;Y3o@r%WO!&i%^r#;Eyo$!O={qZy6lkiKpZI7K@L82w?IRD$~Gnn?2#I4S08u+-f z;(t@;JH+!7Uq?JI-b}nI-cr0G-WIoZ4!hO2XH*dH=AnI#>^X@~ZtLUcNWAT5766@Lid9k|P5-}`(paF=H`@hRey@DqW%JodfM=L2_n z4iJAqd>4KtaCf{)*d~*+;3egkBD!_PvbW3%c*mk@xHLOpE*H% z2JvHf7TofDNSi(nQQIe|{uzH{UA7`#inG%i;Y3ck8w1eZvBGd72XcqcQ>sM{sKUKUYK11@~LjHx~weSt%t?<3#AM-xTBe>gsPelJprj33? 
z%elZ^J6qG9Kg3($*Tje7v5BeauFr>ocxLfwcy{qu@OrgXrSBKNY*A_pHH^OZ`Y@*IxB>o@b9~Qrc_mn)_$@8T6FZc}cYxw)(@x1?X18(D9 zBds5&?SZ@gX5#>5mv|=pd-1CHA@TC~N%8yfpTwKve~1sjFNycTV=w%7zS;L-rx&;H z&(0jU8+WTip1@rl?E9_ri6^n%Vz`aFy>Gad#M}2<*AxGUJapgy)z7)@h?-DPEe=A-JKO}w^ z{-bz({AA#6+^r7h0zV#k0`Y%{cg3&aHtsvA^DQs>{<%84Jj01EDE~2N3#na(= z#pCe&;zjTxfuD%-S{+IU?$+Co`10a)@fx_zt1Z;Ixx}|8{(kYcczemSn>>BRZ-3lX zA^Lt`@w@SP;N_td{@qNscB*d19wxQDe-5-AIJX@pN8js+2^x56l6OpCq5sq zFW!azJ|I3G?~S|pa4P!Ozl2}k;}XA;=P%QOcsGvSsq;+nZunyHQTQ_P;rKf7DflPi zFXP+A7vsCc7vM+4H{i#_Kf+JrR{u5Bp~xg(C#zeD^!|1k+~Ri;Uti+wd0f-LT|K{H zy)DGQ#@ht$#><{BbPe3)IZb?m_;GxYc)jw;3I9y;XNq&#W3jKri^QdJ?2LFd z{2%c{jBozQ{(jc}-|d}uXvioC&YPQCh<0J*97j`(}ne}6Yqq78hAKww+8O=3?Y7p_(1%i_zQT7_&EGT z;NiSIAGllZBH}NIzlmQ7Je;=~Uh#FZai13FZx_OCd~M#A4&1GG9r??PufeNG{#oR& zEB-FtQ9K#%FFp`|PJAprReTq|6t{k@&m2o#K9Km<#P1E_U4OUJukXdT;D;qo2l8AD z;$8i#=Zb!~Ws3i0^**=_H;F?}}R;F2<#X$NGxjRNVV;@tg5iZs4=ti{Ic46Cfk-%NQ%G3Th@zQu{ z-16T|{u<(a@V??t|9%Z9+yMz?1Zzg2uWzE3Aix)+kU%d;I5wUv)`PYj- zjPDa4ik}rPil?9M*Kh5wiRTs{hnL4~{`BT}@^*=j5#J$*cl|9%za9`ThIf-Z8OSp{ zhJ`CLD=|ud;;%)FOGkklk zZuUNzTLO1^77$-hd^mn<;I2LPKAG}?yFAa|=hs(Ryf0oOaF@p(e;bM0<8NDWd;IM# zZjZl1#O?7n5x4nvW}t89bcwg^B`I*%uYApY{9EFA@THRfSMq--p6fB6XOsA?_(AdK z`}_ElxEn|JykNiUNVHrG+_keZ?YSym9#1zj^={YC>$b)-ir zF8%^uL3}Y@MSKB%Ti{_G>ILrVu$lOH@z3yPxXp)0sDF2fKS2DW;`{OblBXYe62+(X z_jR8uo`kOxzld+bZQM^W?t21v{XI>4z7s!=r-)~5;oEauJOloVct!j-@v`_;@uv7c z;`iVgXZiKJe!2EPf@c+f2+tL`8+WTi!N6UfMB1O+O*|*C|^ zdE(FDZ;LO)-w~gWe;}TWe?!{*-tv{5kP%_(bss@u}iZ;nT%O;Pb?%;S0oH!QT$tjl0!hW#Fz39}xe6_zL_J z+{Wt#>by_l_Yi+Td^dhX^1Mo(3*z74x##?Mp8SYc7BBahk86tC{^tnmZ6SUL?{I@W z{UrVu;vW+~gAbKFw>{?DGeNu-KII0!T)aAur&|Lr6{Q~R)5p#R?xuJv;$v_402{BJ z_$}fs*-usw--9<4e}MY8!Cjr*^NxGU`1N&?_$fT^8yUpA`Ii2EAOEy?41YsPN-->~|dOps2tBOB{-yXPIuRZ^57`V$bnfSZKC*pI(SL1JrFUMDkAHqKn--B-w zFWlPK;Y;z{_z!`*e%bT!lYzT_jUoOg@n-nBz+E1DK7KWDmuGsL=ts2tBR&*QH_!jw z#>>vn=Lp>8xudO*%OhR`&mXvJkDZ?{5xC3KlK3*>&G3qWyF7M&psu)`A80Oa=Lfoo z+xdb1;&y)EDRH}g(M#fX{oy&{cKzXZ#O?aS>v7v&;u53IMawRUxAPA_1n%nAllmMH 
zPr#2${?dsqd-T0y;)4@YIaYMO|84Dg7QaJ$@zXvo9(VJ^Jud&rexP;WuANWOp7!D+ z@Q1|b;g5*V!uyGTia#d)F+M{42mC4V@9^iuFW?i!&*PH<59=@^a94-C?V=yi^166V zd?9Z2-=3Hn9@`-C<%mxfx6kQpmOOjOb3lA0<9JDYBc3KT8#C~o7PKArc< zfxG_RK|8C9*TCzDKY%w7Z-+M#e-dvlJ`8U!J_GM8{wn^6_iTF|C^YAf}=M(b0CjKM7O8h*&L%h!Ds8P}K z9d6@Zf^k0;xa;o<+H+QX8Gb>0H-1_CD?H7D)YV@Yl7;z#f_3!_d(i`F4a;I0noJEn%ka)|#!p8U9t`#03Nn#7kR&+X#H@!FE-2lBKM zFFnS$r@#2^`19iX@Ts`gw$B93eip_#r$W zZh20Vr=ob~M4z{*cmcepcoIGaxAvT6z2n5s;4j}G&zll|h4@9{f8xs|&pWhdo%nqm zm+lQbTh!GC>7pOeaw+h%(d~Z3XIqr|=j-qPxr!GTf0oC=JH&IbeKi-Kk0*#XW?Tk} zcg07EKZZXq{vQ5@_zQ`?KOc*af7bh#;&bu6xEo*hJUVw)AD<%eNhQ6fS?qt?e8|II1YTCW9$rzr7G6iZE#5%972Z<358g(+C!Qca8t*AS3Lh#y10Ny& zDn3Dc1^%M=GJGy>+xsHM<$MtD#&K3DKQ31#KAHI3Z~y-|x_CSPRwVG_k$=H@mdGAl;>O~-;M_*KCz4U zArf!%_UXV~d)lzxMDdpR_`uzI?Y!}nz+Ik!#J?uq8~;rFHGH%9%lNjy-Fof3?%u%N zdUp{2z4$u(aNsVFo!31RxXV-Tf#^rH{32cs|2=S*$F`%t19y4m5uawM%jS=JoxhIX z6u8S{+fnYoUHn?&ZxLUG7Yf|vvHeK}aXYVDTinj;HpOk;9_Sh+imbl@i^wsbGWsqJL{b& zo`6riL7v4DKbZJs;sfyaBu_=!^Qrh+;tvGwo@Xs|PpYEjufQFBLwv4xQ~!MZdDg?! zp|p5z)_b>jW4sOS>f;_4M&$K*yGneuQr@2l;@$W@PkWvfABRs7pNqdHJ{w;sz8Zg9 z{5||#+~!YB+PN!;cm2wq!?*u{#P53E`)?9&$3NEscm3VUdSlD|{cK)s#xn%&)@#Q< zxdL~2P7r^y_z^rQaF@r9XWj|iIzilCryeA3uTzi4Z9eCp32yV@ z3+kUB@k@#CDgHLzSMuy2&uH<QeTnZy{9WQb@OveXU3aXDc#Rigsmn-lyI$C9;x7?D54UmO$GEQu-1T=T z?Rj5(5WZe~2EI{z3cf}BLwvjV`}kh*J^1(HJMp99KjSCGPvd6;cjIn#xDdFjL)J&4 zAJKAIJR_cFmH)eq`)ky>5N`9ZIPtfN7sE?Rp1I_yCH@)SLENs7HblJ8i@tqN;Wq9k z826U~cm1tOdtMQ*h|dyli@zb>0$(KlIKEW;G5me;SMU$TU&1$vFTpp7zlCoJ+>N`{ zVRzuJ4qp<#S9}wG7*7+$y5szk)cKOcA0qxQ@gMNm`_Y}F#qv}l&n@D1eYYCo4-?-? 
z+^);k8Mkr2z_|Ab-1YY??HM3`3Lhq(qkC$zV^4}_!pDkN!6%4Uz+Vw>j!zSBg1;f& z6HgLPz!wGX#@*_$B5+rSam2qbJ_cWp+j!aa?7o%w*~IS`pNSumJnI?9-^J}ZcR5%4 zdfR$`CBC9~t(SaUP29#kU3%}019$y>oAxvlUx>FA--dS-{{nwl{3pDJ_(^<#c*Y*S zorA^G;ZKUk;m?Q{!N&&f#@*`hQsAx*4T*n6ye>WqxBA~jomWYG2jbU=x5Gb{Jok}j zr}$&|&*IPEnLhA!vi5w2=fQ2<^Dyou0(brGL3_%GKa5uupMc*co`}~IUyR3#FTk6L zZ^B!OZ^S!_AH*LN-;X~WxEpt?L!ZE19sVMIfcPbR7;bgmMx7^1eD0opJYE&giO-Zg zd&u*y_yv5cc)FK;{uAO&@N>A0d-3!>{@=h|e~ZzcbZh+IUA*(ccsB7ycrNjJctP>* zcv10gcp33Acm?sN@Y}>^;&+Nq!|Mg^#@*`BIPl|fB%Aw-Y}| zd>j6R-55%kD zHtzeVb5n_bp7<8xR zV_x*@ogOS6Mmi*ck#`MFD3Er zeT0ghTm4IO-m2my@LHZ*f8F~CjXbwJ^@wjOUJLIi-W~5E-U%Np{uw?@{A2u6@pbql z@zwZT@%{LG@!j|u@vHcH@k{s~@wjcne*G*S9X}0}?tPrHp4+;bLwp7C8F&@XEsuL2=MB#- z&v(Q(6kmxq@!ayb_i;LkyZ3Phh`aZ3Mv1%kai)vA_i+}9yZ3R{i@W!6_KLgraZZT4 z_i?U@XUP~$E;t^E=L|WLa9#KZcRiV(BmDdY++Ba%`#yO+w>od7o(07>;zcF@67rW9 ze}YdGPsKnNh+oSVj&q0Sc3+NW6A6yrJ-7REjO`XT@pV{lwY_*-|A*R!@o$J9 zz*~x6!`q1ef%g=T)h^86S3Da2k@)lYDDkxTH1VSN4Dmwv67ib&a`CG87V*~jcJUVY zLGgk35%Iow+>K#9+~c%9eHnrx#f9C{MX_S@I{_m9=HElFYfj~d&S-U=Y+W1 z|6CP!`=3YRZvT^Tb1;bDaF1WN|H-g9ivM%B|H&)v_CKY?-TtQr?yeg>bA@?ZNW9zU zboAWn8M{Lm-$gt+-V=BE`;mXB_zwJ#_%ZxX@k==)QAg}8;qR8;_BqMz55e)G=eEyz zM0`>4BrisVN6L$5#cPYdiZ>Cjhqo7Ri}w=mj}H?cgO3xRjejA&3SS_;8(%Gc65l5N z4}L&A3H3iEo)y0$UIKq0ULB9MHLQ2E;5X|_b3B>&MLfUwUHoP7=W~a7tK#v4Pwnwr z>ABGBNqiU9Lr*W>)^l3cZC~+}_($S}@KNIV@M+>z@fqS3@g?Fd@a5u7@h#$g@$KS0 z@q^;y@gw4+@yp`#@hjqU@rUAD@F(IM@Jv61{c-D|Cw=(@k01QC_3>P+Fm8gxznVMr zg%a=fE#G-={XNQgzZXA*Z}QyE>-H_XJ-0kJiT_Fb3Vu>N$-Ch=&x*&#Z-^Jf?}+EX zV{8la#Seb7@w|n{5wC}*67Pkl6YqlO68{v>Cq4=EpMu8}pNJe zKUe&3e7^Wyd zf?=Zd67Tl6J3Y64)uAtY#B1RDCI4;mpA@fLC_Hb;ec|tJJk9W0;z#m_aUDEQ7*KKh z!r&O?x$STNB7TZ^^a4@gk$K`N@fG4P;hV)P;rqlJ;m5=~;g`jS;CI9);?aHz$Ls3z z4W3AR1D;;|SNuisJ9ts?c=W5hcqY8IcoDpbcs0Decr(10cu#zo_$T-{@tOD+;#=?q z;vHC5JH&h9zlwi>pAlb$UlZSsKNLTN$Nf3%qr1-ZNDzLW9C!WAoIms;Uc5c--suv0 z3GrrlW%2%ab@AT#TjHPLZ;Ow`JBcsEyNZ8}4-x+XA1=NL|4jTi{<-*Je4hAie1Z6N ze64tbu3BKYPnQ+&~?DTP|7jO5M6zRgeYbE|hq0skByxXrH 
z_uSS)NzQv(yeNLbb33owuU_}u^3)~%ws>_s`hjpT&Ux4twX zew6r|_;}APkK12O_uTTlNBm6jPWW8UEsxt@trByT?~GHafW_-tAk8dTw>--z_R_q@;LnybSL0*Cu~;@j>`!;^Xme z#LK-Do_Cw)wr{CwCkl>Bp4+~qA@R4x-^C-pMtxbtjdKwGocJ_6o%m8br}$R9i1=@K zIq^U6TH=rK#^Ol|hvRN1J`Eo(z6u{NejNW&{2zP~?$*y4zW-;n#Gfe?j^~IMZ|h`C zx6n_Be}Z2TpNC%;pN&5e--t&)7@o&nSHH)TivNbE6#oU!CjJ-xqWGV9aq-yQ!*RSU z9v!bHo)NDlo(_KlckA;ab)MnH+jXX9yzsp9B|dYJ&^JrGyASU1-1=37^ZqPe5I^X- zo!8w5PkU~8Y7l>3yb^v(yc2#;yfq%{P&g0Q-wAj;@lWtH;v4Y{;%o3c;>Yj;;s^0E z;`i|K;y3WR;;DLs^QkYM1aBo?3~wi%AMfP3^~K!>`*?1BX-ND4@i*}IJ-0mWJ~+yA z%hQ4QapLXpiJn^?cORT9?(T!D#NB;xySTd#9ujx=!HeSVK6qE$-3Md+77Q*p+~ccX z(eU$hxV!GT`(Sp@tq#4IZ!Ym}cz(%0nEWq`FTsb2Z^0*vcP$c*bEW5YA8gEh@Hfxx zJ~)E-^WxL+8{$jw$KreN_=m&uySkmmQ;Xlivx&!MUWLTd;$_71;nl@o#orX)gZB`> zh`%qMpjbGcG2(gf>9|`Dld^{KUrT%v=DW*_w{`Myk1&3(_y_oL@mct3@h|Yd#Mk1t z#8=_bj)doT*UtlZEb)DK3h_ViG~yTWoZ``XhUd#8{+Reu;%SL5Bc2kkfV=gUvPhVJ ztQT+Bnf$3kpCa*R-U)q~#Jl_8I?t_N1vu{}@jUo8&+WYKKKQfemZv)LzlvAHQydM) z<<^z^xp0|0w>)Er&nn&lf6;RrkNdfBg*}fNPp|NN#l;KZr9HPi?mkl4bIY@f`0C=@ z@H(Db9(NyU?78JROMEl&<9I92Eswj8^bmLVk>TR*KJuBkyN}Gq-MXsVCOAoOtd@9p z{om@jjXzrNsPM=R@yCpRkK}Ja{v+Z&@QdOf;7`Q6v`HyEF-?9 z_#*sW+^vTL)PK0dZzujE@gMNflIIwCW{dZukDJ6l#*d3%!Y|^k@9)v~yPn&8e`P#> zi~o#AKOO`Hhr2J_!()rzz!Qt7>Jxr_a`9w%2JympX7K`eF7X<8KJhB}OP*WbT^(Nb z-0JWy@nyx^;#F|h_bb%7g~Y#4d~5N+cn8UIhdhJDleP})HeEb3zDB$iz7=1J`z7Iz8pU%t-0F~n_zdD%@a(wjyZbp0WhA~d@#V!!;guy%FZ$I){Byjo_&j`~_@DR;-1U7l zeP8Oi&9@rkSs`8tUoYMX-z?r1|51E2zEAvP{5SCh_%ZQ$_<8ZI_+{~p_*Ks@2J>}w zxa+yq;XLtwi=V`!pA3S6!}a|(bxx1Fbr`*07?(*rf@hOFkH}L}ycE~}2I5umZsIfX z(YPB=BF;NrJRv^y|KwRJ@fnC;A)W?bBY74wo}Jy>3rBgCo|d@OM|| z;>4#D--H(se}#I!CjJB7NPINo?15X|Y~R=FVS)aY_#yQ} zPjx2z-JRF<_XW?bZo8>ZcJUo}ZqKd$?)h&~&n?d-;!BF3!K;WT?jL+naMU;x_1k~@ z(pY$7@yd8J@mKKfp4&Lx^XoyL+c*akKUBOYKEm^;^_DUCqTu-SOw@1xjdLCGlf;+e z(>%94?)mjR&uu(Uh+iOn4`1rJ<#ErizxUkozZkd2m(^i#m_D+>bDM83=Cwt<8@^k7 z3jUM$XZT_9b@*}d)%XSRBlz#)2l1QYH}SjT*YJm)`*nzME-Z-E$Lf$|VEB0)@q~C1 zJX#QDeSb`yb4h%5;`51T#b1&<@#$lA@k-=xDPA8RBt8?Lg1dPY;=Eso7r^KJpFHa% 
zzAW*Z#9zj@NuGs_=U4HyT>r0oZu_)>(Zl*DJsKvUsImF%n6Z)F?KH}?&U&P-Q zcmJPYU)^W4Of6EEf7)FW%Nq1L`ne-2Fe@Ys5Pezh1l@zDIm0{i;BfJqiO+<)I=K6CZqMzHlR0mG@dE_-x9$uJL-7uU(XwRZhzcHo@U~k@m9F2!w%}uRs1)6koX1s^Z(@wvtSz)OlJ8Wem{aJ(WO7q20n8?Pgt4R0o134cetJl@W8o0r=cb@$xn z)tLC+;tlZuo?9NbFB;*w<>^fPNb!#NIL|GQ+ZW9ccl)9x;%;BG0e9>C1nc1!iFeOm zPI_+R?8kV{iub}VN&d6szb*bRp7~OcG&tONlJUSUDqf!Wx}MwpqLY1LaCG(DZXpeb zA0*xZA1OW%pDg|*K39ATzFa&5x1OAfuTD&;!`W1_Dw(#QZk1NszAA;juiEmCFhDyA<{*3nA=6i_q zju$_GPx9RU=pGldJhwboh@UHd8DA+LgN^bU@yEn(@ZA3Bt`ECBxASHq&tCEL_yN!D zkM8<#5_fg}jJn;Dc=tGc?75w{F!`hX9()oUZv7O%W8*IWWb!8$Uxeou-;7rf?}RrI zABwjZ-;ckKyT`@aSmC%wNqimR7klwG-%8BuTk-Pvcao<*d4BTZt%MDDJ?e{=ay$4@mIxX+)@S5j#-oMCSMf^{^rsQ8k z{lL(GqCB6&scf5F;Z?yNL!XppF9}fxr zvE=DVo|MD6@D7YguD7Dc{%)xViNDxZ8^_vJZTwEMe&q)HP5Z@?*3libIVhR z_(tM+@OI*L@Q&iu@c!Z*@xkJ4@G;^a;h%~R$3OSne_YJ=+~)NK@$u3pcueTIjdMNY zNg}=mPl>zyXUU&M{9nAbc(T$_!-%vJFHigs&l3irqwGY%G1K#?=f%V?7Vm(6FFp|8 zDgGsXP<#u1Ry+gae=PnIp5S^oURUSg$->W5;jV5G@)!5wt^Pk#=a1!J)~$6}o0ym(um7qSK)f@7-0H>VEYO1!)NtoPjJn|f$en#gAH z#v)^;eQ-Jt`;<@ni;&t%L;#Kf_;=S>Q;+^n#H^O;X-)%fI@aM#*;TgnN z;+e&l;046@@&U%U!FSo}47xaZb) zw{IEixz)KT@t=t|!Y6xfdECBbj^~!A3-Mozcg7caZh74GdA+#1KJOKG*XI-B?)lVJ zarf_T9*Mhuca!jDFo@uA*BSTkZZe3wpYM@Z-2Hry(ztuPS1%Lhtt;{Fe$m8ptJ?tT z(_Fj{-dgh4A%8dV$@q5hMffrC4P0mLd2aWMFKr^hk>*zTyPMaq#Ag@3jK3uQ5HBm9 z`js$m4e>m9Bk?kL8}Zli?&7WSA>w`U(c+`Rjvo{+j9;zRKz;@{!R#TVgQ#4qC8#n0jg#UJBG#P8#m#WQ>m*7=HfD*T~%8T^TO z3H-UcVLrG2X@w^he;dy%{vn=Cd?=pFbALZj#PbWm5M~fxLVOzjis$}*po-^~=R4wS zh_A%!dhYKBT8O*-Kv!|MA9!Ef?FYt-yZyi{akn4%R^06ewurm^zyaJn?smKy_V1F! 
zyZio4&#gY&sKZ_H&Gn&k$EcQHl z@cBUEzZV~m?-c(EKPbKhKP!F+|4aOL{2%d0c-;F@!~O65;>53ppQjYhif0ioh8Ga8 zj+YjH8?P!p1aBZd7k@`QD}5O({t7-y{9Sx1?ymp&(?_C?xe_0tK0Ca4TW`l%SG&az z<447B<0r+h<5$HKd>D@BhIkzOU-7JXj0fS@x$9mgJel~*cq;MYcy{qO@Lb}x@e<26;kCsFt60jeh{={Y$(Le%o_9uX|iS@!axECO-N< z!6(7t9;ct;NyRtdDaBXg*~HJ|FNz<+Pj;z{vp;wAA~;)U_1;*IeZ;`Q+^ z;_u@<#QWpJ#OLE9#Ao4?#E;-p#ec!)i$B2^i$B2Ei|6<_tmkI&jQD=>H}Hevweh2# zUk*~c>+l87ZC!OB{&(><_%+WhkGl>(@Z9pePy8eC!FaSsK~QkG>wvosClzO7Wu9ugmkACvsc$bVV<9$x8jcz!pY_!Yy?8;d_zA@ts!+x_L7ePM7+ z@!YPfS&5$~UL0Q`UJc(Y-WJ~{-XA|EJ_f%mJ{!Lyz6y`_Bx)M}JsOxD}5tz7bC=ehPnH{1~2B{2^XY{2pFb zJk=-R_$r7e!(SJF32z{tA8##Q3vVx84euk~0UsdV2LDFHPW&ba!@gw4M@e|@R@GIi` z@ay8+@h9Ss@aX)<-|>TTSzqqqNyW2`3hS9tJOiFhye|HtcuhQ?=l;H^gy%M%cEpz! zZ-tlh+}{_~@Z9nYCccjNK)k-^mdEXjT8q1VQBQHVFZw{-?TaRgyM57Iaknp8CGPe` z+r{0!=#aSE7hM#0`=YzJyACw09QHSU^l&3#>%r>a_CF~-w|b7IZfV6o#xvqBe-rZO z5uc5JBK{pdL;Mogt970yw4vK(Cpb=eZuE%wKgE+&iVBb17te~vj1ktsjsI0Vsdzm+ zqj+09k9dE)g!mY|qWB#Ab@A`;X5xGBj^d~AzT%JY55<#G|Ifs;;WNc+;fuw$;6IA* z!+#ThjGq@zStab(P28=|B=r4}#CM_Z&&Pbao^9QJ#rk|fd?sE{d^27|d_7)4`~+TE z{0QDa`~m)!_#M2xc#6?s9Xg38!3T&J#D|FI#m9=*z&{hOg3lIjhtCslh5sNv9N#HE z1mBCh$6a6gSUlF#K3)z=&@XFPhjO@!FUUH1Q{vrz<{i&%J$%V|+lo)cJ9=*Cb^Dp# zo?D*piSIAI68}W}G(JZBDE@_bj4|PSXNmtq{4(*p#IF>85&uEFBED0+9KP3ctAl$T zyruYPybJE`3j?wR zlM9ai5}%p#PWR%iUsLJJOz}zhQt|Kb72?bBt>VAnJH&s&4~hSU9~J)-KZU!#WM^JU zb8ve7WU$;{uiioaq%;FCGp6(sPvI);*W@LB%Xozrs8Svmbe?| zD#kg=i??~zj31tNlEf#e9{OU5ch{dap4)s&aNhOeMer@2+j-qSZLjB+rylY9#cScG z#e3uD#k=FT#K++G#6Q9R@!ZDg_EE7DJY5eq&UwVg7oUwM_T2KgeN;NnEzf%5Gm3wY zXZ76jxP8=1;_iA<4tIT@MjsnUyt{6+@Z83;oAb67-+_0K{4>elTYN1(MSLH=M7%V< z+jF~al;*ner{_ksh`%r15|5cM?3b%US3IfsCwNBjBX}{~9wt`*mC1t-!BI})BgD7# z;;o)%saqTIlXy?@$9P}yzwtr1n^!E(JHv~&aTe!sJzwG@HEi^e$YzOmkLx|2+xXLt zj|z|cES?fS=((NOJ+4oCZh49le_p%*eoOo<{GNC{Jl1nze{GtUe-Iu|yceEEd@i0r zd=8#Rd<9-Wd?{W=d^cWRd{2t!PbL)${j`i`}>X!V|;6rc> z5KoN1@44l1*RfHaTb|s+j}w0ppXj;eao4fA;_f=OO5E*>wu`&xgNMZ3>$exh-Rrk^ z#og<-u@XId-T1s_FxcQohr8>5yMAW(-0Dz*`Q{QYg6Eg~)5-s`_)qw7@w508@wz;& 
zS9xyN&zf964|{IZmiP93-JVrpRL;_JdOA-cy{qi_)Fq{<7LI4qYgF1 zGvkfKOW*L+U``|;wui(?gW7P_Z8XOC7w@#*~3VoHt-{89an-_2Er#9>4n0R&k z5Ak>LYvOJ3N8%&!NRnr-pTqFP;&bri;xq6p;+yas;_L9D;>Yol;)n4n;`i|y;pe@KYAW@{roJ<^Rswj{GjJ{-v928o?D&* z#Ge<>h2Ijdhu;&gg~v+zY=8UU@x;5}X~bvX8N|Q9^N26T3y9Ch%ZP8o%ZqQu>xv)8 z>x&=8TZ!Mn+lk-6J9%z>`S1Sdx%K6_3BiZp7$6=Gf8TS<^WXi^bIX&B_;KP{@QI#V zp8xKT;{V+r#s9lMivM?i6#wu3DE{C5QT)IAW3p$j^E+z?gAI;!xV!HCcYpNU>hKct z%_W{6&oBA+lK*A#C-`vjq;;Z(6PY6ZK9B2Fo+k`K8@v0X=SH6se?fd9ep7rS{zUvZ zo*?OWn)D82x9 z>m*^!uwSbrJ~Q?C&5O77QKVHxxgIH}Tx^ zxckw&;_iObPu$&)J{EWPqsh2ihpp=dCkc+l67TLKYdp8{-(sBW#s9*$Nd6Aw|4Do_ z9z9jK9^80l;;F@_vi=KuZugP)+(+K<+-M2$&BeFkox~5}{lstKBgEq~&I#f<@mb>4 z@Fn64@ZI7Y@FTdpZa>NxOfERiOMD&188h{>kFR)>!uYu2vGCO5+41z^FW|Yw%i;OO zU%_7yZ;F=_Z-iIE-Fm3cy!v|a_PA)7Eu7adiSN=V^r;f>u9I^;xAokY^UfFVi7)Zo z&g-s|YdyC-6N%p-J{CXYx#e;HAIBBXEzd6EuZyq5?|5!`-2Zp^ujiJh@aMsY;E0jt z+4YbWkL$VRao4@%o?D(_#HSV?jA!uN^0@0>PR}jRbmH@fPr?g&Zh72wudKMc?$yHG zIyv-K@I}G#j>Nm`SZB{|JnK1cH}N%iZ^?g*{KLg>;N!)kHI5oiWPx}*;#cDKxV{+t zS{H^#ws~&j{FU+S68{-LAbt-&Bz^-wC7x<>c;a*7$?z-Uh4Jg+1@H&r)$vE-mGNk4 zpRP~84)Hy=I&>sHk$4+CHSX3!BI;aF;ztr+MEqmCq~u9Wo?7Bt@pr`!;3LFa;Nx-E z_uce;mghF#DU4^X_(Xh(_g^@e}wa@gw*y@xSrC;&<^w;z_536FVyY9Dd4k z>$|JNWzVe+1&O~Ro)^D^yS~@uara!hr|Zh{R3tvBcm+Ha?()1vo;>33(#I;|{qR=e zX~;7ex4zqW>T=#;;yT=B;E0?Ct=@vIe}-6$ATa2)d7_E+QCU)}ZG zXcO`A(}#6-bvuP;5|77uOW{@ryUth35PS%ZiV}Z@{ZAV&-s;wid36x)hW8a8hYu7V zg%8Eu_%Cwa8D6}NbEgeBIOa=yl6s+UmUy?{*yFj4e*x$HS$rOT&~yJjc-nKzvzPeu z;+ye%o?9Nb--wnW?3Y`IDW`_xk0l-tkMFtVar=#wp4)gD5T91O2A*Bfb+v3^qN8;7+$n(#x&q{ba@z(fr;w|u0;sfw>;(hQ;p8Iup(Q~WAG~)A$ ze~uT$UEd#5=UNiKjQH2Zm*5R0Pi*?wQM?!VKNkN0pDVr#-;BHKzfr8oTRpcr_xw7ixO-i(xVU>=v9h>(pR1v` zd!MVVxO<gS@Ff>sfxS4pQG=M zJh%DIW;{*Bzr@>!Z^t``Z^FBaU&MQhpT&oWN1qjmI);lsA%3)Ydg8~6r^Y9FZhd!k znBlqAp%n3R#EapJ@MuA-J^x)!owrMTZQ_3vuZjO8dDfBVy!d?{*O9Dt#;5;pUF?lf zpGGo>zs-8cfxEunr0>N%xA`_@Jf*}N;T6P(;FZPu;dR7k;`PL*<4wgk;4Q@0;vK|) z!#j)rf_L}a`tIs5z;mm^UE+s`-^4%0qXn_n_qNn|w!|lx9sFf*%oC4?FO)o8$g^2| 
z0)A3_4*s|JH9ST(8)5i!Eo{D#IQAj_E!Cl|)QRl%Dzmxc3;@k0$B+tL( z`9eJFTI*Kux`p^l_;2EC@iVx4yeFpbH$1ob9%4Lq#1G()#UJ3&vOir{mgg28Up&nm z8+{~_NIV6eTD&NpUc3;VO}rNVqIfktpXb(hSBGMr+xXujzLa=-yaMj}zLh#Rk@zvh zHy0m;x0XD6$kR_e<@Z*#;QOA$v*9bmJL4O0*Y}L{eV^wx-|39!fcRwmnD{3Al=xcw zviNEIiuiH-j`$<|f%pSFT8?MeTgtDZ$`gqto)nMox%J)EA-U&PhhoI17B7fr#$Dfg zQ|FQrU!V9_#NWWnOP(R*X(+xO?;(B=A19u1U08>yxa)g9`o6&P3qj~RjAw~>Gkmr9 zhxj`2A^0}&dH62z+4uqRAMiusoA6WOC-8IPNASy@Ti;zBZg_qn@W;g85x4INnM8C%iZA`d*s8kMP{)o15{B6wiTA z5U+_(7O#xY67Pi16>pC(5&r~VF8(3DPJ9l&Nqh#r&GU=Fd|e&(d2V&sM*IQsjrcL# z_3JQozAf?RiN7y?7Jnpp&X6bRi{bil`Lpx9KEHSoyqfqRydLiQUY)+T^4#Wolkv0@ zzlL`ePcbj}qTuK$o(LZ#UIZU1UJxHCUKbxLUK5`z-T|L3-UgrL`Nd$|t_}-4w>pd@ zeu?-8_-fqsWdwEJFYybAKPdhUepK>|BhO#rhwucso?Q== z#8=|AJ-7P2#X5P@bIY@f_$K0;@fNtN&q&78S$qaQP`u%H!AXK+is$wc{Le9>II_<3 z=mEV){4Vj)_#yGx_&MnDr&HN24cb6UlY%TR}s&SHx#dcHx@68cMxxicNTAi4;1f>zc1b$ zA16K@pCCROpCi5)|5|)L{+;+v{Cn{q@Q1j2T*sr2J@P*7_wW%>76gK!uBYTK}1 z6D8igZ#C0%yUv{DykCi*#OHf%=XLK}t@PaTJSKjP_?tP*>o?D)x^TY8J5KoU6@!ayb_le4SZh4jyUqO5cUd40E z+I^^KZseNc=8dN<71YFn<~GRCs0a(s*_85_moFM);fJ_3;+s-SF1po$(Hy`*rB< zxz%Aj@x8@I<3n&)=N28pI!uxHdBlGq{uMr3@^m22YVq^*@u2uE{DydgcHwv);_-t| zZN3ZXd;9|7AIxpOD;ZBB@nv{w@xyp}@dJ1^@%#9T;*Z0QMxtGMZB)*?`b9}JmX+@q1;_u_%ijTwhiT{Zo z#^VQbviW{b-!FS^^X|w95?_iZ65o#}6W@!c7ykp# zBz_Uk=DA;oe4bk!;w}n41VzlWa_e;2s*Z02ExuC>fB)*9FdAy|L8A_g7;#=`|#Sh>k z#7lGtCJ`L7aqGLy>n7)&Cw?7Y`hW6lllXs$-zEMJzEAR0U_2+phjE{J;CaGeoH2Rc zo$jTuU#`wGi7y~tfXB;g;y>Vx#H&)b&f+;3=P2<*_*C5Lc{%u1l5x(J_#`|}+2O@o z{ZlOtJ_N^Z@nrZ>@q+kC@x1s|@oM-D@k;o=;;rx)g`YkyY@E&UWa5MHRO0>c^tkI+ zO6pM4i?_NR3QrelDDlhQx9UV9oh9BqU+e3+jdL>R9Vk8lAL_ZC*F9ew?YZSyN&I;6 zCHPG7llWKSzv9V?Jgujl*F9f*-g7%|^(DcF;CMki0iMHi%j2G}74qEje7e-K1p6ZK zj(920EsuM?R?&0IQ)!uH3I5%@czL{*=a$DkUu)#K zm*73czrhEI|AY?}-;IwHzk-hyzl={7kNIsl-|6BJe3s{a9Ts?Qb;wTq67fv+?*b67avoZ_|cLb&Vu+w{Gh=QiKh8Bay= zT6it-p7`tH@8ONbC*sY-$K&n9m*XA9m*73c_uzfScjALQx4yeNjPTs*aFzIx;+OCV zxT}9d>bzLu6D$w=@~wD0{5#3hoIHEP=ix%J)Ep_S)Whi{2* zC%zc(io3q&q0XO3{7&M>h;PR~lRQPp^NskM_;&HO_*wC#_!ZpseE@y`$MXxpd=D|6 
zf5i{raZ7~j%JNwJ13aPlEj*=oniatp1xH%(6nGZ#qIeGRLU;l3T6kgcYIq6Ht?#Z5 zm-$Lhe#CQ|?{vmp7?S6U-3tHjFPh6@Py(iS4Nd5l0-Zy zp3-ycyQ@P+&#ewch|eNk5YL0Vz89d*uStAW;;V>P#%oHR669$qUU@_~ufgIC@M+>h z$uk#seVn%W{$KGcc-&IqI&^&>Nu6K7-SsdQ|2{6ecnmzZw5$0{6yln5Fh>JaGkjIunCWYyFAUvlR-HA)KO_DZeqB5f-(PfFJU$-%m1o!Oi+F7D zYt3I{?z!dJLHup;&3N=O&-U>;9$Wk(UfFXy zuX}y+b--p&g`~rF2^W5^d*C+dVZh1zKXOQ?X ze3<8!$Gtu|#&gT_CGnq%PsKm?-14~BC%+bVuTQQPcdt+G6nC#r9u{}6PhQ5|`drZ? zIC*e9l6d#JVw|$i_I(xOPays+o&mc#R#1He_ z`tIs5%5$qjX7}%9#53Sia995u)OoqY7bE^V@gn#-$@4dP_KTq34zLWS7ych2J-itbqmH1o4elV`E`OnkTaQv8y5q-QXR z;D}Z}To2ZFo7WT08%z8Vo(OmIa(S{!d_ul&Jg0bEJg?+QzpL+u|6Al zZsX5N{-)xY@HXO=@DAdy;@!pD;Jw9L;zPuT;lsrToCJ}tHV0t z=ZLSy7vZjt?Wps1iQiBBkK*p%h5aOXx{~L-_(42UAzW|nyyx*W;=i&#FXFj9-^|GK z%?6&^Quv+tcf`|jo$MoC3Lh=L8J{g)iO11u@jLh-@#kF~aC>~&{&P)=;6rfyCGiD$ z{*w5$us-hbc#8Tb7e9t)5r2s15Wj~P6;Hvx<0vVf1g|2VAFm;v8*eOL1#c$)8r}+b z>!B!hnC!({-BLzXEqMK0;+szo|6;AgyXSG+Jh#V1OU}DX{B3-n=XPHAJno3+mdE}3 zxD(=i$a6(}K7L*N3%pdtXZ!Ac&QBTfGsM>vFT}sQsVklppX0gB%RNtA;(63{gZSm* zUGdePTORj3ajWN+r`)FCLvZX6Pmb^L-14~RiNASndD3nUJ_N@x@syjRIC9!^%j2FW z{^_~pDMA;;qHM$2*H3#=D9Ciud;1ufq_}tqu=}A1;0iAC0^Ex1-MU zBtG?)AX#uM5KoCOl{{U^vrT*zepY-N{#ZQGm*IKiRIw30{kb#+=zD6z{`ujk5?D}60a>j6|d*H_1)E>spnRQO~khlUyFCZ zqXn_H?@CRbhe`Y?;zx*|z(+}*%;cFR-WuO1-UB}-z7fBGyT^MO`hLfAo9|V|^FaJ} zJX+OgK~%VYEI#qps81uY#N*+K#0%ib#Pi_k#cSZ1#H-*hinqq|inqWEd2W4obtvVz z&Fe$r%ZLxbE8|v;=)td@)VaCD&n3Q<_#C{wzYDk0+iR zPvp7v-PIwr=T?Weh)*xx0MCZIzW+*{Uy=AO#FrEAj8~F8=gHGdJi*Mc4!y)P;uFMs z;9ugd?@j3Y63=bEgBZ_p@qYL^@mcsL@oD%j@gMNL;+yb8;%D%q;wSKP;{V{6#P8x) zJh#5PI^6Nx>X3a~@F6%Jh`)eGs~)a5*Y|-lqrxNUaI3TBd71c(;wAB{l4mq|N{g?; z-w@x0cNNb#D>!*@^v7M_-=*&(J-2aIWjtfWE8>&IJL1#D+u(D>$KdnDKf#xaFT}qS z{~F&Uz61Y3d^5hwbL+dS!vW7P1b&P7L*iHQQ@HDUR_c6T;*)L<;)(G{4a@NK z=SCJLPg-&J^YjXe&z&9h<&j$Ai^=mQ?)u)3zPIz-=9`J}bQDjI_Y^OW_Z2UV4;62T ze<=PoK304%{+W1xe7g8#e5UvWe6HsggZa8TEb-jxu$uVg;w$iVxOFXh@M}4BJ}B{f zi9aH~8$T&|){*Cycm&!BM?603p(yV9{&B3RFOO98+~#|X@l+N6 z4S!wyG2TG@KHf|`(~j`G?}(?zJBpXayNH*-`-(Tj2a4ClhkAZ77>cXINYAYf?-M^( 
zd>}p39=d@EjB{7?K<@k@AB@i;rfan=-%fj1D(g1;r60dMBH_1)E>o#$4E z8pL-LuY&i)UEgEQ3CB4`;#(5`sd#hzbIId=-sUp#(&XPMUK_tCegu!y4%dVA-R9Mq z^TrhKh(CwBdAU4UBz^$#ImG+mc_hzy##2h%{l5SWJh%5L=EVr>*~jzf0VU=2iqYal z@!8@R@YUiIc;2*EybJs4v*Hu*d*WqghT}|7=h?^I?|5eMrnAEMeB#@AoR$~=k-9Y( zpZisqr<3?aygzPzza0EJ&;Nt-p~Sz+`vqTn@wQGrqmK*4$Kv0MFUB{D&&T(P@5B#? z|A3znKZ{=wKZ)NK{|CP>ejkrr_u0oq%3WbS>E1WF>A8(F$B)5>;J7Qkn>-Iaw><8BlUQ#&t%K$HXSZdEMB<5W#}j#O zdEEOZX+5_*_xD(qNaT6(+jthwEsuNOB%kM&C(&NZ68zi@@%VT#&n=I8-=vbbd*7si zxO?BEjktT?q?fpR-{eDa_rA#_areH-JaPBF$#>%JeUlyH?tPQr#B)YfCla|N?%p@K zCtjHN*!9A`yT{{|?craf$K7?$y`Pf9bL&fX>YrQu1-yXdzd`=e;%=YWOuQrenLgsH zH-+OIirYACAJRTf=%0FS<1Ed1CW)88XNWh$=ZH7L7l{wXmx=es*NRWaH;7NhcZjdT zcZ+|E|LnP6ha;X_9S#zILVQ1d5qIlhGj)C<@qZE@z5cV;jo2|PlEAIMLdvjQa;}SoQ_|xKF;TI&&o8 zKLvvdj^5%)@FC*)@ZsXQ@Q*#WzPmbn>bcdS2Jw@`tKc(mSN~I+gD(n>H4@*F`1Rt= z@hy_)^5&>Ck>A7Jazm;6re97mtY#z^&T$ ze6SPspD6JeiJu~#0sm6+^dip+argf3e)0LlUlqTN-@{$s=hFAsZ$`7}Ma*r!c^OZ9 z@fY#r;QKXTtHU

xgg0 z8{t+x>-!_>+)d(-5#LMv2;N`v#M&Ap3yyK(x$wo}CGa1`hvL8DuJ0@9`+3i8zP~e` z%i^{1fq=_&jmzS(n| z?^ldxoA^w8pZHGvfcO^tnD}MMR3KaW2UPm4eH-1_e75c{oX z*I^mr3#^unOEYQ5?@ffFII z*7tWA&n)q__yX}U_!9At@YUi=@O9z~@NMFI@Ll3N@dM%)@I&Hf@ME4^-(4Nfdv0}z z@k`jp%i{l%=LYWj{wZ~i*ZA4>lZHIciKoJo;VzH+euJFihk2g3cuVZN51e&mi$E_z3Z;_(<^z_yqCx_+;_c_$=`c@wwtd@g?Fj@a5vu@YSAM z-(4Lxdv0~uO#C+S@9}-O>sMv!d`aR@6aT09N&LFxsY9NaO~Uo#@=wHH5dR8)Mf@+k zGVc0*gT6QP{6a9_YmBF{_#b#H@g%##S7v+#q;8W#B1V1#jD~Y#oORx z#arSNJh#5PI(*@|)nNqjv&7%W7vQch_o?#_5(aM$;L>3b2+ZN4iRPYLm5cscRIct!C8crEe!`0L`g@y6mQ4n~zB(o8%F z-cCFp-cdXk-qrJqL26frex6$$Y7jq2yb3-7cYXhwI?t5&mc)N0-W;DVd6to9qxeDm zxcGVezW5OC3(4LN*Ms%l=GB?=rV{UnXY%}F5T7V+@F6%}^xX0cAwI8oAG{Fm>NAq@ zlo6kS*A&0Z`%UdVPZ*r{L<}2E@bjNMj~;yfFY%Mb)A4%dT=4?%1QNH80-Q&wSR&P4T&Sq#3EzcC< z`-^{uefeG7!SS>BKjb;+x#e;1=brZ5@+>0HdGSg3 z@19#8_kQke&n-`y!@-B(xG$apf8@F4aqs8GY5BCjmZt#m3B>c@Nj$ea?)}`%;_m(2 zg5vJ|+*ifj`?+;+x32m&3r-RottH;Q@7cw38-FFn*+aZM-dFMuBL9ct)9^3F7vZbL zJHH(@%*Ynp?W698968{*jk77^IV9c)KP5f{KPTP~zal;pzb-x*O~)p7<|#BF}A~aGyG)_T1`lpZN6RH}Pz^TMxad^D7dc>PUEka^flQN|I*?c^Zpv z#rudKz$b{Obn66neUBZME3(9Mn{PJ8vs^qgzD~R{zDc|szDv9sW;8a!I7uz;@b8L4wR+}-z25}#50IG$DVPfC&O1*0GCt-1xr+$8%ds|&7;ypI*WJ2M~e5y z=ZOEser%KH_WbC3+uQ}mS+gXF>&A9d5 z_76ETng&0oR^pqp|Lo(%TVH;pZUe-(<0Hk-<7357<6nwD#%GKFjejei`dHNPBdf%d znd z<*7-0F7e8E3Gr@tY4MJDb@AzVZSha>`Qj(=#p3(%{hr&r+`j#|=Qgjp$Ab^Saaue% ze!+9g>J-0k#Pgs`V=RJtG#~*lZdECA|M!Tndv^*6~T9!y8j(Ax-q34#z?b}m( zZh77!KD~GYJhSJP$L-q-io1RLtKx3oUPs*R+nbBK@5AgO?!FImh`9Sc%yHuG`!HvU zyYItXChooub2IK9cP~y2CLJ7yB;GxbIODn1vnzGGAl?c8L-OY%|2^@6c$W5I-`#k| z;4h2+Jt=CKk&3uI-t9iMI%eqgJ-2aw#CRHs564@IFT~r3&&A&p--CA---!8~E3fXDWHt zi*Lk_itoekil?3&uET$E*Y`v8J@LEYIx)BTMox$E$;BTr&J5yNiO($lJf2Iu0-jI2 zEM82!IbKS<30^_GFJ4)^CtkyI>$|H%eb229lZkI6J^^ouyS`_p&iy5R3Gsu)7vaMt zPcHII5wD4VC*B1AMSLE90(X7CK;N%=Zu4Ewcy5TV!T%ONjXxGYipTEw?7I3Fk1zfY zo?JY`nP5=Cky<>>nJA8A7B7lt6EB44^4$9F>hO~1R)>1T7Zb08m&IM*ms009CB6gk zO~l*bEhNtx^7Ilvfqy1`1z##&V@f3ISc|*9-=^pN1b3pNO9q 
zUx!~7Uya`oKZ4&8KZrjTzllfd^z3@OhR62Y`tItG*mJ8xlC!~w;7Bf>5YK?SzSpPD zMI}B5@g>Ex;bkPx+vIsed??;Ud^|o%{5U=dcYTi@E1cIn&uzX%7|#Ong7^yYhWKjn zy7*@C-uO20?)X0O3HSl=ariOu#rP@l`S^Lyt?#Z5S3S2n>?Qt&_%8f!JeuVQeqEr> z$vT@n{XXP##HSKJgQv$`o@?YOD4vvkdrk4oczf}g_;B3%Zu9z!^L`|L6(9e9^30R? zN5n4>e}FHQJPR4mR`J-=tXe_6y*$>Jq^!>qp4<47pR?#l(ZSZO01$e%>+H-rIa3bGFany4g|7*lw5^ss$7Vm*a zx`h35by$d}6#p5|E&ew9=d$A0@Ot6{xc{~n&%)z+kodRwXX2B1ynHLZ0pBQIae7ds z;Mj}X`n1g-@L=H=Q7hVs1;Kkdz`iZ_j65ov{==$vX{}WFvehJSg9{YUMa3Wd6 zqvM6dGvh_YGvKd@m%^)v7sDHh*TWl&*TFl8cg8!5cfbdVe}um;J{+HoyY<-}*5BEjH=p>I_)DJKdEM)@uXt{GHWFV>d<|Yl z{4!oo{3L$Eb33nl9rYj2?Yy1rLvZ{no)VAQ{ptF&JnnVW#GYGd-b!`&&w>i{;KCj?se2U;_h|S=Hl*k z)Gp%gb<`o^?se30;_h|Snd0ts)MeuCb=1w`?se4t;_h|SGvYaeIoojqcaN8Y3qz09 zL;9XL^dz2JonND#Da2pJ)8a1wQS#>y&%Y?lUsk*V{-*dt9;YpFdz{)*`#M&br<>bu=pT+n0Oz2l=zqUIPt0Y|2n(#IGgJK58#KfN2bV9g_*>+&;O~0e%)5@mOplv!*o^x3 z$=AUbz;*oBW1Kfp{m-c1Mt%hT73E|g=LmV;C35~;BQFlWf0+M#7!NNC*LnXM^IpT_ zroU&=o=EbO@F&O%pOyABAYW8kmDb>82>IM1f~1*o4(o(BJja%LcB9r>!IGR_CcH^cuRzlEHeaGm#q znD>Ii2H6u=Nx$-_!aWW@ayCg;LfZ5^Xe0L9`fbzBIJAE_mS^} zKS+K8UYvj%=LxvZdsmEe2db}(oEOL|!MjpUFXSYU zpTzu5BL5S)2pvTQP>_vSM@*VIJaGm#}80Xql|2yjIk)MS} zQBEo3v?cEb??)aFA5Xpxo(k7_&y~;ZoO?_2xasdrv}Y0dRro6Mvgfm7oYmwd;2X&s z!?%)0!FQ5(f$t&j1V2cg2tQ0d5PsC-X5MuiPI=sn!&KCtBcBAn0@r!phH)-D^6q(M za+aVzoP06-0l4PuLQZw^^YCZL|AO}7xSK>jCu5%~ibr2R|D?}M)szl8h*d?n=!Mb4MxTj0mY_rh_Xit0cK8q!%zsC!^lpZEN$>X_QdL8w1 z$XCKYCEo}CihSJ1l7EE!G5lV3k-RfJ>^1-MZY?~V{1>dxA0~en*X4%fW$=7&Pre-9 zi~I@9`v~$j80Q&qGw-L}uUwU+zw@a+8K2i=ch*zA@W)9y5wu&4air+li>P1dK2UPA5XoR zm)#G`ykt^+)@1SPRIi_lSANd~mJsvm5)z8IBc-+kU@2G!}{3QGl^5R+2 z&YI+f;LAO3j;o)Gt@XG$?nKmoLEa3$)#D~dKNtJP<0j|kCHI?q`*r!Gk?ZGT{mAumv61BZx!6Rwp8s#pa694N=2N}?J%Z&PH{*5z zf}$s>yh_@KTaNm2gt|3o0E@#xAM4m96EU1jKe(Czd$}4-W{&z z!#Rxet5m-Q^`pr*!BZ&b3Ub~fub(RO@;P~P`1jo^ql zxEY5Us1GNv0xu2MdGC*Lu1oc8P~U*O6}%DUBq1k;e9|=O*8uXl@VCi-Le6x!&U-xO zeWAxqf4iVPOUOIH)5%A}*N_i`Zy}!x|B`$*d^h~Yi1;%Lt}@^JVh^2gy*$?L%9kiQ6@N8S;>gggnpf;<7fhI|ry 
z1NqzVEgm=XuH&%7<7ON_LH%y>rSJoAov%h1=krwmE$Xw#_rR}GPIKfG8ZYOM=D&&g ztx7%v{uKEs2ic6XgP@;j&>NL~i@iR2~W!#!^1UB@BW<7ONhqJA8C1NbDk&ij0f^T$*l zgZkyDNOz?oINF@Z4|vUr#ir zBwTaqpuP-wO?U+5+=KSiCU1@3TibZtyzl)w-uE8har3@+chrw0pAMfwz8=1aycd2C z`hvVT)>q$=N5PMi&xB`@Z^r9(-iiM4jK}lzLGl~$+T<57zpdeB{LTAd|1RTxb8nrf zKDC4RFi*Xi_m-HKk>t(cZ)hE0to=o-nKK45vH}mD(lKLs+ zf1^D!JZ{bheII+i$4yRInP75NzWI`a478_8$Fw|U$<4!b;_eg2?+FZmkyk8pEVnCr#880U*re+2cH$q&J= zQ%*_b6rSWi-!{S@A^!&6f;<-99H>v1#hIu46HZpPs?)Gs3+3I7bP^WL9?@YfDads$Yfr zp5)8muTaiCXwN9}aQK@RUqHS9*R`!4uk7|K#{Dw4V;(op$8D&;LVgUMcZ!UszHTR$ z&yI2Kf$RLH;rd&{Q*XxeEA%&#JOdt0ehmH;`A_hca6Rr~95>EWZ?042OsC!3AgWId zX0tP%>h-!m)#Il9H*nmUmg6oXzZZVa<0eP1`&T_~a$ZFJ zP4as1J03SVdfhLWdUqbP+jB?SQ-u5$a_;xI$x?Lg8Vf60(pUdr2Uu3^T2PBSA^doF9#2sdiT6C*9#qo z!X7u{5I}u6d1H8KxSkJ#G5&R_z7y&jkavVPqMVV)i6O6!c^p9g1pICC$?)lLo%edC z!R~FL$4!5GqCHEVUd;xqn`F8ks2Wjf zIu2(&ZpPs+)L$UK4!;IB!)UHY(=pCPr^&$Tb@@I=wu_M$g_nYB&RpcwB>w{5ntT_$ zH+f;aZY02U-kX^NxwmAGoBmcnd&ZHMflnfD2A@ja7(R!*2Yep+%kU-Suftc6kAklu zp9SAQJ{`Wr<7VD<9CmozjKezA?2RHw4=~Q(Q2jL2e@mVU-%mM< zkaL>+H+Y^IGJbm8OYjKtNL()(!S(fL9*!GLJ{R87lGBaqSE4?ad?~y)K zV=bOWzBlMjj(gkear3_Jf!x{b9P_xL(?Rho%W@8JE+TjZDE#om*4={T<{AUPGt_4kWM$@RGH z$z#h)&dcOgN{A06AC*^pB>BRJ#ix_cM9zHjwHSvL)y7LZ!;ZtZ%4_mVVrM} zN8cm$#b)`(ZT@}Ywa72tFJ71YY94W%*4eM(rir=WHRZ?u+z#_Z0_)}O?Hmg zmsskLqu*v&esjM*t0IM$EFPC%er`r1%>9a#O=mfrb2eDK2F^Rpi9*g`IOn`>@hg;* zfb%Mo|{dS8#ey`+fj*d@rJpc5#Nsmcg z2aE53YmUzQKbHC;#r*x!{1mw6=<{NerT!a>H!1G#mp(6s!?|B?TKp#E1SHm}hwA|6 zG`09zxb{oO`Ie==0A8m#e>7ZkUd8Ln4!nMA{h24Fp$9GAvYdZ>biPvHoHNDZVYpst zdvv~-gxr|6-{>XYnAe|C*!o-XdE1tL-^4R<=7@d^=ooQj1D` zmuCK)%<;0_$KpT3HAkPP4?Gn*=OK&Ff@_XGPk*q~|JULjo6B)ECm@?nI$Xy``?b~L zbzAsz^mVL38*$Ak_nvGwxA+FQwnzJQ$x{E1#fLoWZ%;rromFsd&nAo4ZtKs{^RO?R zbB0^|oF%7nJIUvqx)xsy*YVNzjO*afx%i=MPqTQwj{bh>d_}{xJ(|I*&}x}9U-+Ap2=^_KdtE&fC&f4>5<=?sB$zfvrIm2w_z?U&|! 
zXUS<64Bej5aL$=%@jH~Guk%r{lFvC$TYMutm%Ce-`=#Ujr=>o3kI?zU;GC0gso!ex zx;-UdbF^RaaPF6W{=<7?k#c*!V=N7NJ$e*Lv!FV|5 zjI{Vg$_b!7^%hG$=QOqWTDbPB8glgCSLU1oABWB#4c8oPPwwUZ`U*8oh5PSXS$rH^ zbM*Y#VyWL_@#qzDT+Pw*c?7(W`{Uo-FKy3T7XOQKbe&m$rQ~yt{(IHz>rt=m(VQ9S z{+x#&lkr(#@e-@0UURfxUEz9M&FO3Lqm+~L{Q4|(dm=5q2(InX^XHhQ{=CJ5pZoje z;QiUfaPHUV7O%R-pQGnb5Y9ORE&dzj1SHleyH*}>PIZgVgX{R{`=UQ?@aMerq|Ezq zi@*4V)N78;`&78LM|0*|yx2y6j?PzGIOlY=`1h2fuZJ}@h3;2Fi+=*we(8Mm$?)f_ zYa!z^)Z%C1nxo@W>&wtNjV+!I*BpIbT(#7P{X2C2aJc3Kq|DiDso!n!w%cV#&C%D5 zx8OQoIzBTkUg|4y zkB^RX??e9M9*8wZa{pf)i=TmOd-Qox^Kj^#Mi&1Rt~uJTl1D=4JZ$mz;W;_7yEFBu zKj(NK>DPRV7ds~Pnxp+{3)k~UbGlmmd&<%0d%^#PZcj;zzXR9y1Y~#TYfJqBi$9Yo zJ8F)Oa|)dMHO1m#zxs1@UDpK8Ic+Sy&5~30H_7Lm>K2~|&*knG=6>n(wAX2WPS_Ar z;W&vFKLytuo%cu2gwA=w;;Z1Aqw{{*QtzA%ou35P9G$N(EcH7r-sqg<=j6zylLXgs z*7146;@2oAfSg9Z%LC47W$`U=ZI50ra-R>K6K?TwaLv(m_*P5(UW*4V$Z<7CuNNcX z+^-21|C@63b+y68(EVy|@eOcok6tf|T=C~@Nj4RZQ^w*`;F_c7^Fd4fZx(;yPdTpU z==uBsT*pV-v&`bCm-&$U)XfQ@xavQR&?(x;jyVVmzpNnu0q}e+ttYjV7nIi zYHZgf#~+V%8ju$^4Rdb|9)$n2DyWpe9e}yNLUxdF&eiJ^CJReSqDda`r zGstVg-zRSXpGO{mFC=dTUrPQWd=+_j_-Ew(;OohU!@q=^?t`*L&pY)5)M-D|b*C8Y ztR9YSJ-&Jbd@R*RV*3<%2DX=(#i)DJ9R2skTjcfoK5TEo6JF~x@=M*E0{;30bE3Mp ziucI9)(7sD?Jh<9o_e2b@4&s*rHT@nQE!*+6{hr!XwjaR)Am_Nv#GB%Rm*XwO>tLMJBU_5MsW12H8MvT5 ziE&a7ZzFZL$em}!-@$ljeIzb`WicM=!5FD;fbmezdQQAOxzj}?Q{m5(r@{M?r^8doGvE<2Oxdpicqz{oOnZ*& zIt-9G(kL0OG;`;k6xn7qKlPBYPe4bpd*T0~Dnx9rmj+aHQ z*YV0&KjhToIu}K**Y`B?)H0I4hFq`vXUJ2_O8wvDY4EZbS8Y!wJd!*M-kDt274hV{ zu2@5!fa~Hea$Ro}$2v;elZN$!zOSaP>yXA&uj`N)a$SdvCXb1b@ku2Q!Z(v=!uOG9 z!OxRNR+M~w|5e+s>zr5cd%wDa^~q>*UH>d4PeuJ^a$OhgBiD7&336Q*mBRB$+Y?z? 
z+FyfQ*GmC%T`xUPuIr^=$P=nceipf|qmrcu>#G`t<>y)-f%R2@T-RNh z-tS!pR_)R`e&&=4IU)d^K1bGbBJvYd8-B`J(fBZ91A4RU~$#LWfSU=4s*LCJsRXcQI(Y)QgLU#;a$P^~BhNzp336Rm2aEgr z8_`a-lgM?wokboKBlUUkd#?6N*Woebna@dmFLGUvQ z0r)TEG4L$%AUsbAX_xjl0bYtc8D4`t6&@f@gFjE64u6F_13rU1{CR2LF7o6~;y1|k z-wUmQ_YHFT)kW%qQ-e4zd1g=X8d%5aaRYtCJCkSi6MvU|p!jC; z)I{;~o6oN*rW1OnIGJb^M!A?xxu%yTEy6YBvT|2 z?#5%fM_iAgeY*D@*n5zBtnA0JJ$fX@4fFljjf(F(B+0ZP`zTVE7(1{}oNKaM;=n%t zTNB@RV4N1X?be5eyJv6r95i4++(5UV0|zC=)q18~rUWitoOw3=U~0y!cNGP{_W zcAR}b$+r1*!yE_8X$MairwSau21>E{{-ZpMq60<<=X|f>J~ElSf8eB)WXfI6_lTTF z@}Q1;XWCVlT=yS9$2Ya+ruP9n=Ka&Jnku*da+mX6`>*?(mr?VW+n;<*c1go}S_2~V z0q)zo_o2o={V_=lHv{C}%=qVbzx4Pz{(W3C$G_kG^4=oKOAEDLf2P}dpXB}^)c)15 zKYq>D-Lu!hB=H)!p@iPaOI zbbsAG<~|6u|AtK2|CTwh=f?XVaeo%7{o#Y7<1S;QjNuKhNG@4}klbg=K%!X75eo^!{G# z@2xS9mtuc*-A~;-&CKfpo#r|F>-=lIwqLg!;P|}%VH`h*>%P|W{&n2_L(O03kurZb zF#cx#%+f;l*ZDL3;bWU)MO71Biub3?$6{yQFW%pr9{k$tl9Tzg?0<(|7|rp#H}h*= QsQqImNc#h{18qA02SdFBs{jB1 literal 0 HcmV?d00001 diff --git a/src/py21cmfast/src/logger.h b/src/py21cmfast/src/logger.h index 596a40b81..d38f8a471 100644 --- a/src/py21cmfast/src/logger.h +++ b/src/py21cmfast/src/logger.h @@ -119,17 +119,26 @@ static inline char *timenow(); #define LOG_IF_ERROR(condition, message, args...) 
#endif -static inline char *timenow() { - static char buffer[64]; - time_t rawtime; - struct tm *timeinfo; +#ifdef __cplusplus +extern "C" +{ +#endif + static inline char *timenow() + { + static char buffer[64]; + time_t rawtime; + struct tm *timeinfo; + + time(&rawtime); + timeinfo = localtime(&rawtime); - time(&rawtime); - timeinfo = localtime(&rawtime); + strftime(buffer, 64, "%Y-%m-%d %H:%M:%S", timeinfo); - strftime(buffer, 64, "%Y-%m-%d %H:%M:%S", timeinfo); + return buffer; + } - return buffer; +#ifdef __cplusplus } +#endif #endif diff --git a/src/py21cmfast/src/test_Stochasticity.cu b/src/py21cmfast/src/test_Stochasticity.cu new file mode 100644 index 000000000..c1bbdec12 --- /dev/null +++ b/src/py21cmfast/src/test_Stochasticity.cu @@ -0,0 +1,46 @@ +#include +#include + +#include "Stochasticity.cu" + +void testCondenseDeviceArray() +{ + // Input data + float h_array[] = {1.0f, 0.0f, 2.0f, 3.0f, 0.0f, 4.0f}; + float mask_value = 0.0f; + int original_size = 6; + + // Expected outputs + float expected_array[] = {1.0f, 2.0f, 3.0f, 4.0f, 0.0f, 0.0f}; + int expected_valid_size = 4; + + // Allocate and copy to device + float *d_array; + cudaMalloc(&d_array, original_size * sizeof(float)); + cudaMemcpy(d_array, h_array, original_size * sizeof(float), cudaMemcpyHostToDevice); + + // Call the function from Stochasticity.cu + int valid_size = condenseDeviceArray(d_array, original_size, mask_value); + + // Copy the results back to the host + float h_result[original_size]; + cudaMemcpy(h_result, d_array, original_size * sizeof(float), cudaMemcpyDeviceToHost); + + // Validate the results + assert(valid_size == expected_valid_size); + for (int i = 0; i < original_size; ++i) + { + assert(h_result[i] == expected_array[i]); + } + + std::cout << "Test passed: condenseDeviceArray\n"; + + // Free device memory + cudaFree(d_array); +} + +int main() +{ + testCondenseDeviceArray(); + return 0; +} diff --git a/src/py21cmfast/src/tiger_checks.h b/src/py21cmfast/src/tiger_checks.h 
new file mode 100644 index 000000000..8fb9b2e73 --- /dev/null +++ b/src/py21cmfast/src/tiger_checks.h @@ -0,0 +1,17 @@ +#ifndef _TIGER_CHECK_H +#define _TIGER_CHECK_H +#include + +#include "Stochasticity.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + int checkComplextype(fftwf_complex *box, int total_elements, int xy_dim, int z_dim, int midpoint, int RES, int filter_type, float R, float R_param); + // int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_arr, int n_bin_y, double x_min, double x_width, struct HaloSamplingConstants hs_constants); +#ifdef __cplusplus +} +#endif + +#endif // TIGER_CHECK_H From ffb790227e4ded6c35e13480dd91a8c25b8ba8c0 Mon Sep 17 00:00:00 2001 From: JHu Date: Tue, 11 Feb 2025 18:15:49 +1100 Subject: [PATCH 085/145] fix linking errors --- build_cffi.py | 10 +++++----- src/py21cmfast/src/logger.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/build_cffi.py b/build_cffi.py index 57e15f3f5..d494e4026 100755 --- a/build_cffi.py +++ b/build_cffi.py @@ -34,12 +34,12 @@ os.path.join(CLOC, "HaloField_cuda.o"), os.path.join(CLOC, "device_rng_cuda.o"), os.path.join(CLOC, "combined_cuda.o"), - os.path.join(CLOC, "filtering.o"), - os.path.join(CLOC, "PerturbField.o"), - os.path.join(CLOC, "SpinTemperatureBox.o"), - os.path.join(CLOC, "IonisationBox.o"), + os.path.join(CLOC, "filtering_cuda.o"), + os.path.join(CLOC, "PerturbField_cuda.o"), + os.path.join(CLOC, "SpinTemperatureBox_cuda.o"), + os.path.join(CLOC, "IonisationBox_cuda.o"), ] -extra_link_args = ["-lcudart", "-lstdc++"] +extra_link_args = ["-lcudart", "-lcudadevrt", "-lstdc++"] # # compiled cuda code # extra_objects = [os.path.join(CLOC, "hello_world.o"), os.path.join(CLOC, "filtering_cuda.o"), os.path.join(CLOC, "Stochasticity_cuda.o") diff --git a/src/py21cmfast/src/logger.h b/src/py21cmfast/src/logger.h index d38f8a471..d377020bf 100644 --- a/src/py21cmfast/src/logger.h +++ b/src/py21cmfast/src/logger.h @@ -43,7 +43,7 @@ #include // === 
auxiliary functions -static inline char *timenow(); +// static inline char *timenow(); #define _FILE strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__ From 00c8623d84e113ffd6eb0918b78912ffa75da8af Mon Sep 17 00:00:00 2001 From: JHu Date: Thu, 13 Feb 2025 15:08:58 +1100 Subject: [PATCH 086/145] add rand state init at the beginning of creating halo field; remove old method --- src/py21cmfast/src/InitialConditions.c | 3 +- src/py21cmfast/src/Stochasticity.c | 16 ++++++++- src/py21cmfast/src/Stochasticity.cu | 47 ++++++-------------------- src/py21cmfast/src/Stochasticity.cuh | 2 ++ src/py21cmfast/src/device_rng.cu | 5 ++- 5 files changed, 33 insertions(+), 40 deletions(-) diff --git a/src/py21cmfast/src/InitialConditions.c b/src/py21cmfast/src/InitialConditions.c index 8f8f6f84b..9c948d015 100644 --- a/src/py21cmfast/src/InitialConditions.c +++ b/src/py21cmfast/src/InitialConditions.c @@ -90,10 +90,9 @@ int ComputeInitialConditions( // Date: 9/29/06 int status; + printf("Start computing initial conditions\n"); call_cuda(); - init_rand_states(random_seed, 10000000); - printf("finish init rand states \n"); Try{ // This Try wraps the entire function so we don't indent. diff --git a/src/py21cmfast/src/Stochasticity.c b/src/py21cmfast/src/Stochasticity.c index efb3451a8..145ef9ee8 100644 --- a/src/py21cmfast/src/Stochasticity.c +++ b/src/py21cmfast/src/Stochasticity.c @@ -26,6 +26,7 @@ #include "Stochasticity.cuh" #include "interp_tables.cuh" #include "HaloField.cuh" +#include "device_rng.cuh" #include //buffer size (per cell of arbitrary size) in the sampling function @@ -1118,10 +1119,23 @@ int stochastic_halofield(UserParams *user_params, CosmoParams *cosmo_params, // Fill them // NOTE:Halos prev in the first box corresponds to the large DexM halos - if (redshift_desc < 0.) + if (redshift_desc <= 0.) 
{ LOG_DEBUG("building first halo field at z=%.1f", redshift); sample_halo_grids(rng_stoc,redshift,dens_field,halo_overlap_box,halos_desc,halos,&hs_constants); + + // todo: add use_cuda/cuda_found condition here + // initiate rand states on the device + unsigned long long int nhalo_first = halos->n_halos; + int buffer_scale = HALO_CUDA_THREAD_FACTOR + 1; + unsigned long long int n_rstates = nhalo_first * buffer_scale; + printf("initializing %llu random states on the device... \n", n_rstates); + print_current_time(); + + init_rand_states(seed, n_rstates); + + printf("finish initializing \n"); + print_current_time(); } else{ LOG_DEBUG("Calculating halo progenitors from z=%.1f to z=%.1f | %llu", redshift_desc,redshift,halos_desc->n_halos); diff --git a/src/py21cmfast/src/Stochasticity.cu b/src/py21cmfast/src/Stochasticity.cu index 27b105d62..8b4f750da 100644 --- a/src/py21cmfast/src/Stochasticity.cu +++ b/src/py21cmfast/src/Stochasticity.cu @@ -585,17 +585,10 @@ __device__ void set_prop_rng(curandState *state, bool from_catalog, double *inte return; } -// kernel function -__global__ void setup_random_states(curandState *d_states, unsigned long long int random_seed){ - // get thread idx - int ind = blockIdx.x * blockDim.x + threadIdx.x; - curand_init(random_seed, ind, 0, &d_states[ind]); -} - __global__ void update_halo_constants(float *d_halo_masses, float *d_star_rng_in, float *d_sfr_rng_in, float *d_xray_rng_in, int *d_halo_coords_in, float *d_y_arr, double x_min, double x_width, unsigned long long int n_halos, int n_bin, struct HaloSamplingConstants d_hs_constants, - int HMF, curandState *d_states, + int HMF, float *d_halo_masses_out, float *d_star_rng_out, float *d_sfr_rng_out, float *d_xray_rng_out, int *d_halo_coords_out, int *d_sum_check, int *d_further_process, int *d_nprog_predict, int sparsity, unsigned long long int write_offset, @@ -680,14 +673,8 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_star_rng_in // printf("tmp res4 on gpu: 
%.17f \n", res4); // // tiger tmp: debug (end) } - // if (ind < 10000){ - // curandState ls_tmp = d_rngStates[ind]; - // } - - // todo: each thread across different blocks has unique random state - // curand_init(seed, threadIdx.x, 0, &d_states[threadIdx.x]); - // curandState local_state = d_states[threadIdx.x]; - curandState local_state = d_states[ind]; + + curandState local_state = d_randStates[ind]; // if (blockIdx.x > 100000){ // // printf("check here. \n"); // } @@ -795,7 +782,7 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_star_rng_in // double delta = get_delta_crit(HMF, sigma, d_hs_constants.growth_in)\ // / d_hs_constants.growth_in * d_hs_constants.growth_out; - d_states[ind] = local_state; + d_randStates[ind] = local_state; return; } @@ -889,21 +876,6 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr CALL_CUDA(cudaMalloc(&d_halo_coords_out, sizeof(int) * d_n_buffer * 3)); initializeArray(d_halo_coords_out, d_n_buffer * 3, -1000); - // setup RNG (todo: set it only once for iteration over different redshift) - GridLayout grids_rng = getWorkload(1, d_n_buffer); - int total_threads_rng = grids_rng.n_threads * grids_rng.n_blocks; - // Allocate memory for RNG states - curandState *d_states; - CALL_CUDA(cudaMalloc((void **)&d_states, total_threads_rng * sizeof(curandState))); - - // setup random states - setup_random_states<<>>(d_states, 1234ULL); - // Check kernel launch errors - CALL_CUDA(cudaGetLastError()); - // CALL_CUDA(cudaDeviceSynchronize()); - - free_rand_states(); - // initiate n_halo check unsigned long long int n_halo_check = n_halos; @@ -942,14 +914,14 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr int offset_shared = grids.n_threads; printf("start launching kernel function.\n"); update_halo_constants<<>>(d_halo_masses, d_star_rng, d_sfr_rng, d_xray_rng, d_halo_coords, - d_y_arr, x_min, x_width, n_halos_tbp, n_bin_y, hs_constants, HMF, d_states, 
d_halo_masses_out, d_star_rng_out, + d_y_arr, x_min, x_width, n_halos_tbp, n_bin_y, hs_constants, HMF, d_halo_masses_out, d_star_rng_out, d_sfr_rng_out, d_xray_rng_out, d_halo_coords_out, d_sum_check, d_further_process, d_nprog_predict, sparsity, write_offset, d_expected_mass, d_n_prog, offset_shared); // Check kernel launch errors CALL_CUDA(cudaGetLastError()); - // CALL_CUDA(cudaDeviceSynchronize()); + CALL_CUDA(cudaDeviceSynchronize()); // filter device halo masses in-place n_halos_tbp = filterWithMask(d_halo_masses, d_further_process, n_halos_tbp); @@ -1038,6 +1010,9 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr // float *h_halo_masses_out; // CALL_CUDA(cudaHostAlloc((void **)&h_halo_masses_out, out_size, cudaHostAllocDefault)); + CALL_CUDA(cudaGetLastError()); + CALL_CUDA(cudaDeviceSynchronize()); + CALL_CUDA(cudaMemcpy(halofield_out->halo_masses, d_halo_masses_out, out_size, cudaMemcpyDeviceToHost)); @@ -1052,7 +1027,6 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr // Free device memory CALL_CUDA(cudaFree(d_halo_masses)); CALL_CUDA(cudaFree(d_y_arr)); - CALL_CUDA(cudaFree(d_states)); CALL_CUDA(cudaFree(d_halo_masses_out)); CALL_CUDA(cudaFree(d_star_rng_out)); CALL_CUDA(cudaFree(d_sfr_rng_out)); @@ -1068,7 +1042,8 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr testFilterWithMask(); - + CALL_CUDA(cudaGetLastError()); + CALL_CUDA(cudaDeviceSynchronize()); return 0; } diff --git a/src/py21cmfast/src/Stochasticity.cuh b/src/py21cmfast/src/Stochasticity.cuh index 62a4065f1..4c71d5705 100644 --- a/src/py21cmfast/src/Stochasticity.cuh +++ b/src/py21cmfast/src/Stochasticity.cuh @@ -1,6 +1,8 @@ #ifndef _STOCHASTICITY_CUH #define _STOCHASTICITY_CUH +#define HALO_CUDA_THREAD_FACTOR (int) (4) + #ifdef __cplusplus extern "C" { diff --git a/src/py21cmfast/src/device_rng.cu b/src/py21cmfast/src/device_rng.cu index 107b96dfe..218058369 100644 --- 
a/src/py21cmfast/src/device_rng.cu +++ b/src/py21cmfast/src/device_rng.cu @@ -33,6 +33,9 @@ __global__ void initRandStates(unsigned long long int random_seed, int totalStat // Function to initialize RNG states. void init_rand_states(unsigned long long int seed, int numStates) { + // ensure previously allocated random states on the device are freed before allocating new ones + free_rand_states(); + CALL_CUDA(cudaMemcpyToSymbol(d_numStates, &numStates, sizeof(int), 0, cudaMemcpyHostToDevice)); // todo: add the following block to debug @@ -73,7 +76,7 @@ void free_rand_states() CALL_CUDA(cudaMemcpyToSymbol(d_randStates, &h_randStates, sizeof(h_randStates), 0, cudaMemcpyHostToDevice)); } - if (h_numStates != 0){ + if (h_numStates){ h_numStates = 0; CALL_CUDA(cudaMemcpyToSymbol(d_numStates, &h_numStates, sizeof(int), 0, cudaMemcpyHostToDevice)); } From b3d546ba17a9895059a0d441216b9846d880f395 Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Mon, 3 Feb 2025 15:48:18 +1100 Subject: [PATCH 087/145] Refactor PerturbField.c to separate-out MapMass_cpu(). 
--- src/py21cmfast/src/MapMass_cpu.c | 175 ++++++++++++++++++ .../src/{PerturbField.cu => MapMass_gpu.cu} | 0 src/py21cmfast/src/PerturbField.c | 165 ++--------------- 3 files changed, 194 insertions(+), 146 deletions(-) create mode 100644 src/py21cmfast/src/MapMass_cpu.c rename src/py21cmfast/src/{PerturbField.cu => MapMass_gpu.cu} (100%) diff --git a/src/py21cmfast/src/MapMass_cpu.c b/src/py21cmfast/src/MapMass_cpu.c new file mode 100644 index 000000000..d94623de5 --- /dev/null +++ b/src/py21cmfast/src/MapMass_cpu.c @@ -0,0 +1,175 @@ +// Re-write of perturb_field.c for being accessible within the MCMC +#include +#include +#include +#include +#include +#include + +#include "cexcept.h" +#include "exceptions.h" +#include "logger.h" +#include "Constants.h" +#include "indexing.h" +#include "InputParameters.h" +#include "OutputStructs.h" +#include "cosmology.h" +#include "dft.h" +#include "debugging.h" +#include "filtering.h" + +#include "PerturbField.h" + +double *MapMass_cpu( + UserParams *user_params, + CosmoParams *cosmo_params, + InitialConditions *boxes, + double *resampled_box, + int dimension, + float f_pixel_factor, + float init_growth_factor +){ + #pragma omp parallel \ + shared(init_growth_factor,boxes,f_pixel_factor,resampled_box,dimension) \ + private(i,j,k,xi,xf,yi,yf,zi,zf,HII_i,HII_j,HII_k,d_x,d_y,d_z,t_x,t_y,t_z,xp1,yp1,zp1) \ + num_threads(user_params->N_THREADS) + { + #pragma omp for + unsigned long long int i,j,k; + for (i=0; iDIM;i++){ + for (j=0; jDIM;j++){ + for (k=0; kDIM)+0.0); + yf = (j+0.5)/((user_params->DIM)+0.0); + zf = (k+0.5)/((D_PARA)+0.0); + + // update locations + if(user_params->PERTURB_ON_HIGH_RES) { + xf += (boxes->hires_vx)[R_INDEX(i, j, k)]; + yf += (boxes->hires_vy)[R_INDEX(i, j, k)]; + zf += (boxes->hires_vz)[R_INDEX(i, j, k)]; + } + else { + HII_i = (unsigned long long)(i/f_pixel_factor); + HII_j = (unsigned long long)(j/f_pixel_factor); + HII_k = (unsigned long long)(k/f_pixel_factor); + xf += 
(boxes->lowres_vx)[HII_R_INDEX(HII_i, HII_j, HII_k)]; + yf += (boxes->lowres_vy)[HII_R_INDEX(HII_i, HII_j, HII_k)]; + zf += (boxes->lowres_vz)[HII_R_INDEX(HII_i, HII_j, HII_k)]; + } + + // 2LPT PART + // add second order corrections + if(user_params->USE_2LPT){ + if(user_params->PERTURB_ON_HIGH_RES) { + xf -= (boxes->hires_vx_2LPT)[R_INDEX(i,j,k)]; + yf -= (boxes->hires_vy_2LPT)[R_INDEX(i,j,k)]; + zf -= (boxes->hires_vz_2LPT)[R_INDEX(i,j,k)]; + } + else { + xf -= (boxes->lowres_vx_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; + yf -= (boxes->lowres_vy_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; + zf -= (boxes->lowres_vz_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; + } + } + xf *= (double)(dimension); + yf *= (double)(dimension); + zf *= (double)((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension)); + while (xf >= (double)(dimension)){ xf -= (dimension);} + while (xf < 0){ xf += (dimension);} + while (yf >= (double)(dimension)){ yf -= (dimension);} + while (yf < 0){ yf += (dimension);} + while (zf >= (double)(user_params->NON_CUBIC_FACTOR*dimension)){ zf -= (user_params->NON_CUBIC_FACTOR*dimension);} + while (zf < 0){ zf += (user_params->NON_CUBIC_FACTOR*dimension);} + xi = xf; + yi = yf; + zi = zf; + if (xi >= (dimension)){ xi -= (dimension);} + if (xi < 0) {xi += (dimension);} + if (yi >= (dimension)){ yi -= (dimension);} + if (yi < 0) {yi += (dimension);} + if (zi >= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension))){ zi -= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} + if (zi < 0) {zi += ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} + + // Determine the fraction of the perturbed cell which overlaps with the 8 nearest grid cells, + // based on the grid cell which contains the centre of the perturbed cell + d_x = fabs(xf - (double)(xi+0.5)); + d_y = fabs(yf - (double)(yi+0.5)); + d_z = fabs(zf - (double)(zi+0.5)); + if(xf < (double)(xi+0.5)) { + // If perturbed cell centre is less than the mid-point then update 
fraction + // of mass in the cell and determine the cell centre of neighbour to be the + // lowest grid point index + d_x = 1. - d_x; + xi -= 1; + if (xi < 0) {xi += (dimension);} // Only this critera is possible as iterate back by one (we cannot exceed DIM) + } + if(yf < (double)(yi+0.5)) { + d_y = 1. - d_y; + yi -= 1; + if (yi < 0) {yi += (dimension);} + } + if(zf < (double)(zi+0.5)) { + d_z = 1. - d_z; + zi -= 1; + if (zi < 0) {zi += ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} + } + t_x = 1. - d_x; + t_y = 1. - d_y; + t_z = 1. - d_z; + + // Determine the grid coordinates of the 8 neighbouring cells + // Takes into account the offset based on cell centre determined above + xp1 = xi + 1; + if(xp1 >= dimension) { xp1 -= (dimension);} + yp1 = yi + 1; + if(yp1 >= dimension) { yp1 -= (dimension);} + zp1 = zi + 1; + if(zp1 >= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension))) { zp1 -= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} + + if(user_params->PERTURB_ON_HIGH_RES) { + // Redistribute the mass over the 8 neighbouring cells according to cloud in cell +#pragma omp atomic + resampled_box[R_INDEX(xi,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*t_z); +#pragma omp atomic + resampled_box[R_INDEX(xp1,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*t_z); +#pragma omp atomic + resampled_box[R_INDEX(xi,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*t_z); +#pragma omp atomic + resampled_box[R_INDEX(xp1,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*t_z); +#pragma omp atomic + resampled_box[R_INDEX(xi,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*d_z); +#pragma omp atomic + resampled_box[R_INDEX(xp1,yi,zp1)] += (double)(1. 
+ init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*d_z); +#pragma omp atomic + resampled_box[R_INDEX(xi,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*d_z); +#pragma omp atomic + resampled_box[R_INDEX(xp1,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*d_z); + } + else { + // Redistribute the mass over the 8 neighbouring cells according to cloud in cell +#pragma omp atomic + resampled_box[HII_R_INDEX(xi,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*t_z); +#pragma omp atomic + resampled_box[HII_R_INDEX(xp1,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*t_z); +#pragma omp atomic + resampled_box[HII_R_INDEX(xi,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*t_z); +#pragma omp atomic + resampled_box[HII_R_INDEX(xp1,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*t_z); +#pragma omp atomic + resampled_box[HII_R_INDEX(xi,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*d_z); +#pragma omp atomic + resampled_box[HII_R_INDEX(xp1,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*d_z); +#pragma omp atomic + resampled_box[HII_R_INDEX(xi,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*d_z); +#pragma omp atomic + resampled_box[HII_R_INDEX(xp1,yp1,zp1)] += (double)(1. 
+ init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*d_z); + } + } + } + } + } + return resampled_box; +} diff --git a/src/py21cmfast/src/PerturbField.cu b/src/py21cmfast/src/MapMass_gpu.cu similarity index 100% rename from src/py21cmfast/src/PerturbField.cu rename to src/py21cmfast/src/MapMass_gpu.cu diff --git a/src/py21cmfast/src/PerturbField.c b/src/py21cmfast/src/PerturbField.c index 64b493dd0..06c9e2204 100644 --- a/src/py21cmfast/src/PerturbField.c +++ b/src/py21cmfast/src/PerturbField.c @@ -157,9 +157,18 @@ void compute_perturbed_velocities( } +/** + * @brief + * + * @param redshift + * @param user_params + * @param cosmo_params + * @param boxes + * @param perturbed_field + * @return + */ int ComputePerturbField( - float redshift, UserParams *user_params, CosmoParams *cosmo_params, - InitialConditions *boxes, PerturbedField *perturbed_field + float redshift, UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes, PerturbedField *perturbed_field ){ /* ComputePerturbField uses the first-order Langragian displacement field to move the @@ -362,152 +371,16 @@ int ComputePerturbField( } // If using GPU, call CUDA function - if (1) { + LOG_DEBUG("Perturb the density field"); + bool use_cuda = false; // pass this as a parameter later + if (use_cuda) { +#if CUDA_FOUND resampled_box = MapMass_gpu(user_params, cosmo_params, boxes, resampled_box, dimension, f_pixel_factor, init_growth_factor); +#else + LOG_ERROR("CUDA version of MapMass() called but code was not compiled for CUDA."); +#endif } else { - // go through the high-res box, mapping the mass onto the low-res (updated) box - LOG_DEBUG("Perturb the density field"); - #pragma omp parallel \ - shared(init_growth_factor,boxes,f_pixel_factor,resampled_box,dimension) \ - private(i,j,k,xi,xf,yi,yf,zi,zf,HII_i,HII_j,HII_k,d_x,d_y,d_z,t_x,t_y,t_z,xp1,yp1,zp1) \ - num_threads(user_params->N_THREADS) - { - #pragma omp for - for (i=0; iDIM;i++){ - for (j=0; jDIM;j++){ - for (k=0; 
kDIM)+0.0); - yf = (j+0.5)/((user_params->DIM)+0.0); - zf = (k+0.5)/((D_PARA)+0.0); - - // update locations - if(user_params->PERTURB_ON_HIGH_RES) { - xf += (boxes->hires_vx)[R_INDEX(i, j, k)]; - yf += (boxes->hires_vy)[R_INDEX(i, j, k)]; - zf += (boxes->hires_vz)[R_INDEX(i, j, k)]; - } - else { - HII_i = (unsigned long long)(i/f_pixel_factor); - HII_j = (unsigned long long)(j/f_pixel_factor); - HII_k = (unsigned long long)(k/f_pixel_factor); - xf += (boxes->lowres_vx)[HII_R_INDEX(HII_i, HII_j, HII_k)]; - yf += (boxes->lowres_vy)[HII_R_INDEX(HII_i, HII_j, HII_k)]; - zf += (boxes->lowres_vz)[HII_R_INDEX(HII_i, HII_j, HII_k)]; - } - - // 2LPT PART - // add second order corrections - if(user_params->USE_2LPT){ - if(user_params->PERTURB_ON_HIGH_RES) { - xf -= (boxes->hires_vx_2LPT)[R_INDEX(i,j,k)]; - yf -= (boxes->hires_vy_2LPT)[R_INDEX(i,j,k)]; - zf -= (boxes->hires_vz_2LPT)[R_INDEX(i,j,k)]; - } - else { - xf -= (boxes->lowres_vx_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; - yf -= (boxes->lowres_vy_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; - zf -= (boxes->lowres_vz_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; - } - } - xf *= (double)(dimension); - yf *= (double)(dimension); - zf *= (double)((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension)); - while (xf >= (double)(dimension)){ xf -= (dimension);} - while (xf < 0){ xf += (dimension);} - while (yf >= (double)(dimension)){ yf -= (dimension);} - while (yf < 0){ yf += (dimension);} - while (zf >= (double)(user_params->NON_CUBIC_FACTOR*dimension)){ zf -= (user_params->NON_CUBIC_FACTOR*dimension);} - while (zf < 0){ zf += (user_params->NON_CUBIC_FACTOR*dimension);} - xi = xf; - yi = yf; - zi = zf; - if (xi >= (dimension)){ xi -= (dimension);} - if (xi < 0) {xi += (dimension);} - if (yi >= (dimension)){ yi -= (dimension);} - if (yi < 0) {yi += (dimension);} - if (zi >= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension))){ zi -= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} - if (zi < 0) {zi += 
((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} - - // Determine the fraction of the perturbed cell which overlaps with the 8 nearest grid cells, - // based on the grid cell which contains the centre of the perturbed cell - d_x = fabs(xf - (double)(xi+0.5)); - d_y = fabs(yf - (double)(yi+0.5)); - d_z = fabs(zf - (double)(zi+0.5)); - if(xf < (double)(xi+0.5)) { - // If perturbed cell centre is less than the mid-point then update fraction - // of mass in the cell and determine the cell centre of neighbour to be the - // lowest grid point index - d_x = 1. - d_x; - xi -= 1; - if (xi < 0) {xi += (dimension);} // Only this critera is possible as iterate back by one (we cannot exceed DIM) - } - if(yf < (double)(yi+0.5)) { - d_y = 1. - d_y; - yi -= 1; - if (yi < 0) {yi += (dimension);} - } - if(zf < (double)(zi+0.5)) { - d_z = 1. - d_z; - zi -= 1; - if (zi < 0) {zi += ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} - } - t_x = 1. - d_x; - t_y = 1. - d_y; - t_z = 1. - d_z; - - // Determine the grid coordinates of the 8 neighbouring cells - // Takes into account the offset based on cell centre determined above - xp1 = xi + 1; - if(xp1 >= dimension) { xp1 -= (dimension);} - yp1 = yi + 1; - if(yp1 >= dimension) { yp1 -= (dimension);} - zp1 = zi + 1; - if(zp1 >= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension))) { zp1 -= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} - - if(user_params->PERTURB_ON_HIGH_RES) { - // Redistribute the mass over the 8 neighbouring cells according to cloud in cell - #pragma omp atomic - resampled_box[R_INDEX(xi,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*t_z); - #pragma omp atomic - resampled_box[R_INDEX(xp1,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*t_z); - #pragma omp atomic - resampled_box[R_INDEX(xi,yp1,zi)] += (double)(1. 
+ init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*t_z); - #pragma omp atomic - resampled_box[R_INDEX(xp1,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*t_z); - #pragma omp atomic - resampled_box[R_INDEX(xi,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*d_z); - #pragma omp atomic - resampled_box[R_INDEX(xp1,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*d_z); - #pragma omp atomic - resampled_box[R_INDEX(xi,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*d_z); - #pragma omp atomic - resampled_box[R_INDEX(xp1,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*d_z); - } - else { - // Redistribute the mass over the 8 neighbouring cells according to cloud in cell - #pragma omp atomic - resampled_box[HII_R_INDEX(xi,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*t_z); - #pragma omp atomic - resampled_box[HII_R_INDEX(xp1,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*t_z); - #pragma omp atomic - resampled_box[HII_R_INDEX(xi,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*t_z); - #pragma omp atomic - resampled_box[HII_R_INDEX(xp1,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*t_z); - #pragma omp atomic - resampled_box[HII_R_INDEX(xi,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*d_z); - #pragma omp atomic - resampled_box[HII_R_INDEX(xp1,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*d_z); - #pragma omp atomic - resampled_box[HII_R_INDEX(xi,yp1,zp1)] += (double)(1. 
+ init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*d_z); - #pragma omp atomic - resampled_box[HII_R_INDEX(xp1,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*d_z); - } - } - } - } - } + resampled_box = MapMass_cpu(user_params, cosmo_params, boxes, resampled_box, dimension, f_pixel_factor, init_growth_factor); } // LOG_DEBUG("resampled_box[:50] = "); From 8170c6b677c68525ce5c31b97b03a3a41329ec56 Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Mon, 3 Feb 2025 16:26:33 +1100 Subject: [PATCH 088/145] Place proper CUDA compile guards around code code in filtering.c --- .gitignore | 2 ++ src/py21cmfast/src/filtering.c | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 969a59736..e31cb4d6b 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,8 @@ src/21cmFAST.egg-info/ pip-wheel-metadata/ src/py21cmfast\.egg-info/ +.python-version + *.so build/ *.*~ diff --git a/src/py21cmfast/src/filtering.c b/src/py21cmfast/src/filtering.c index 0d65c2494..c822dd40a 100644 --- a/src/py21cmfast/src/filtering.c +++ b/src/py21cmfast/src/filtering.c @@ -150,8 +150,13 @@ void filter_box_cpu(fftwf_complex *box, int RES, int filter_type, float R, float } void filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_param){ - if (1) { + bool use_cuda = false; // pass this as a parameter later + if (use_cuda) { +#if CUDA_FOUND filter_box_gpu(box, RES, filter_type, R, R_param); +#else + LOG_ERROR("CUDA version of filter_box() called but code was not compiled for CUDA."); +#endif } else { filter_box_cpu(box, RES, filter_type, R, R_param); } @@ -199,8 +204,14 @@ int test_filter_cpu(UserParams *user_params, CosmoParams *cosmo_params, AstroPar int test_filter(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options, float *input_box, double R, double R_param, int filter_flag, double *result) { - if (1) { + 
bool use_cuda = false; // pass this as a parameter later + if (use_cuda) { +#if CUDA_FOUND return test_filter_gpu(user_params, cosmo_params, astro_params, flag_options, input_box, R, R_param, filter_flag, result); +#else + LOG_ERROR("CUDA version of test_filter() called but code was not compiled for CUDA."); + return 1; +#endif } else { return test_filter_cpu(user_params, cosmo_params, astro_params, flag_options, input_box, R, R_param, filter_flag, result); } From d138e1fd5b55eb8ba0dfdc94d6076728503ecca9 Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Tue, 4 Feb 2025 11:16:12 +1100 Subject: [PATCH 089/145] Place proper CUDA compile guards around code code in SpinTemperatureBox.c --- src/py21cmfast/src/IonisationBox.h | 33 ----------------- src/py21cmfast/src/IonisationBox_gpu.h | 48 +++++++++++++++++++++++++ src/py21cmfast/src/SpinTemperatureBox.c | 34 ++++++++++++------ 3 files changed, 71 insertions(+), 44 deletions(-) create mode 100644 src/py21cmfast/src/IonisationBox_gpu.h diff --git a/src/py21cmfast/src/IonisationBox.h b/src/py21cmfast/src/IonisationBox.h index 287928408..37d423a18 100644 --- a/src/py21cmfast/src/IonisationBox.h +++ b/src/py21cmfast/src/IonisationBox.h @@ -2,7 +2,6 @@ #define _IONBOX_H #include -#include #include "InputParameters.h" #include "OutputStructs.h" @@ -16,38 +15,6 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para IonizedBox *previous_ionize_box, TsBox *spin_temp, HaloBox *halos, InitialConditions *ini_boxes, IonizedBox *box); -void init_ionbox_gpu_data( - fftwf_complex **d_deltax_filtered, // copies of pointers to pointers - fftwf_complex **d_xe_filtered, - float **d_y_arr, - float **d_Fcoll, - unsigned int nbins, // nbins for Nion_conditional_table1D->y - unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS - unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS - unsigned int *threadsPerBlock, - unsigned int *numBlocks -); -void calculate_fcoll_grid_gpu( - IonizedBox *box, 
// for box->Fcoll - fftwf_complex *h_deltax_filtered, // members of fg_struct - fftwf_complex *h_xe_filtered, - double *f_coll_grid_mean, // member of rspec - fftwf_complex *d_deltax_filtered, // device pointers - fftwf_complex *d_xe_filtered, - float *d_Fcoll, - float *d_y_arr, - unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS - unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS - unsigned int *threadsPerBlock, - unsigned int *numBlocks -); -void free_ionbox_gpu_data( - fftwf_complex **d_deltax_filtered, // copies of pointers to pointers - fftwf_complex **d_xe_filtered, - float **d_y_arr, - float **d_Fcoll -); - #ifdef __cplusplus } #endif diff --git a/src/py21cmfast/src/IonisationBox_gpu.h b/src/py21cmfast/src/IonisationBox_gpu.h new file mode 100644 index 000000000..d48bbacaf --- /dev/null +++ b/src/py21cmfast/src/IonisationBox_gpu.h @@ -0,0 +1,48 @@ +#ifndef _IONBOX_H +#define _IONBOX_H + +#include +#include + +#include "InputParameters.h" +#include "OutputStructs.h" + +#ifdef __cplusplus +extern "C" { +#endif +void init_ionbox_gpu_data( + fftwf_complex **d_deltax_filtered, // copies of pointers to pointers + fftwf_complex **d_xe_filtered, + float **d_y_arr, + float **d_Fcoll, + unsigned int nbins, // nbins for Nion_conditional_table1D->y + unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS + unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS + unsigned int *threadsPerBlock, + unsigned int *numBlocks +); +void calculate_fcoll_grid_gpu( + IonizedBox *box, // for box->Fcoll + fftwf_complex *h_deltax_filtered, // members of fg_struct + fftwf_complex *h_xe_filtered, + double *f_coll_grid_mean, // member of rspec + fftwf_complex *d_deltax_filtered, // device pointers + fftwf_complex *d_xe_filtered, + float *d_Fcoll, + float *d_y_arr, + unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS + unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS + unsigned int *threadsPerBlock, + unsigned int 
*numBlocks +); +void free_ionbox_gpu_data( + fftwf_complex **d_deltax_filtered, // copies of pointers to pointers + fftwf_complex **d_xe_filtered, + float **d_y_arr, + float **d_Fcoll +); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/py21cmfast/src/SpinTemperatureBox.c b/src/py21cmfast/src/SpinTemperatureBox.c index ff5acc483..5539b52db 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.c +++ b/src/py21cmfast/src/SpinTemperatureBox.c @@ -932,13 +932,17 @@ void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, // -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- // If GPU is to be used and flags are ideal, call GPU version of reduction - if (true && flag_options_global->USE_MASS_DEPENDENT_ZETA && user_params_global->USE_INTERPOLATION_TABLES && !flag_options_global->USE_MINI_HALOS) { + bool use_cuda = false; // pass this as a parameter later + if (use_cuda && flag_options_global->USE_MASS_DEPENDENT_ZETA && user_params_global->USE_INTERPOLATION_TABLES && !flag_options_global->USE_MINI_HALOS) { RGTable1D_f* SFRD_conditional_table = get_SFRD_conditional_table(); +#if CUDA_FOUND ave_sfrd_buf = calculate_sfrd_from_grid_gpu(SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS, threadsPerBlock, // d_data - d_y_arr, d_dens_R_grid, d_sfrd_grid, d_ave_sfrd_buf - ); + d_y_arr, d_dens_R_grid, d_sfrd_grid, d_ave_sfrd_buf); +#else + LOG_ERROR("calculate_sfrd_from_grid_gpu() called but code was not compiled for CUDA."); +#endif } else { // Else, run CPU reduction #pragma omp parallel num_threads(user_params_global->N_THREADS) @@ -1438,13 +1442,15 @@ void ts_main(float redshift, float prev_redshift, UserParams *user_params, Cosmo // struct 
--------------------------------------------------------------------------------------------------------------------------------------------------------- // threadsPerBlock = init_sfrd_gpu_data(delta_box_input, del_fcoll_Rct, HII_TOT_NUM_PIXELS, sfrd_nbins, &device_data); // pointers ------------------------------------------------------------------------------------------------------------------------------------------------------- - threadsPerBlock = init_sfrd_gpu_data(delta_box_input, del_fcoll_Rct, HII_TOT_NUM_PIXELS, sfrd_nbins, &d_y_arr, &d_dens_R_grid, &d_sfrd_grid, &d_ave_sfrd_buf); - // threadsPerBlock = init_sfrd_gpu_data(delta_box_input, del_fcoll_Rct, HII_TOT_NUM_PIXELS, sfrd_nbins, d_y_arr, d_dens_R_grid, d_sfrd_grid, d_ave_sfrd_buf); - if (threadsPerBlock == 0) { - LOG_DEBUG("Memory allocation failed inside init_sfrd_gpu_data."); - } else { - LOG_DEBUG("threadsPerBlock = %u", threadsPerBlock); - } // --------------------------------------------------------------------------------------------------------------------------------------------------------------- + bool use_cuda = false; // pass this as a parameter later + if (use_cuda) { +#if CUDA_FOUND + threadsPerBlock = init_sfrd_gpu_data(delta_box_input, del_fcoll_Rct, HII_TOT_NUM_PIXELS, sfrd_nbins, &d_y_arr, &d_dens_R_grid, &d_sfrd_grid, &d_ave_sfrd_buf); +#else + LOG_ERROR("CUDA function init_sfrd_gpu_data() called but code was not compiled for CUDA."); +#endif + } + // --------------------------------------------------------------------------------------------------------------------------------------------------------------- // R_ct starts at 39 and goes down to 0 for(R_ct=global_params.NUM_FILTER_STEPS_FOR_Ts; R_ct--;){ @@ -1606,7 +1612,13 @@ void ts_main(float redshift, float prev_redshift, UserParams *user_params, Cosmo // free_sfrd_gpu_data(device_data); // free(device_data); // pointers 
---------------------------------------------------------------------------------------------------------------------------------------------------------------- - free_sfrd_gpu_data(&d_y_arr, &d_dens_R_grid, &d_sfrd_grid, &d_ave_sfrd_buf); + if (use_cuda) { +#if CUDA_FOUND + free_sfrd_gpu_data(&d_y_arr, &d_dens_R_grid, &d_sfrd_grid, &d_ave_sfrd_buf); +#else + LOG_ERROR("CUDA function free_sfrd_gpu_data() called but code was not compiled for CUDA."); +#endif + } // ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- } From a00d463900460018265a62caf2744fe113b61cca Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Tue, 4 Feb 2025 11:28:53 +1100 Subject: [PATCH 090/145] Place proper CUDA compile guards around code code in IonisationBox.c --- src/py21cmfast/src/IonisationBox.c | 36 +++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/src/py21cmfast/src/IonisationBox.c b/src/py21cmfast/src/IonisationBox.c index 9e15bbe39..f814ff2cd 100644 --- a/src/py21cmfast/src/IonisationBox.c +++ b/src/py21cmfast/src/IonisationBox.c @@ -1326,9 +1326,10 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para unsigned int numBlocks; // If GPU & flags call init_ionbox_gpu_data() - if (flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { - + bool use_cuda = false; // pass this as a parameter later + if (use_cuda && flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { unsigned int Nion_nbins = get_nbins(); +#if CUDA_FOUND init_ionbox_gpu_data( &d_deltax_filtered, &d_xe_filtered, @@ -1340,6 +1341,9 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para &threadsPerBlock, &numBlocks ); +#else + LOG_ERROR("CUDA function 
init_ionbox_gpu_data() called but code was not compiled for CUDA."); +#endif } int R_ct; @@ -1369,7 +1373,9 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para } // If GPU & flags, call gpu version of calculate_fcoll_grid() - if (flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { + bool use_cuda = false; // pass this as a parameter later + if (use_cuda && flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { +#if CUDA_FOUND calculate_fcoll_grid_gpu( box, grid_struct->deltax_filtered, @@ -1384,6 +1390,9 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para &threadsPerBlock, &numBlocks ); +#else + LOG_ERROR("CUDA function calculate_fcoll_grid_gpu() called but code was not compiled for CUDA."); +#endif } else { calculate_fcoll_grid(box, previous_ionize_box, grid_struct, &ionbox_constants, &curr_radius); } @@ -1413,14 +1422,19 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para #endif } // If GPU & flags, call free_ionbox_gpu_data() - if (flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { - free_ionbox_gpu_data( - &d_deltax_filtered, - &d_xe_filtered, - &d_y_arr, - &d_Fcoll - ); - } + bool use_cuda = false; // pass this as a parameter later + if (use_cuda && flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { +#if USE_CUDA + free_ionbox_gpu_data( + &d_deltax_filtered, + &d_xe_filtered, + &d_y_arr, + &d_Fcoll + ); +#else + LOG_ERROR("CUDA function free_ionbox_gpu_data() called but code was not compiled for CUDA."); +#endif + } set_ionized_temperatures(box,perturbed_field,spin_temp,&ionbox_constants); From 3058a1f13d235d1b719313ac50b83a52d021c462 Mon Sep 17 00:00:00 2001 From: 
Gregory Poole Date: Tue, 4 Feb 2025 15:30:26 +1100 Subject: [PATCH 091/145] Remove several unneeded instances of '#include '. --- src/py21cmfast/src/IonisationBox.cu | 377 ++++++++++++------------ src/py21cmfast/src/IonisationBox_gpu.h | 2 +- src/py21cmfast/src/PerturbField.h | 2 +- src/py21cmfast/src/SpinTemperatureBox.h | 2 +- src/py21cmfast/src/filtering.h | 2 +- 5 files changed, 197 insertions(+), 188 deletions(-) diff --git a/src/py21cmfast/src/IonisationBox.cu b/src/py21cmfast/src/IonisationBox.cu index 948ee0efa..83b105d4f 100644 --- a/src/py21cmfast/src/IonisationBox.cu +++ b/src/py21cmfast/src/IonisationBox.cu @@ -1,231 +1,240 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include #include "cexcept.h" #include "exceptions.h" #include "logger.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include // GPU +#include #include #include -#include // We use thrust for reduction #include -#include #include // thrust::plus +#include #include "Constants.h" +#include "InitialConditions.h" #include "InputParameters.h" #include "OutputStructs.h" +#include "bubble_helper_progs.h" #include "cosmology.h" -#include "hmf.h" -#include "indexing.h" -#include "dft.h" -#include "recombinations.h" #include "debugging.h" +#include "dft.h" +#include "filtering.h" #include "heating_helper_progs.h" +#include "hmf.h" +#include "indexing.h" +#include "interp_tables.h" #include "photoncons.h" +#include "recombinations.h" #include "thermochem.h" -#include "interp_tables.h" -#include "filtering.h" -#include "bubble_helper_progs.h" -#include "InitialConditions.h" -#include "IonisationBox.h" +#include "IonisationBox_gpu.h" #include "cuda_utils.cuh" +__device__ inline double EvaluateRGTable1D_f_gpu(double x, double x_min, + double x_width, float *y_arr) { -__device__ inline double EvaluateRGTable1D_f_gpu(double x, double x_min, double x_width, float *y_arr) { - - int idx = (int)floor((x - x_min) / x_width); + int idx 
= (int)floor((x - x_min) / x_width); - double table_val = x_min + x_width * (float)idx; - double interp_point = (x - table_val) / x_width; + double table_val = x_min + x_width * (float)idx; + double interp_point = (x - table_val) / x_width; - return y_arr[idx] * (1 - interp_point) + y_arr[idx + 1] * (interp_point); + return y_arr[idx] * (1 - interp_point) + y_arr[idx + 1] * (interp_point); } // template -__global__ void compute_Fcoll( - cuFloatComplex *deltax_filtered, // fg_struct - cuFloatComplex *xe_filtered, // fg_struct - float *y_arr, // Nion_conditional_table1D - double x_min, // Nion_conditional_table1D - double x_width, // Nion_conditional_table1D - double fract_float_err, // FRACT_FLOAT_ERR - bool use_ts_fluct, // flag_options_global->USE_TS_FLUCT - unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS - long long hii_d, // HII_D - long long hii_d_para, // HII_D_PARA - long long hii_mid_para, // HII_MID_PARA - float *Fcoll // box +__global__ void +compute_Fcoll(cuFloatComplex *deltax_filtered, // fg_struct + cuFloatComplex *xe_filtered, // fg_struct + float *y_arr, // Nion_conditional_table1D + double x_min, // Nion_conditional_table1D + double x_width, // Nion_conditional_table1D + double fract_float_err, // FRACT_FLOAT_ERR + bool use_ts_fluct, // flag_options_global->USE_TS_FLUCT + unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS + long long hii_d, // HII_D + long long hii_d_para, // HII_D_PARA + long long hii_mid_para, // HII_MID_PARA + float *Fcoll // box ) { - // Get index of grids - unsigned long long idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Bound check - if (idx >= hii_tot_num_pixels) { - return; - } - - // Get x, y, z from idx using HII_R_INDEX macro formula - int z = idx % hii_d_para; - unsigned long long remaining = idx / hii_d_para; - int y = remaining % hii_d; - int x = remaining / hii_d; - - // Get FFT index using HII_R_FFT_INDEX macro formula - unsigned long long fft_idx = z + 2 * (hii_mid_para + 1) * (y + hii_d 
* x); - - // These clippings could be made in the calling function, using thrust, rather than here... - - // Clip the filtered grids to physical values - // delta cannot be less than -1 - *((float *) deltax_filtered + fft_idx) = fmaxf(*((float *) deltax_filtered + fft_idx), -1. + fract_float_err); - // cannot be less than zero - // x_e has to be between zero and unity - if (use_ts_fluct) { - *((float *) xe_filtered + fft_idx) = fmaxf(*((float *) xe_filtered + fft_idx), 0.0); - *((float *) xe_filtered + fft_idx) = fminf(*((float *) xe_filtered + fft_idx), 0.999); - } - - // Compute collapse fraction - Fcoll[idx] = exp(EvaluateRGTable1D_f_gpu(*((float *) deltax_filtered + fft_idx), x_min, x_width, y_arr)); + // Get index of grids + unsigned long long idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Bound check + if (idx >= hii_tot_num_pixels) { + return; + } + + // Get x, y, z from idx using HII_R_INDEX macro formula + int z = idx % hii_d_para; + unsigned long long remaining = idx / hii_d_para; + int y = remaining % hii_d; + int x = remaining / hii_d; + + // Get FFT index using HII_R_FFT_INDEX macro formula + unsigned long long fft_idx = z + 2 * (hii_mid_para + 1) * (y + hii_d * x); + + // These clippings could be made in the calling function, using thrust, rather + // than here... + + // Clip the filtered grids to physical values + // delta cannot be less than -1 + *((float *)deltax_filtered + fft_idx) = + fmaxf(*((float *)deltax_filtered + fft_idx), -1. 
+ fract_float_err); + // cannot be less than zero + // x_e has to be between zero and unity + if (use_ts_fluct) { + *((float *)xe_filtered + fft_idx) = + fmaxf(*((float *)xe_filtered + fft_idx), 0.0); + *((float *)xe_filtered + fft_idx) = + fminf(*((float *)xe_filtered + fft_idx), 0.999); + } + + // Compute collapse fraction + Fcoll[idx] = exp(EvaluateRGTable1D_f_gpu( + *((float *)deltax_filtered + fft_idx), x_min, x_width, y_arr)); } void init_ionbox_gpu_data( fftwf_complex **d_deltax_filtered, // copies of pointers to pointers - fftwf_complex **d_xe_filtered, - float **d_y_arr, - float **d_Fcoll, + fftwf_complex **d_xe_filtered, float **d_y_arr, float **d_Fcoll, unsigned int nbins, // nbins for Nion_conditional_table1D->y - unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS + unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS - unsigned int *threadsPerBlock, - unsigned int *numBlocks -) { - CALL_CUDA(cudaMalloc((void**)d_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels)); // already pointers to pointers (no & needed) - CALL_CUDA(cudaMemset(*d_deltax_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels)); // dereference the pointer to a pointer (*) - - if (flag_options_global->USE_TS_FLUCT) { - CALL_CUDA(cudaMalloc((void**)d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels)); - CALL_CUDA(cudaMemset(*d_xe_filtered, 0, sizeof(fftwf_complex) * hii_kspace_num_pixels)); - } - - CALL_CUDA(cudaMalloc((void**)d_y_arr, sizeof(float) * nbins)); - CALL_CUDA(cudaMemset(*d_y_arr, 0, sizeof(float) * nbins)); - - CALL_CUDA(cudaMalloc((void**)d_Fcoll, sizeof(float) * hii_tot_num_pixels)); - CALL_CUDA(cudaMemset(*d_Fcoll, 0, sizeof(float) * hii_tot_num_pixels)); - - LOG_INFO("Ionisation grids allocated on device."); - LOG_INFO("Ionisation grids initialised on device."); - - // Get max threads/block for device - int maxThreadsPerBlock; - 
CALL_CUDA(cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0)); - - // Set threads/block based on device max - if (maxThreadsPerBlock >= 512) { - *threadsPerBlock = 512; - } else if (maxThreadsPerBlock >= 256) { - *threadsPerBlock = 256; - } else if (maxThreadsPerBlock >= 128) { - *threadsPerBlock = 128; - } else if (maxThreadsPerBlock >= 64) { - *threadsPerBlock = 64; - } else if (maxThreadsPerBlock >= 32) { - *threadsPerBlock = 32; - } else { - *threadsPerBlock = 16; - } - - *numBlocks = (hii_tot_num_pixels + *threadsPerBlock - 1) / *threadsPerBlock; + unsigned int *threadsPerBlock, unsigned int *numBlocks) { + CALL_CUDA(cudaMalloc( + (void **)d_deltax_filtered, + sizeof(fftwf_complex) * + hii_kspace_num_pixels)); // already pointers to pointers (no & needed) + CALL_CUDA(cudaMemset( + *d_deltax_filtered, 0, + sizeof(fftwf_complex) * + hii_kspace_num_pixels)); // dereference the pointer to a pointer (*) + + if (flag_options_global->USE_TS_FLUCT) { + CALL_CUDA(cudaMalloc((void **)d_xe_filtered, + sizeof(fftwf_complex) * hii_kspace_num_pixels)); + CALL_CUDA(cudaMemset(*d_xe_filtered, 0, + sizeof(fftwf_complex) * hii_kspace_num_pixels)); + } + + CALL_CUDA(cudaMalloc((void **)d_y_arr, sizeof(float) * nbins)); + CALL_CUDA(cudaMemset(*d_y_arr, 0, sizeof(float) * nbins)); + + CALL_CUDA(cudaMalloc((void **)d_Fcoll, sizeof(float) * hii_tot_num_pixels)); + CALL_CUDA(cudaMemset(*d_Fcoll, 0, sizeof(float) * hii_tot_num_pixels)); + + LOG_INFO("Ionisation grids allocated on device."); + LOG_INFO("Ionisation grids initialised on device."); + + // Get max threads/block for device + int maxThreadsPerBlock; + CALL_CUDA(cudaDeviceGetAttribute(&maxThreadsPerBlock, + cudaDevAttrMaxThreadsPerBlock, 0)); + + // Set threads/block based on device max + if (maxThreadsPerBlock >= 512) { + *threadsPerBlock = 512; + } else if (maxThreadsPerBlock >= 256) { + *threadsPerBlock = 256; + } else if (maxThreadsPerBlock >= 128) { + *threadsPerBlock = 128; + } else if 
(maxThreadsPerBlock >= 64) { + *threadsPerBlock = 64; + } else if (maxThreadsPerBlock >= 32) { + *threadsPerBlock = 32; + } else { + *threadsPerBlock = 16; + } + + *numBlocks = (hii_tot_num_pixels + *threadsPerBlock - 1) / *threadsPerBlock; } void calculate_fcoll_grid_gpu( - IonizedBox *box, // for box->Fcoll + IonizedBox *box, // for box->Fcoll fftwf_complex *h_deltax_filtered, // members of fg_struct fftwf_complex *h_xe_filtered, - double *f_coll_grid_mean, // member of rspec + double *f_coll_grid_mean, // member of rspec fftwf_complex *d_deltax_filtered, // device pointers - fftwf_complex *d_xe_filtered, - float *d_Fcoll, - float *d_y_arr, - unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS + fftwf_complex *d_xe_filtered, float *d_Fcoll, float *d_y_arr, + unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS - unsigned int *threadsPerBlock, - unsigned int *numBlocks -) { - RGTable1D_f* Nion_conditional_table1D = get_Nion_conditional_table1D(); - - // Copy grids from host to device - CALL_CUDA(cudaMemcpy(d_deltax_filtered, h_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); - if (flag_options_global->USE_TS_FLUCT) { - CALL_CUDA(cudaMemcpy(d_xe_filtered, h_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); - } - CALL_CUDA(cudaMemcpy(d_y_arr, Nion_conditional_table1D->y_arr, sizeof(float) * Nion_conditional_table1D->n_bin, cudaMemcpyHostToDevice)); - LOG_INFO("Ionisation grids copied to device."); - - // TODO: Can I pass these straight to kernel? 
(or access in kernel w/ Tiger's method) - double fract_float_err = FRACT_FLOAT_ERR; - bool use_ts_fluct = flag_options_global->USE_TS_FLUCT; - long long hii_d = HII_D; - long long hii_d_para = HII_D_PARA; - long long hii_mid_para = HII_MID_PARA; - - // Invoke kernel - compute_Fcoll<<< *numBlocks, *threadsPerBlock >>>( - reinterpret_cast(d_deltax_filtered), - reinterpret_cast(d_xe_filtered), - d_y_arr, - Nion_conditional_table1D->x_min, - Nion_conditional_table1D->x_width, - fract_float_err, - use_ts_fluct, - hii_tot_num_pixels, - hii_d, - hii_d_para, - hii_mid_para, - d_Fcoll - ); - CALL_CUDA(cudaDeviceSynchronize()); - LOG_INFO("IonisationBox compute_Fcoll kernel called."); - - // Use thrust to reduce computed sums to one value. - // Wrap device pointer in a thrust::device_ptr - thrust::device_ptr d_Fcoll_ptr(d_Fcoll); - // Reduce final buffer sums to one value - double f_coll_grid_total = thrust::reduce(d_Fcoll_ptr, d_Fcoll_ptr + hii_tot_num_pixels, 0., thrust::plus()); - *f_coll_grid_mean = f_coll_grid_total / (double) hii_tot_num_pixels; - LOG_INFO("Fcoll sum reduced to single value by thrust::reduce operation."); - - // Copy results from device to host - CALL_CUDA(cudaMemcpy(box->Fcoll, d_Fcoll, sizeof(float) * hii_tot_num_pixels, cudaMemcpyDeviceToHost)); - CALL_CUDA(cudaMemcpy(h_deltax_filtered, d_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); - if (flag_options_global->USE_TS_FLUCT) { - CALL_CUDA(cudaMemcpy(h_xe_filtered, d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); - } - LOG_INFO("Grids copied to host."); + unsigned int *threadsPerBlock, unsigned int *numBlocks) { + RGTable1D_f *Nion_conditional_table1D = get_Nion_conditional_table1D(); + + // Copy grids from host to device + CALL_CUDA(cudaMemcpy(d_deltax_filtered, h_deltax_filtered, + sizeof(fftwf_complex) * hii_kspace_num_pixels, + cudaMemcpyHostToDevice)); + if (flag_options_global->USE_TS_FLUCT) { + 
CALL_CUDA(cudaMemcpy(d_xe_filtered, h_xe_filtered, + sizeof(fftwf_complex) * hii_kspace_num_pixels, + cudaMemcpyHostToDevice)); + } + CALL_CUDA(cudaMemcpy(d_y_arr, Nion_conditional_table1D->y_arr, + sizeof(float) * Nion_conditional_table1D->n_bin, + cudaMemcpyHostToDevice)); + LOG_INFO("Ionisation grids copied to device."); + + // TODO: Can I pass these straight to kernel? (or access in kernel w/ Tiger's + // method) + double fract_float_err = FRACT_FLOAT_ERR; + bool use_ts_fluct = flag_options_global->USE_TS_FLUCT; + long long hii_d = HII_D; + long long hii_d_para = HII_D_PARA; + long long hii_mid_para = HII_MID_PARA; + + // Invoke kernel + compute_Fcoll<<<*numBlocks, *threadsPerBlock>>>( + reinterpret_cast(d_deltax_filtered), + reinterpret_cast(d_xe_filtered), d_y_arr, + Nion_conditional_table1D->x_min, Nion_conditional_table1D->x_width, + fract_float_err, use_ts_fluct, hii_tot_num_pixels, hii_d, hii_d_para, + hii_mid_para, d_Fcoll); + CALL_CUDA(cudaDeviceSynchronize()); + LOG_INFO("IonisationBox compute_Fcoll kernel called."); + + // Use thrust to reduce computed sums to one value. 
+ // Wrap device pointer in a thrust::device_ptr + thrust::device_ptr d_Fcoll_ptr(d_Fcoll); + // Reduce final buffer sums to one value + double f_coll_grid_total = thrust::reduce( + d_Fcoll_ptr, d_Fcoll_ptr + hii_tot_num_pixels, 0., thrust::plus()); + *f_coll_grid_mean = f_coll_grid_total / (double)hii_tot_num_pixels; + LOG_INFO("Fcoll sum reduced to single value by thrust::reduce operation."); + + // Copy results from device to host + CALL_CUDA(cudaMemcpy(box->Fcoll, d_Fcoll, sizeof(float) * hii_tot_num_pixels, + cudaMemcpyDeviceToHost)); + CALL_CUDA(cudaMemcpy(h_deltax_filtered, d_deltax_filtered, + sizeof(fftwf_complex) * hii_kspace_num_pixels, + cudaMemcpyDeviceToHost)); + if (flag_options_global->USE_TS_FLUCT) { + CALL_CUDA(cudaMemcpy(h_xe_filtered, d_xe_filtered, + sizeof(fftwf_complex) * hii_kspace_num_pixels, + cudaMemcpyDeviceToHost)); + } + LOG_INFO("Grids copied to host."); } void free_ionbox_gpu_data( fftwf_complex **d_deltax_filtered, // copies of pointers to pointers - fftwf_complex **d_xe_filtered, - float **d_y_arr, - float **d_Fcoll -) { - CALL_CUDA(cudaFree(*d_deltax_filtered)); // Need to dereference the pointers to pointers (*) - if (flag_options_global->USE_TS_FLUCT) { - CALL_CUDA(cudaFree(*d_xe_filtered)); - } - CALL_CUDA(cudaFree(*d_y_arr)); - CALL_CUDA(cudaFree(*d_Fcoll)); - LOG_INFO("Device memory freed."); + fftwf_complex **d_xe_filtered, float **d_y_arr, float **d_Fcoll) { + CALL_CUDA(cudaFree( + *d_deltax_filtered)); // Need to dereference the pointers to pointers (*) + if (flag_options_global->USE_TS_FLUCT) { + CALL_CUDA(cudaFree(*d_xe_filtered)); + } + CALL_CUDA(cudaFree(*d_y_arr)); + CALL_CUDA(cudaFree(*d_Fcoll)); + LOG_INFO("Device memory freed."); } diff --git a/src/py21cmfast/src/IonisationBox_gpu.h b/src/py21cmfast/src/IonisationBox_gpu.h index d48bbacaf..219eee16b 100644 --- a/src/py21cmfast/src/IonisationBox_gpu.h +++ b/src/py21cmfast/src/IonisationBox_gpu.h @@ -2,7 +2,7 @@ #define _IONBOX_H #include -#include +// #include 
#include "InputParameters.h" #include "OutputStructs.h" diff --git a/src/py21cmfast/src/PerturbField.h b/src/py21cmfast/src/PerturbField.h index 79f713aa0..06f6c782c 100644 --- a/src/py21cmfast/src/PerturbField.h +++ b/src/py21cmfast/src/PerturbField.h @@ -1,7 +1,7 @@ #ifndef _PERTURBFIELD_H #define _PERTURBFIELD_H -#include +// #include #include "InputParameters.h" #include "OutputStructs.h" diff --git a/src/py21cmfast/src/SpinTemperatureBox.h b/src/py21cmfast/src/SpinTemperatureBox.h index 110bfc519..b43a8b5e9 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.h +++ b/src/py21cmfast/src/SpinTemperatureBox.h @@ -1,7 +1,7 @@ #ifndef _SPINTEMP_H #define _SPINTEMP_H -#include +// #include #include "InputParameters.h" #include "OutputStructs.h" diff --git a/src/py21cmfast/src/filtering.h b/src/py21cmfast/src/filtering.h index f82961565..65ca1a493 100644 --- a/src/py21cmfast/src/filtering.h +++ b/src/py21cmfast/src/filtering.h @@ -2,7 +2,7 @@ #define _FILTERING_H #include -#include +// #include #ifdef __cplusplus extern "C" { From 79eda01c2333c6347ee13e0ac7ea893fc0164b29 Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Wed, 5 Feb 2025 15:44:57 +1100 Subject: [PATCH 092/145] CPU-only build is working once again but tests are failing. --- build_cffi.py | 7 ++++--- src/py21cmfast/src/IonisationBox.c | 1 - src/py21cmfast/src/MapMass_cpu.c | 9 ++++++++- src/py21cmfast/src/PerturbField.c | 6 ------ src/py21cmfast/src/PerturbField.h | 4 ++++ 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/build_cffi.py b/build_cffi.py index d494e4026..97f59bded 100755 --- a/build_cffi.py +++ b/build_cffi.py @@ -70,8 +70,9 @@ except ValueError: # note: for py35 support, can't use f strings. 
raise ValueError( - "LOG_LEVEL must be specified as a positive integer, or one " - "of {}".format(available_levels) + "LOG_LEVEL must be specified as a positive integer, or one " "of {}".format( + available_levels + ) ) # ================================================== @@ -98,7 +99,7 @@ # stuff for gperftools if "PROFILE" in os.environ: - #libraries += ["profiler", "tcmalloc"] + # libraries += ["profiler", "tcmalloc"] libraries += ["profiler"] # we need this even if DEBUG is off extra_compile_args += ["-g"] diff --git a/src/py21cmfast/src/IonisationBox.c b/src/py21cmfast/src/IonisationBox.c index f814ff2cd..769d43c30 100644 --- a/src/py21cmfast/src/IonisationBox.c +++ b/src/py21cmfast/src/IonisationBox.c @@ -1422,7 +1422,6 @@ int ComputeIonizedBox(float redshift, float prev_redshift, UserParams *user_para #endif } // If GPU & flags, call free_ionbox_gpu_data() - bool use_cuda = false; // pass this as a parameter later if (use_cuda && flag_options_global->USE_MASS_DEPENDENT_ZETA && !flag_options_global->USE_MINI_HALOS && !flag_options_global->USE_HALO_FIELD) { #if USE_CUDA free_ionbox_gpu_data( diff --git a/src/py21cmfast/src/MapMass_cpu.c b/src/py21cmfast/src/MapMass_cpu.c index d94623de5..56999ba3a 100644 --- a/src/py21cmfast/src/MapMass_cpu.c +++ b/src/py21cmfast/src/MapMass_cpu.c @@ -29,13 +29,20 @@ double *MapMass_cpu( float f_pixel_factor, float init_growth_factor ){ + double xf, yf, zf; + unsigned long long int i,j,k; + int xi, yi, zi; + unsigned long long HII_i, HII_j, HII_k; + // Variables to perform cloud in cell re-distribution of mass for the perturbed field + int xp1,yp1,zp1; + float d_x,d_y,d_z,t_x,t_y,t_z; + #pragma omp parallel \ shared(init_growth_factor,boxes,f_pixel_factor,resampled_box,dimension) \ private(i,j,k,xi,xf,yi,yf,zi,zf,HII_i,HII_j,HII_k,d_x,d_y,d_z,t_x,t_y,t_z,xp1,yp1,zp1) \ num_threads(user_params->N_THREADS) { #pragma omp for - unsigned long long int i,j,k; for (i=0; iDIM;i++){ for (j=0; jDIM;j++){ for (k=0; kPERTURB_ON_HIGH_RES) 
{ diff --git a/src/py21cmfast/src/PerturbField.h b/src/py21cmfast/src/PerturbField.h index 06f6c782c..01f305df2 100644 --- a/src/py21cmfast/src/PerturbField.h +++ b/src/py21cmfast/src/PerturbField.h @@ -13,6 +13,10 @@ int ComputePerturbField( float redshift, UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes, PerturbedField *perturbed_field ); +double* MapMass_cpu( + UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes, double *resampled_box, + int dimension, float f_pixel_factor, float init_growth_factor +); double* MapMass_gpu( UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes, double *resampled_box, int dimension, float f_pixel_factor, float init_growth_factor From e8242d84d3b5f01fd83d0eb8d679be07050a3673 Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Tue, 11 Feb 2025 13:48:03 +1100 Subject: [PATCH 093/145] Add meson.build --- meson.build | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 meson.build diff --git a/meson.build b/meson.build new file mode 100644 index 000000000..ba0cc9d26 --- /dev/null +++ b/meson.build @@ -0,0 +1,40 @@ +project('21cmFAST', 'c') + +source_files = [ + 'src/py21cmfast/src/BrightnessTemperatureBox.c', + 'src/py21cmfast/src/HaloBox.c', + 'src/py21cmfast/src/HaloField.c', + 'src/py21cmfast/src/InitialConditions.c', + 'src/py21cmfast/src/InputParameters.c', + 'src/py21cmfast/src/IonisationBox.c', + 'src/py21cmfast/src/LuminosityFunction.c', + 'src/py21cmfast/src/PerturbField.c', + 'src/py21cmfast/src/PerturbHaloField.c', + 'src/py21cmfast/src/SpinTemperatureBox.c', + 'src/py21cmfast/src/Stochasticity.c', + 'src/py21cmfast/src/bubble_helper_progs.c', + 'src/py21cmfast/src/cosmology.c', + 'src/py21cmfast/src/debugging.c', + 'src/py21cmfast/src/dft.c', + 'src/py21cmfast/src/elec_interp.c', + 'src/py21cmfast/src/filtering.c', + 'src/py21cmfast/src/heating_helper_progs.c', + 'src/py21cmfast/src/hmf.c', + 
'src/py21cmfast/src/interp_tables.c', + 'src/py21cmfast/src/interpolation.c', + 'src/py21cmfast/src/photoncons.c', + 'src/py21cmfast/src/recombinations.c', + 'src/py21cmfast/src/subcell_rsds.c', + 'src/py21cmfast/src/thermochem.c', +] + +omp = dependency('openmp') +gsl = dependency('gsl') + +# fftw = dependency('fftw3f_threads') +cc = meson.get_compiler ('c') +search_paths = [ '/usr/lib', '/usr/local/lib', '/opt/homebrew/lib' ] +fftw = cc.find_library ('fftw3f', required: true, dirs: search_paths) +fftw_threads = cc.find_library ('fftw3f_threads', required: true, dirs: search_paths) + +library('21cmFAST', source_files, dependencies: [omp,gsl,fftw,fftw_threads]) From 637b30fc3771904cbc1d66668547ad9bf49e25ba Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Tue, 11 Feb 2025 21:49:02 +1100 Subject: [PATCH 094/145] Migrate setup.py metadata to pyproject.toml and flesh-out meson build. Tests failing. --- meson.build | 43 ++---------- pyproject.toml | 100 ++++++++++++++++++++++++++-- setup.py | 101 ----------------------------- src/meson.build | 1 + src/py21cmfast/drivers/meson.build | 12 ++++ src/py21cmfast/meson.build | 20 ++++++ src/py21cmfast/src/meson.build | 40 ++++++++++++ src/py21cmfast/wrapper/meson.build | 17 +++++ 8 files changed, 191 insertions(+), 143 deletions(-) delete mode 100644 setup.py create mode 100644 src/meson.build create mode 100644 src/py21cmfast/drivers/meson.build create mode 100644 src/py21cmfast/meson.build create mode 100644 src/py21cmfast/src/meson.build create mode 100644 src/py21cmfast/wrapper/meson.build diff --git a/meson.build b/meson.build index ba0cc9d26..09926b38d 100644 --- a/meson.build +++ b/meson.build @@ -1,40 +1,7 @@ -project('21cmFAST', 'c') +project('21cmFAST', 'c', + version : run_command('cat', 'VERSION').stdout().strip(), +) -source_files = [ - 'src/py21cmfast/src/BrightnessTemperatureBox.c', - 'src/py21cmfast/src/HaloBox.c', - 'src/py21cmfast/src/HaloField.c', - 'src/py21cmfast/src/InitialConditions.c', - 
'src/py21cmfast/src/InputParameters.c', - 'src/py21cmfast/src/IonisationBox.c', - 'src/py21cmfast/src/LuminosityFunction.c', - 'src/py21cmfast/src/PerturbField.c', - 'src/py21cmfast/src/PerturbHaloField.c', - 'src/py21cmfast/src/SpinTemperatureBox.c', - 'src/py21cmfast/src/Stochasticity.c', - 'src/py21cmfast/src/bubble_helper_progs.c', - 'src/py21cmfast/src/cosmology.c', - 'src/py21cmfast/src/debugging.c', - 'src/py21cmfast/src/dft.c', - 'src/py21cmfast/src/elec_interp.c', - 'src/py21cmfast/src/filtering.c', - 'src/py21cmfast/src/heating_helper_progs.c', - 'src/py21cmfast/src/hmf.c', - 'src/py21cmfast/src/interp_tables.c', - 'src/py21cmfast/src/interpolation.c', - 'src/py21cmfast/src/photoncons.c', - 'src/py21cmfast/src/recombinations.c', - 'src/py21cmfast/src/subcell_rsds.c', - 'src/py21cmfast/src/thermochem.c', -] +py3 = import('python').find_installation(pure: false) -omp = dependency('openmp') -gsl = dependency('gsl') - -# fftw = dependency('fftw3f_threads') -cc = meson.get_compiler ('c') -search_paths = [ '/usr/lib', '/usr/local/lib', '/opt/homebrew/lib' ] -fftw = cc.find_library ('fftw3f', required: true, dirs: search_paths) -fftw_threads = cc.find_library ('fftw3f_threads', required: true, dirs: search_paths) - -library('21cmFAST', source_files, dependencies: [omp,gsl,fftw,fftw_threads]) +subdir('src') diff --git a/pyproject.toml b/pyproject.toml index 235264bae..bdee90d25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,93 @@ -[build-system] -requires = ["setuptools>=42", "wheel", "setuptools_scm[toml]>=3.4"] -build-backend = "setuptools.build_meta" +[project] +name="21cmFAST" +dynamic = ["version"] +license="MIT license" +license-files = ["LICENSE"] +description="A semi-numerical cosmological simulation code for the 21cm signal" +# long_description="%s\n%s" +# % ( +# re.compile("^.. start-badges.*^.. 
end-badges", re.M | re.S).sub( +# "", _read("README.rst") +# ), +# re.sub(":[a-z]+:`~?(.*?)`", r"``\1``", _read("CHANGELOG.rst")), +# ) +# long_description_content_type="text/x-rst" +authors=[ {name = "The 21cmFAST coredev team" ,email = "21cmfast.coredev@gmail.com"}] +readme= {file = "README.rst", content-type = "text/x-rst" } +include_package_data=true +requires-python=">=3.10" +classifiers=[ + # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Operating System :: Unix", + "Operating System :: POSIX", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: Implementation :: CPython", +] +keywords=["Epoch of Reionization", "Cosmology"] +dependencies=[ + "click", + "numpy<2", + "pyyaml", + "scipy", + "astropy>=2.0", + "h5py>=2.8.0", + "cached_property", + "matplotlib", + "bidict", + "cosmotile>=0.2.0", + "attrs", +] + +# [tool.setuptools.packages.find] +# where = ["src/py21cmfast"] + +[project.optional-dependencies] +tests = [ + "pre-commit", + "pytest>=5.0", + "pytest-cov", + "tox", + "pytest-remotedata>=0.3.2", + "powerbox", + "pytest-plt", + "questionary", +] +docs = [ + "nbsphinx", + "numpydoc", + "sphinx>=1.3", + "sphinx-rtd-theme", +] +# When the min python version supports PEP 735, this can be simplified +# as dev = test_req + doc_req again (as it was implemented in setup.py) +dev = [ + "pre-commit", + "pytest>=5.0", + "pytest-cov", + "tox", + "pytest-remotedata>=0.3.2", + "powerbox", + "pytest-plt", + "questionary", + "nbsphinx", + "numpydoc", + "sphinx>=1.3", + "sphinx-rtd-theme", +] -[tool.setuptools_scm] +# UPDATE THESE +[project.urls] +Homepage="https://github.com/21cmFAST/21cmFAST" +Documentation="https://github.com/21cmFAST/21cmFAST" 
+Repository="https://github.com/21cmFAST/21cmFAST" +Issues="https://github.com/21cmFAST/21cmFAST" +Changelog="https://github.com/21cmFAST/21cmFAST" [tool.black] include = '\.pyi?$' @@ -18,3 +103,10 @@ exclude = ''' | dist )/ ''' + +[project.scripts] +21cmfast = "py21cmfast.cli:main" + +[build-system] +build-backend = 'mesonpy' +requires = ['meson-python'] diff --git a/setup.py b/setup.py deleted file mode 100644 index b32cf9ce3..000000000 --- a/setup.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python -"""Setup the package.""" - - -from setuptools import find_packages, setup - -import glob -import io -import os -import re -import shutil -from os.path import dirname, expanduser, join - - -def _read(*names, **kwargs): - return open( - join(dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8") - ).read() - - -pkgdir = os.path.dirname(os.path.abspath(__file__)) - - -# Enable code coverage for C code: we can't use CFLAGS=-coverage in tox.ini, since that -# may mess with compiling dependencies (e.g. numpy). Therefore we set SETUPPY_ -# CFLAGS=-coverage in tox.ini and copy it to CFLAGS here (after deps have been safely installed). -if "TOXENV" in os.environ and "SETUPPY_CFLAGS" in os.environ: - os.environ["CFLAGS"] = os.environ["SETUPPY_CFLAGS"] - -test_req = [ - "pre-commit", - "pytest>=5.0", - "pytest-cov", - "tox", - "pytest-remotedata>=0.3.2", - "powerbox", - "pytest-plt", - "questionary", -] - -doc_req = ["nbsphinx", "numpydoc", "sphinx >= 1.3", "sphinx-rtd-theme"] - -setup( - name="21cmFAST", - license="MIT license", - description="A semi-numerical cosmological simulation code for the 21cm signal", - long_description="%s\n%s" - % ( - re.compile("^.. start-badges.*^.. 
end-badges", re.M | re.S).sub( - "", _read("README.rst") - ), - re.sub(":[a-z]+:`~?(.*?)`", r"``\1``", _read("CHANGELOG.rst")), - ), - long_description_content_type="text/x-rst", - author="The 21cmFAST coredev team", - author_email="21cmfast.coredev@gmail.com", - url="https://github.com/21cmFAST/21cmFAST", - packages=find_packages("src"), - package_dir={"": "src"}, - include_package_data=True, - python_requires=">=3.10", - zip_safe=False, - classifiers=[ - # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", - "Operating System :: Unix", - "Operating System :: POSIX", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: Implementation :: CPython", - ], - keywords=["Epoch of Reionization", "Cosmology"], - install_requires=[ - "click", - "numpy<2", - "pyyaml", - "cffi>=1.0", - "scipy", - "astropy>=2.0", - "h5py>=2.8.0", - "cached_property", - "matplotlib", - "bidict", - "cosmotile>=0.2.0", - "attrs", - ], - extras_require={"tests": test_req, "docs": doc_req, "dev": test_req + doc_req}, - setup_requires=["cffi>=1.0", "setuptools_scm"], - entry_points={"console_scripts": ["21cmfast = py21cmfast.cli:main"]}, - cffi_modules=[f"{pkgdir}/build_cffi.py:ffi"], - use_scm_version={ - "write_to": "src/py21cmfast/_version.py", - "parentdir_prefix_version": "21cmFAST-", - "fallback_version": "0.0.0", - }, -) diff --git a/src/meson.build b/src/meson.build new file mode 100644 index 000000000..9b87673d7 --- /dev/null +++ b/src/meson.build @@ -0,0 +1 @@ +subdir('py21cmfast') diff --git a/src/py21cmfast/drivers/meson.build b/src/py21cmfast/drivers/meson.build new file mode 100644 index 000000000..54dc5bbc7 --- /dev/null +++ 
b/src/py21cmfast/drivers/meson.build @@ -0,0 +1,12 @@ +source_files = [ + '__init__.py', + 'coeval.py', + 'lightcone.py', + 'param_config.py', + 'single_field.py', +] + +py3.install_sources( + source_files, + subdir: 'py21cmfast' +) diff --git a/src/py21cmfast/meson.build b/src/py21cmfast/meson.build new file mode 100644 index 000000000..b03231393 --- /dev/null +++ b/src/py21cmfast/meson.build @@ -0,0 +1,20 @@ +source_files = [ + '__init__.py', + '_cfg.py', + '_logging.py', + 'cache_tools.py', + 'cli.py', + 'lightcones.py', + 'plotting.py', + 'utils.py', + 'yaml.py', +] + +py3.install_sources( + source_files, + subdir: 'py21cmfast' +) + +subdir('src') +subdir('drivers') +subdir('wrapper') diff --git a/src/py21cmfast/src/meson.build b/src/py21cmfast/src/meson.build new file mode 100644 index 000000000..44c182b52 --- /dev/null +++ b/src/py21cmfast/src/meson.build @@ -0,0 +1,40 @@ +source_files = [ + 'BrightnessTemperatureBox.c', + 'HaloBox.c', + 'HaloField.c', + 'InitialConditions.c', + 'InputParameters.c', + 'IonisationBox.c', + 'LuminosityFunction.c', + 'PerturbField.c', + 'PerturbHaloField.c', + 'SpinTemperatureBox.c', + 'Stochasticity.c', + 'bubble_helper_progs.c', + 'cosmology.c', + 'debugging.c', + 'dft.c', + 'elec_interp.c', + 'filtering.c', + 'heating_helper_progs.c', + 'hmf.c', + 'interp_tables.c', + 'interpolation.c', + 'photoncons.c', + 'recombinations.c', + 'subcell_rsds.c', + 'thermochem.c', + 'MapMass_cpu.c', + 'rng.c', +] + +omp = dependency('openmp') +gsl = dependency('gsl') + +# fftw = dependency('fftw3f_threads') +cc = meson.get_compiler ('c') +search_paths = [ '/usr/lib', '/usr/local/lib', '/opt/homebrew/lib' ] +fftw = cc.find_library ('fftw3f', required: true, dirs: search_paths) +fftw_threads = cc.find_library ('fftw3f_threads', required: true, dirs: search_paths) + +library('21cmFAST', source_files, dependencies: [omp,gsl,fftw,fftw_threads]) diff --git a/src/py21cmfast/wrapper/meson.build b/src/py21cmfast/wrapper/meson.build new file mode 100644 
index 000000000..bf9b24be3 --- /dev/null +++ b/src/py21cmfast/wrapper/meson.build @@ -0,0 +1,17 @@ +source_files = [ + '__init__.py', + '_utils.py', + 'arraystate.py', + 'cfuncs.py', + 'exceptions.py', + 'globals.py', + 'inputs.py', + 'outputs.py', + 'photoncons.py', + 'structs.py', +] + +py3.install_sources( + source_files, + subdir: 'py21cmfast' +) From 896a62f2f8fc66628d1b1f3c591cd77c1a776303 Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Mon, 17 Feb 2025 11:24:15 +1100 Subject: [PATCH 095/145] meson build for CPU mostly working. Import and tests fail due to broken wrapper. --- meson.build | 2 +- pyproject.toml | 1 + src/py21cmfast/drivers/meson.build | 12 ------------ src/py21cmfast/meson.build | 17 ++++++++++++++--- src/py21cmfast/src/meson.build | 10 +++++++++- src/py21cmfast/wrapper/meson.build | 17 ----------------- 6 files changed, 25 insertions(+), 34 deletions(-) delete mode 100644 src/py21cmfast/drivers/meson.build delete mode 100644 src/py21cmfast/wrapper/meson.build diff --git a/meson.build b/meson.build index 09926b38d..18a470e85 100644 --- a/meson.build +++ b/meson.build @@ -2,6 +2,6 @@ project('21cmFAST', 'c', version : run_command('cat', 'VERSION').stdout().strip(), ) -py3 = import('python').find_installation(pure: false) +py = import('python').find_installation(pure: false) subdir('src') diff --git a/pyproject.toml b/pyproject.toml index bdee90d25..90764e373 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ dependencies=[ "bidict", "cosmotile>=0.2.0", "attrs", + "ninja", ] # [tool.setuptools.packages.find] diff --git a/src/py21cmfast/drivers/meson.build b/src/py21cmfast/drivers/meson.build deleted file mode 100644 index 54dc5bbc7..000000000 --- a/src/py21cmfast/drivers/meson.build +++ /dev/null @@ -1,12 +0,0 @@ -source_files = [ - '__init__.py', - 'coeval.py', - 'lightcone.py', - 'param_config.py', - 'single_field.py', -] - -py3.install_sources( - source_files, - subdir: 'py21cmfast' -) diff --git a/src/py21cmfast/meson.build 
b/src/py21cmfast/meson.build index b03231393..7ae5cad23 100644 --- a/src/py21cmfast/meson.build +++ b/src/py21cmfast/meson.build @@ -10,11 +10,22 @@ source_files = [ 'yaml.py', ] -py3.install_sources( +py.install_sources( source_files, subdir: 'py21cmfast' ) +pure_subdirs = [ + 'drivers', + 'wrapper', + '_data', +] + +# Install the Python code +install_root = py.get_install_dir() +foreach subdir: pure_subdirs + install_subdir(subdir, install_dir: install_root / 'py21cmfast') +endforeach + +# Build C-extension subdir('src') -subdir('drivers') -subdir('wrapper') diff --git a/src/py21cmfast/src/meson.build b/src/py21cmfast/src/meson.build index 44c182b52..0cd508783 100644 --- a/src/py21cmfast/src/meson.build +++ b/src/py21cmfast/src/meson.build @@ -37,4 +37,12 @@ search_paths = [ '/usr/lib', '/usr/local/lib', '/opt/homebrew/lib' ] fftw = cc.find_library ('fftw3f', required: true, dirs: search_paths) fftw_threads = cc.find_library ('fftw3f_threads', required: true, dirs: search_paths) -library('21cmFAST', source_files, dependencies: [omp,gsl,fftw,fftw_threads]) +deps = [omp,gsl,fftw,fftw_threads] +target_dir = 'py21cmfast/c_21cmfast' +py.extension_module( + source_files, + dependencies: deps, + install: true, + subdir: target_dir +) +# library('21cmFAST', source_files, dependencies: [omp,gsl,fftw,fftw_threads]) diff --git a/src/py21cmfast/wrapper/meson.build b/src/py21cmfast/wrapper/meson.build deleted file mode 100644 index bf9b24be3..000000000 --- a/src/py21cmfast/wrapper/meson.build +++ /dev/null @@ -1,17 +0,0 @@ -source_files = [ - '__init__.py', - '_utils.py', - 'arraystate.py', - 'cfuncs.py', - 'exceptions.py', - 'globals.py', - 'inputs.py', - 'outputs.py', - 'photoncons.py', - 'structs.py', -] - -py3.install_sources( - source_files, - subdir: 'py21cmfast' -) From 8cce0b0775d0fd7dd8362adcd6925d6f1e20ab01 Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Mon, 24 Feb 2025 20:54:26 +1100 Subject: [PATCH 096/145] Add nanobind as a dependency. 
--- meson.build | 1 + pyproject.toml | 2 +- src/py21cmfast/src/meson.build | 4 +++- subprojects/.gitignore | 3 +++ subprojects/nanobind.wrap | 13 +++++++++++++ subprojects/robin-map.wrap | 13 +++++++++++++ 6 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 subprojects/.gitignore create mode 100644 subprojects/nanobind.wrap create mode 100644 subprojects/robin-map.wrap diff --git a/meson.build b/meson.build index 18a470e85..9f9383124 100644 --- a/meson.build +++ b/meson.build @@ -1,5 +1,6 @@ project('21cmFAST', 'c', version : run_command('cat', 'VERSION').stdout().strip(), + default_options : ['cpp_std=c++17'], ) py = import('python').find_installation(pure: false) diff --git a/pyproject.toml b/pyproject.toml index 90764e373..1045d2ad7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,4 +110,4 @@ exclude = ''' [build-system] build-backend = 'mesonpy' -requires = ['meson-python'] +requires = ['meson-python','nanobind>=2.4.0'] diff --git a/src/py21cmfast/src/meson.build b/src/py21cmfast/src/meson.build index 0cd508783..e751d45d2 100644 --- a/src/py21cmfast/src/meson.build +++ b/src/py21cmfast/src/meson.build @@ -30,6 +30,7 @@ source_files = [ omp = dependency('openmp') gsl = dependency('gsl') +nanobind = dependency('nanobind', static: true) # fftw = dependency('fftw3f_threads') cc = meson.get_compiler ('c') @@ -37,7 +38,8 @@ search_paths = [ '/usr/lib', '/usr/local/lib', '/opt/homebrew/lib' ] fftw = cc.find_library ('fftw3f', required: true, dirs: search_paths) fftw_threads = cc.find_library ('fftw3f_threads', required: true, dirs: search_paths) -deps = [omp,gsl,fftw,fftw_threads] + +deps = [omp,gsl,fftw,fftw_threads,nanobind] target_dir = 'py21cmfast/c_21cmfast' py.extension_module( source_files, diff --git a/subprojects/.gitignore b/subprojects/.gitignore new file mode 100644 index 000000000..9a1d1e46e --- /dev/null +++ b/subprojects/.gitignore @@ -0,0 +1,3 @@ +* +!.gitignore +!*.wrap diff --git a/subprojects/nanobind.wrap 
b/subprojects/nanobind.wrap new file mode 100644 index 000000000..78e2e7c5d --- /dev/null +++ b/subprojects/nanobind.wrap @@ -0,0 +1,13 @@ +[wrap-file] +directory = nanobind-2.4.0 +source_url = https://github.com/wjakob/nanobind/archive/refs/tags/v2.4.0.tar.gz +source_filename = nanobind-2.4.0.tar.gz +source_hash = bb35deaed7efac5029ed1e33880a415638352f757d49207a8e6013fefb6c49a7 +patch_filename = nanobind_2.4.0-2_patch.zip +patch_url = https://wrapdb.mesonbuild.com/v2/nanobind_2.4.0-2/get_patch +patch_hash = cf493bda0b11ea4e8d9dd42229c3bbdd52af88cc4aedac75a1eccb102b86dd4a +source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/nanobind_2.4.0-2/nanobind-2.4.0.tar.gz +wrapdb_version = 2.4.0-2 + +[provide] +nanobind = nanobind_dep diff --git a/subprojects/robin-map.wrap b/subprojects/robin-map.wrap new file mode 100644 index 000000000..3da2993bb --- /dev/null +++ b/subprojects/robin-map.wrap @@ -0,0 +1,13 @@ +[wrap-file] +directory = robin-map-1.3.0 +source_url = https://github.com/Tessil/robin-map/archive/refs/tags/v1.3.0.tar.gz +source_filename = robin-map-1.3.0.tar.gz +source_hash = a8424ad3b0affd4c57ed26f0f3d8a29604f0e1f2ef2089f497f614b1c94c7236 +patch_filename = robin-map_1.3.0-1_patch.zip +patch_url = https://wrapdb.mesonbuild.com/v2/robin-map_1.3.0-1/get_patch +patch_hash = 6d090f988541ffb053512607e0942cbd0dbc2a4fa0563e44ff6a37e810b8c739 +source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/robin-map_1.3.0-1/robin-map-1.3.0.tar.gz +wrapdb_version = 1.3.0-1 + +[provide] +robin-map = robin_map_dep From fc636c8ebbe22a28db187389635ea9e87ca6e5dd Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Mon, 24 Feb 2025 21:35:55 +1100 Subject: [PATCH 097/145] Add wrapper code. Build broken due to C/C++ linkage issues. 
--- meson.build | 2 +- src/py21cmfast/src/_wrapper.cpp | 380 ++++++++++++++++++++++++++++++++ src/py21cmfast/src/meson.build | 1 + 3 files changed, 382 insertions(+), 1 deletion(-) create mode 100644 src/py21cmfast/src/_wrapper.cpp diff --git a/meson.build b/meson.build index 9f9383124..0eb63bc09 100644 --- a/meson.build +++ b/meson.build @@ -1,4 +1,4 @@ -project('21cmFAST', 'c', +project('21cmFAST', [ 'c', 'cpp' ], version : run_command('cat', 'VERSION').stdout().strip(), default_options : ['cpp_std=c++17'], ) diff --git a/src/py21cmfast/src/_wrapper.cpp b/src/py21cmfast/src/_wrapper.cpp new file mode 100644 index 000000000..6f748f01d --- /dev/null +++ b/src/py21cmfast/src/_wrapper.cpp @@ -0,0 +1,380 @@ +#include +#include +// #include + +namespace nb = nanobind; + +extern "C" { +#include "InputParameters.h" +#include "OutputStructs.h" +#include "21cmFAST.h" +#include "BrightnessTemperatureBox.h" +#include "Constants.h" +#include "HaloBox.h" +#include "HaloField.h" +#include "InitialConditions.h" +#include "InputParameters.h" +#include "IonisationBox.h" +#include "LuminosityFunction.h" +#include "OutputStructs.h" +#include "PerturbField.h" +#include "PerturbHaloField.h" +#include "SpinTemperatureBox.h" +#include "Stochasticity.h" +#include "bubble_helper_progs.h" +#include "cexcept.h" +#include "cosmology.h" +#include "debugging.h" +#include "dft.h" +#include "elec_interp.h" +#include "exceptions.h" +#include "filtering.h" +#include "heating_helper_progs.h" +#include "hmf.h" +#include "indexing.h" +#include "interp_tables.h" +#include "interpolation.h" +#include "logger.h" +#include "photoncons.h" +#include "recombinations.h" +#include "rng.h" +#include "subcell_rsds.h" +#include "thermochem.h" +} + +NB_MODULE(wrapper_module, m) { + // Bind input parameters + + // Bind CosmoParams + nb::class_(m, "CosmoParams") + .def_readwrite("SIGMA_8", &CosmoParams::SIGMA_8) + .def_readwrite("hlittle", &CosmoParams::hlittle) + .def_readwrite("OMm", &CosmoParams::OMm) + 
.def_readwrite("OMl", &CosmoParams::OMl) + .def_readwrite("OMb", &CosmoParams::OMb) + .def_readwrite("POWER_INDEX", &CosmoParams::POWER_INDEX); + + // Bind UserParams + nb::class_(m, "UserParams") + .def_readwrite("HII_DIM", &UserParams::HII_DIM) + .def_readwrite("DIM", &UserParams::DIM) + .def_readwrite("BOX_LEN", &UserParams::BOX_LEN) + .def_readwrite("NON_CUBIC_FACTOR", &UserParams::NON_CUBIC_FACTOR) + .def_readwrite("USE_FFTW_WISDOM", &UserParams::USE_FFTW_WISDOM) + .def_readwrite("HMF", &UserParams::HMF) + .def_readwrite("USE_RELATIVE_VELOCITIES", + &UserParams::USE_RELATIVE_VELOCITIES) + .def_readwrite("POWER_SPECTRUM", &UserParams::POWER_SPECTRUM) + .def_readwrite("N_THREADS", &UserParams::N_THREADS) + .def_readwrite("PERTURB_ON_HIGH_RES", &UserParams::PERTURB_ON_HIGH_RES) + .def_readwrite("NO_RNG", &UserParams::NO_RNG) + .def_readwrite("USE_INTERPOLATION_TABLES", + &UserParams::USE_INTERPOLATION_TABLES) + .def_readwrite("INTEGRATION_METHOD_ATOMIC", + &UserParams::INTEGRATION_METHOD_ATOMIC) + .def_readwrite("INTEGRATION_METHOD_MINI", + &UserParams::INTEGRATION_METHOD_MINI) + .def_readwrite("USE_2LPT", &UserParams::USE_2LPT) + .def_readwrite("MINIMIZE_MEMORY", &UserParams::MINIMIZE_MEMORY) + .def_readwrite("KEEP_3D_VELOCITIES", &UserParams::KEEP_3D_VELOCITIES) + .def_readwrite("SAMPLER_MIN_MASS", &UserParams::SAMPLER_MIN_MASS) + .def_readwrite("SAMPLER_BUFFER_FACTOR", + &UserParams::SAMPLER_BUFFER_FACTOR) + .def_readwrite("MAXHALO_FACTOR", &UserParams::MAXHALO_FACTOR) + .def_readwrite("N_COND_INTERP", &UserParams::N_COND_INTERP) + .def_readwrite("N_PROB_INTERP", &UserParams::N_PROB_INTERP) + .def_readwrite("MIN_LOGPROB", &UserParams::MIN_LOGPROB) + .def_readwrite("SAMPLE_METHOD", &UserParams::SAMPLE_METHOD) + .def_readwrite("AVG_BELOW_SAMPLER", &UserParams::AVG_BELOW_SAMPLER) + .def_readwrite("HALOMASS_CORRECTION", &UserParams::HALOMASS_CORRECTION) + .def_readwrite("PARKINSON_G0", &UserParams::PARKINSON_G0) + .def_readwrite("PARKINSON_y1", 
&UserParams::PARKINSON_y1) + .def_readwrite("PARKINSON_y2", &UserParams::PARKINSON_y2); + + // Bind AstroParams + nb::class_(m, "AstroParams") + .def_readwrite("HII_EFF_FACTOR", &AstroParams::HII_EFF_FACTOR) + .def_readwrite("F_STAR10", &AstroParams::F_STAR10) + .def_readwrite("ALPHA_STAR", &AstroParams::ALPHA_STAR) + .def_readwrite("ALPHA_STAR_MINI", &AstroParams::ALPHA_STAR_MINI) + .def_readwrite("SIGMA_STAR", &AstroParams::SIGMA_STAR) + .def_readwrite("CORR_STAR", &AstroParams::CORR_STAR) + .def_readwrite("UPPER_STELLAR_TURNOVER_MASS", + &AstroParams::UPPER_STELLAR_TURNOVER_MASS) + .def_readwrite("UPPER_STELLAR_TURNOVER_INDEX", + &AstroParams::UPPER_STELLAR_TURNOVER_INDEX) + .def_readwrite("F_STAR7_MINI", &AstroParams::F_STAR7_MINI) + .def_readwrite("t_STAR", &AstroParams::t_STAR) + .def_readwrite("CORR_SFR", &AstroParams::CORR_SFR) + .def_readwrite("SIGMA_SFR_INDEX", &AstroParams::SIGMA_SFR_INDEX) + .def_readwrite("SIGMA_SFR_LIM", &AstroParams::SIGMA_SFR_LIM) + .def_readwrite("L_X", &AstroParams::L_X) + .def_readwrite("L_X_MINI", &AstroParams::L_X_MINI) + .def_readwrite("SIGMA_LX", &AstroParams::SIGMA_LX) + .def_readwrite("CORR_LX", &AstroParams::CORR_LX) + .def_readwrite("F_ESC10", &AstroParams::F_ESC10) + .def_readwrite("ALPHA_ESC", &AstroParams::ALPHA_ESC) + .def_readwrite("F_ESC7_MINI", &AstroParams::F_ESC7_MINI) + .def_readwrite("M_TURN", &AstroParams::M_TURN) + .def_readwrite("R_BUBBLE_MAX", &AstroParams::R_BUBBLE_MAX) + .def_readwrite("ION_Tvir_MIN", &AstroParams::ION_Tvir_MIN); + + // Bind FlagOptions + nb::class_(m, "FlagOptions") + .def_readwrite("USE_HALO_FIELD", &FlagOptions::USE_HALO_FIELD) + .def_readwrite("USE_MINI_HALOS", &FlagOptions::USE_MINI_HALOS) + .def_readwrite("USE_CMB_HEATING", &FlagOptions::USE_CMB_HEATING) + .def_readwrite("USE_LYA_HEATING", &FlagOptions::USE_LYA_HEATING) + .def_readwrite("USE_MASS_DEPENDENT_ZETA", + &FlagOptions::USE_MASS_DEPENDENT_ZETA) + .def_readwrite("SUBCELL_RSD", &FlagOptions::SUBCELL_RSD) + 
.def_readwrite("APPLY_RSDS", &FlagOptions::APPLY_RSDS) + .def_readwrite("INHOMO_RECO", &FlagOptions::INHOMO_RECO) + .def_readwrite("USE_TS_FLUCT", &FlagOptions::USE_TS_FLUCT) + .def_readwrite("M_MIN_in_Mass", &FlagOptions::M_MIN_in_Mass) + .def_readwrite("FIX_VCB_AVG", &FlagOptions::FIX_VCB_AVG) + .def_readwrite("HALO_STOCHASTICITY", &FlagOptions::HALO_STOCHASTICITY) + .def_readwrite("USE_EXP_FILTER", &FlagOptions::USE_EXP_FILTER) + .def_readwrite("FIXED_HALO_GRIDS", &FlagOptions::FIXED_HALO_GRIDS) + .def_readwrite("CELL_RECOMB", &FlagOptions::CELL_RECOMB) + .def_readwrite("PHOTON_CONS_TYPE", &FlagOptions::PHOTON_CONS_TYPE) + .def_readwrite("USE_UPPER_STELLAR_TURNOVER", + &FlagOptions::USE_UPPER_STELLAR_TURNOVER) + .def_readwrite("HALO_SCALING_RELATIONS_MEDIAN", + &FlagOptions::HALO_SCALING_RELATIONS_MEDIAN); + + nb::class_(m, "GlobalParams") + .def_readwrite("ALPHA_UVB", &GlobalParams::ALPHA_UVB) + .def_readwrite("EVOLVE_DENSITY_LINEARLY", + &GlobalParams::EVOLVE_DENSITY_LINEARLY) + .def_readwrite("SMOOTH_EVOLVED_DENSITY_FIELD", + &GlobalParams::SMOOTH_EVOLVED_DENSITY_FIELD) + .def_readwrite("R_smooth_density", &GlobalParams::R_smooth_density) + .def_readwrite("HII_ROUND_ERR", &GlobalParams::HII_ROUND_ERR) + .def_readwrite("FIND_BUBBLE_ALGORITHM", + &GlobalParams::FIND_BUBBLE_ALGORITHM) + .def_readwrite("N_POISSON", &GlobalParams::N_POISSON) + .def_readwrite("T_USE_VELOCITIES", &GlobalParams::T_USE_VELOCITIES) + .def_readwrite("MAX_DVDR", &GlobalParams::MAX_DVDR) + .def_readwrite("DELTA_R_HII_FACTOR", &GlobalParams::DELTA_R_HII_FACTOR) + .def_readwrite("DELTA_R_FACTOR", &GlobalParams::DELTA_R_FACTOR) + .def_readwrite("HII_FILTER", &GlobalParams::HII_FILTER) + .def_readwrite("INITIAL_REDSHIFT", &GlobalParams::INITIAL_REDSHIFT) + .def_readwrite("R_OVERLAP_FACTOR", &GlobalParams::R_OVERLAP_FACTOR) + .def_readwrite("DELTA_CRIT_MODE", &GlobalParams::DELTA_CRIT_MODE) + .def_readwrite("HALO_FILTER", &GlobalParams::HALO_FILTER) + .def_readwrite("OPTIMIZE", 
&GlobalParams::OPTIMIZE) + .def_readwrite("OPTIMIZE_MIN_MASS", &GlobalParams::OPTIMIZE_MIN_MASS) + .def_readwrite("CRIT_DENS_TRANSITION", + &GlobalParams::CRIT_DENS_TRANSITION) + .def_readwrite("MIN_DENSITY_LOW_LIMIT", + &GlobalParams::MIN_DENSITY_LOW_LIMIT) + .def_readwrite("RecombPhotonCons", &GlobalParams::RecombPhotonCons) + .def_readwrite("PhotonConsStart", &GlobalParams::PhotonConsStart) + .def_readwrite("PhotonConsEnd", &GlobalParams::PhotonConsEnd) + .def_readwrite("PhotonConsAsymptoteTo", + &GlobalParams::PhotonConsAsymptoteTo) + .def_readwrite("PhotonConsEndCalibz", &GlobalParams::PhotonConsEndCalibz) + .def_readwrite("PhotonConsSmoothing", &GlobalParams::PhotonConsSmoothing) + .def_readwrite("HEAT_FILTER", &GlobalParams::HEAT_FILTER) + .def_readwrite("CLUMPING_FACTOR", &GlobalParams::CLUMPING_FACTOR) + .def_readwrite("Z_HEAT_MAX", &GlobalParams::Z_HEAT_MAX) + .def_readwrite("R_XLy_MAX", &GlobalParams::R_XLy_MAX) + .def_readwrite("NUM_FILTER_STEPS_FOR_Ts", + &GlobalParams::NUM_FILTER_STEPS_FOR_Ts) + .def_readwrite("ZPRIME_STEP_FACTOR", &GlobalParams::ZPRIME_STEP_FACTOR) + .def_readwrite("TK_at_Z_HEAT_MAX", &GlobalParams::TK_at_Z_HEAT_MAX) + .def_readwrite("XION_at_Z_HEAT_MAX", &GlobalParams::XION_at_Z_HEAT_MAX) + .def_readwrite("Pop", &GlobalParams::Pop) + .def_readwrite("Pop2_ion", &GlobalParams::Pop2_ion) + .def_readwrite("Pop3_ion", &GlobalParams::Pop3_ion) + .def_readwrite("NU_X_BAND_MAX", &GlobalParams::NU_X_BAND_MAX) + .def_readwrite("NU_X_MAX", &GlobalParams::NU_X_MAX) + .def_readwrite("NBINS_LF", &GlobalParams::NBINS_LF) + .def_readwrite("P_CUTOFF", &GlobalParams::P_CUTOFF) + .def_readwrite("M_WDM", &GlobalParams::M_WDM) + .def_readwrite("g_x", &GlobalParams::g_x) + .def_readwrite("OMn", &GlobalParams::OMn) + .def_readwrite("OMk", &GlobalParams::OMk) + .def_readwrite("OMr", &GlobalParams::OMr) + .def_readwrite("OMtot", &GlobalParams::OMtot) + .def_readwrite("Y_He", &GlobalParams::Y_He) + .def_readwrite("wl", &GlobalParams::wl) + 
.def_readwrite("SHETH_b", &GlobalParams::SHETH_b) + .def_readwrite("SHETH_c", &GlobalParams::SHETH_c) + .def_readwrite("Zreion_HeII", &GlobalParams::Zreion_HeII) + .def_readwrite("FILTER", &GlobalParams::FILTER) + .def_readwrite("external_table_path", &GlobalParams::external_table_path) + .def_readwrite("wisdoms_path", &GlobalParams::wisdoms_path) + .def_readwrite("R_BUBBLE_MIN", &GlobalParams::R_BUBBLE_MIN) + .def_readwrite("M_MIN_INTEGRAL", &GlobalParams::M_MIN_INTEGRAL) + .def_readwrite("M_MAX_INTEGRAL", &GlobalParams::M_MAX_INTEGRAL) + .def_readwrite("T_RE", &GlobalParams::T_RE) + .def_readwrite("VAVG", &GlobalParams::VAVG) + .def_readwrite("USE_ADIABATIC_FLUCTUATIONS", + &GlobalParams::USE_ADIABATIC_FLUCTUATIONS); + + // Bind output parameters + nb::class_(m, "InitialConditions") + .def_readwrite("lowres_density", &InitialConditions::lowres_density) + .def_readwrite("lowres_vx", &InitialConditions::lowres_vx) + .def_readwrite("lowres_vy", &InitialConditions::lowres_vy) + .def_readwrite("lowres_vz", &InitialConditions::lowres_vz) + .def_readwrite("lowres_vx_2LPT", &InitialConditions::lowres_vx_2LPT) + .def_readwrite("lowres_vy_2LPT", &InitialConditions::lowres_vy_2LPT) + .def_readwrite("lowres_vz_2LPT", &InitialConditions::lowres_vz_2LPT) + .def_readwrite("hires_density", &InitialConditions::hires_density) + .def_readwrite("hires_vx", &InitialConditions::hires_vx) + .def_readwrite("hires_vy", &InitialConditions::hires_vy) + .def_readwrite("hires_vz", &InitialConditions::hires_vz) + .def_readwrite("hires_vx_2LPT", &InitialConditions::hires_vx_2LPT) + .def_readwrite("hires_vy_2LPT", &InitialConditions::hires_vy_2LPT) + .def_readwrite("hires_vz_2LPT", &InitialConditions::hires_vz_2LPT) + .def_readwrite("lowres_vcb", &InitialConditions::lowres_vcb); + + nb::class_(m, "PerturbedField") + .def_readwrite("density", &PerturbedField::density) + .def_readwrite("velocity_x", &PerturbedField::velocity_x) + .def_readwrite("velocity_y", &PerturbedField::velocity_y) + 
.def_readwrite("velocity_z", &PerturbedField::velocity_z); + + nb::class_(m, "HaloField") + .def_readwrite("n_halos", &HaloField::n_halos) + .def_readwrite("buffer_size", &HaloField::buffer_size) + .def_readwrite("halo_masses", &HaloField::halo_masses) + .def_readwrite("halo_coords", &HaloField::halo_coords) + .def_readwrite("star_rng", &HaloField::star_rng) + .def_readwrite("sfr_rng", &HaloField::sfr_rng) + .def_readwrite("xray_rng", &HaloField::xray_rng); + + nb::class_(m, "PerturbHaloField") + .def_readwrite("n_halos", &PerturbHaloField::n_halos) + .def_readwrite("buffer_size", &PerturbHaloField::buffer_size) + .def_readwrite("halo_masses", &PerturbHaloField::halo_masses) + .def_readwrite("halo_coords", &PerturbHaloField::halo_coords) + .def_readwrite("star_rng", &PerturbHaloField::star_rng) + .def_readwrite("sfr_rng", &PerturbHaloField::sfr_rng) + .def_readwrite("xray_rng", &PerturbHaloField::xray_rng); + + nb::class_(m, "HaloBox") + .def_readwrite("halo_mass", &HaloBox::halo_mass) + .def_readwrite("halo_stars", &HaloBox::halo_stars) + .def_readwrite("halo_stars_mini", &HaloBox::halo_stars_mini) + .def_readwrite("count", &HaloBox::count) + .def_readwrite("n_ion", &HaloBox::n_ion) + .def_readwrite("halo_sfr", &HaloBox::halo_sfr) + .def_readwrite("halo_xray", &HaloBox::halo_xray) + .def_readwrite("halo_sfr_mini", &HaloBox::halo_sfr_mini) + .def_readwrite("whalo_sfr", &HaloBox::whalo_sfr) + .def_readwrite("log10_Mcrit_ACG_ave", &HaloBox::log10_Mcrit_ACG_ave) + .def_readwrite("log10_Mcrit_MCG_ave", &HaloBox::log10_Mcrit_MCG_ave); + + nb::class_(m, "XraySourceBox") + .def_readwrite("filtered_sfr", &XraySourceBox::filtered_sfr) + .def_readwrite("filtered_xray", &XraySourceBox::filtered_xray) + .def_readwrite("filtered_sfr_mini", &XraySourceBox::filtered_sfr_mini) + .def_readwrite("mean_log10_Mcrit_LW", &XraySourceBox::mean_log10_Mcrit_LW) + .def_readwrite("mean_sfr", &XraySourceBox::mean_sfr) + .def_readwrite("mean_sfr_mini", &XraySourceBox::mean_sfr_mini); + + 
nb::class_(m, "TsBox") + .def_readwrite("Ts_box", &TsBox::Ts_box) + .def_readwrite("x_e_box", &TsBox::x_e_box) + .def_readwrite("Tk_box", &TsBox::Tk_box) + .def_readwrite("J_21_LW_box", &TsBox::J_21_LW_box); + + nb::class_(m, "IonizedBox") + .def_readwrite("mean_f_coll", &IonizedBox::mean_f_coll) + .def_readwrite("mean_f_coll_MINI", &IonizedBox::mean_f_coll_MINI) + .def_readwrite("log10_Mturnover_ave", &IonizedBox::log10_Mturnover_ave) + .def_readwrite("log10_Mturnover_MINI_ave", + &IonizedBox::log10_Mturnover_MINI_ave) + .def_readwrite("xH_box", &IonizedBox::xH_box) + .def_readwrite("Gamma12_box", &IonizedBox::Gamma12_box) + .def_readwrite("MFP_box", &IonizedBox::MFP_box) + .def_readwrite("z_re_box", &IonizedBox::z_re_box) + .def_readwrite("dNrec_box", &IonizedBox::dNrec_box) + .def_readwrite("temp_kinetic_all_gas", &IonizedBox::temp_kinetic_all_gas) + .def_readwrite("Fcoll", &IonizedBox::Fcoll) + .def_readwrite("Fcoll_MINI", &IonizedBox::Fcoll_MINI); + + nb::class_(m, "BrightnessTemp ") + .def_readwrite("brightness_temp", &BrightnessTemp::brightness_temp); + + // Bind functions + m.def("ComputeInitialConditions", &ComputeInitialConditions); + m.def("ComputePerturbField", &ComputePerturbField); + m.def("ComputeHaloField", &ComputeHaloField); + m.def("ComputePerturbHaloField", &ComputePerturbHaloField); + m.def("ComputeTsBox", &ComputeTsBox); + m.def("ComputeIonizedBox", &ComputeIonizedBox); + m.def("ComputeBrightnessTemp", &ComputeBrightnessTemp); + m.def("ComputeHaloBox", &ComputeHaloBox); + m.def("UpdateXraySourceBox", &UpdateXraySourceBox); + m.def("InitialisePhotonCons", &InitialisePhotonCons); + m.def("PhotonCons_Calibration", &PhotonCons_Calibration); + m.def("ComputeZstart_PhotonCons", &ComputeZstart_PhotonCons); + m.def("adjust_redshifts_for_photoncons", &adjust_redshifts_for_photoncons); + m.def("determine_deltaz_for_photoncons", &determine_deltaz_for_photoncons); + m.def("ObtainPhotonConsData", &ObtainPhotonConsData); + m.def("FreePhotonConsMemory", 
&FreePhotonConsMemory); + m.def( + "photon_cons_allocated", []() -> bool { return photon_cons_allocated; }, + "Returns whether photon conservation memory is allocated"); + m.def("set_alphacons_params", &set_alphacons_params); + m.def("ComputeLF", &ComputeLF); + m.def("ComputeTau", &ComputeTau); + m.def("init_ps", &init_ps); + m.def("init_heat", &init_heat); + m.def("CreateFFTWWisdoms", &CreateFFTWWisdoms); + m.def("Broadcast_struct_global_noastro", &Broadcast_struct_global_noastro); + m.def("Broadcast_struct_global_all", &Broadcast_struct_global_all); + m.def("initialiseSigmaMInterpTable", &initialiseSigmaMInterpTable); + m.def("initialise_SFRD_spline", &initialise_SFRD_spline); + m.def("initialise_Nion_Ts_spline", &initialise_Nion_Ts_spline); + m.def("initialise_FgtrM_delta_table", &initialise_FgtrM_delta_table); + m.def("init_FcollTable", &init_FcollTable); + m.def("initialise_Nion_Conditional_spline", + &initialise_Nion_Conditional_spline); + m.def("initialise_SFRD_Conditional_table", + &initialise_SFRD_Conditional_table); + m.def("initialise_dNdM_tables", &initialise_dNdM_tables); + m.def("initialise_dNdM_inverse_table", &initialise_dNdM_inverse_table); + m.def("EvaluateNionTs", &EvaluateNionTs); + m.def("EvaluateNionTs_MINI", &EvaluateNionTs_MINI); + m.def("EvaluateSFRD", &EvaluateSFRD); + m.def("EvaluateSFRD_MINI", &EvaluateSFRD_MINI); + m.def("EvaluateSFRD_Conditional", &EvaluateSFRD_Conditional); + m.def("EvaluateSFRD_Conditional_MINI", &EvaluateSFRD_Conditional_MINI); + m.def("EvaluateNion_Conditional", &EvaluateNion_Conditional); + m.def("EvaluateNion_Conditional_MINI", &EvaluateNion_Conditional_MINI); + m.def("EvaluateNhalo", &EvaluateNhalo); + m.def("EvaluateMcoll", &EvaluateMcoll); + m.def("EvaluateNhaloInv", &EvaluateNhaloInv); + m.def("EvaluateFcoll_delta", &EvaluateFcoll_delta); + m.def("EvaluatedFcolldz", &EvaluatedFcolldz); + m.def("EvaluateSigma", &EvaluateSigma); + m.def("EvaluatedSigmasqdm", &EvaluatedSigmasqdm); + m.def("initialise_GL", 
&initialise_GL); + m.def("Nhalo_Conditional", &Nhalo_Conditional); + m.def("Mcoll_Conditional", &Mcoll_Conditional); + m.def("Nion_ConditionalM", &Nion_ConditionalM); + m.def("Nion_ConditionalM_MINI", &Nion_ConditionalM_MINI); + m.def("Nion_General", &Nion_General); + m.def("Nion_General_MINI", &Nion_General_MINI); + m.def("Fcoll_General", &Fcoll_General); + m.def("unconditional_mf", &unconditional_mf); + m.def("conditional_mf", &conditional_mf); + m.def("SomethingThatCatches", &SomethingThatCatches); + m.def("FunctionThatCatches", &FunctionThatCatches); + m.def("FunctionThatThrows", &FunctionThatThrows); + m.def("single_test_sample", &single_test_sample); + m.def("test_halo_props", &test_halo_props); + m.def("test_filter", &test_filter); + m.def("dicke", &dicke); + m.def("sigma_z0", &sigma_z0); + m.def("dsigmasqdm_z0", &dsigmasqdm_z0); + m.def("get_delta_crit", &get_delta_crit); + m.def("atomic_cooling_threshold", &atomic_cooling_threshold); + m.def("expected_nhalo", &expected_nhalo); +} diff --git a/src/py21cmfast/src/meson.build b/src/py21cmfast/src/meson.build index e751d45d2..927f45f9c 100644 --- a/src/py21cmfast/src/meson.build +++ b/src/py21cmfast/src/meson.build @@ -26,6 +26,7 @@ source_files = [ 'thermochem.c', 'MapMass_cpu.c', 'rng.c', + '_wrapper.cpp', ] omp = dependency('openmp') From aabe2b13bea2fcc5ec70f80f77f9a1376d86def4 Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Mon, 24 Feb 2025 22:22:07 +1100 Subject: [PATCH 098/145] Code builds but error with 'importing ffi' killing package import. 
--- src/py21cmfast/src/_wrapper.cpp | 440 +++++++++++++++----------------- 1 file changed, 204 insertions(+), 236 deletions(-) diff --git a/src/py21cmfast/src/_wrapper.cpp b/src/py21cmfast/src/_wrapper.cpp index 6f748f01d..801ac4f67 100644 --- a/src/py21cmfast/src/_wrapper.cpp +++ b/src/py21cmfast/src/_wrapper.cpp @@ -5,41 +5,9 @@ namespace nb = nanobind; extern "C" { -#include "InputParameters.h" -#include "OutputStructs.h" #include "21cmFAST.h" -#include "BrightnessTemperatureBox.h" #include "Constants.h" -#include "HaloBox.h" -#include "HaloField.h" -#include "InitialConditions.h" -#include "InputParameters.h" -#include "IonisationBox.h" -#include "LuminosityFunction.h" -#include "OutputStructs.h" -#include "PerturbField.h" -#include "PerturbHaloField.h" -#include "SpinTemperatureBox.h" -#include "Stochasticity.h" -#include "bubble_helper_progs.h" -#include "cexcept.h" -#include "cosmology.h" -#include "debugging.h" -#include "dft.h" -#include "elec_interp.h" -#include "exceptions.h" -#include "filtering.h" -#include "heating_helper_progs.h" -#include "hmf.h" #include "indexing.h" -#include "interp_tables.h" -#include "interpolation.h" -#include "logger.h" -#include "photoncons.h" -#include "recombinations.h" -#include "rng.h" -#include "subcell_rsds.h" -#include "thermochem.h" } NB_MODULE(wrapper_module, m) { @@ -47,259 +15,259 @@ NB_MODULE(wrapper_module, m) { // Bind CosmoParams nb::class_(m, "CosmoParams") - .def_readwrite("SIGMA_8", &CosmoParams::SIGMA_8) - .def_readwrite("hlittle", &CosmoParams::hlittle) - .def_readwrite("OMm", &CosmoParams::OMm) - .def_readwrite("OMl", &CosmoParams::OMl) - .def_readwrite("OMb", &CosmoParams::OMb) - .def_readwrite("POWER_INDEX", &CosmoParams::POWER_INDEX); + .def_rw("SIGMA_8", &CosmoParams::SIGMA_8) + .def_rw("hlittle", &CosmoParams::hlittle) + .def_rw("OMm", &CosmoParams::OMm) + .def_rw("OMl", &CosmoParams::OMl) + .def_rw("OMb", &CosmoParams::OMb) + .def_rw("POWER_INDEX", &CosmoParams::POWER_INDEX); // Bind UserParams 
nb::class_(m, "UserParams") - .def_readwrite("HII_DIM", &UserParams::HII_DIM) - .def_readwrite("DIM", &UserParams::DIM) - .def_readwrite("BOX_LEN", &UserParams::BOX_LEN) - .def_readwrite("NON_CUBIC_FACTOR", &UserParams::NON_CUBIC_FACTOR) - .def_readwrite("USE_FFTW_WISDOM", &UserParams::USE_FFTW_WISDOM) - .def_readwrite("HMF", &UserParams::HMF) - .def_readwrite("USE_RELATIVE_VELOCITIES", + .def_rw("HII_DIM", &UserParams::HII_DIM) + .def_rw("DIM", &UserParams::DIM) + .def_rw("BOX_LEN", &UserParams::BOX_LEN) + .def_rw("NON_CUBIC_FACTOR", &UserParams::NON_CUBIC_FACTOR) + .def_rw("USE_FFTW_WISDOM", &UserParams::USE_FFTW_WISDOM) + .def_rw("HMF", &UserParams::HMF) + .def_rw("USE_RELATIVE_VELOCITIES", &UserParams::USE_RELATIVE_VELOCITIES) - .def_readwrite("POWER_SPECTRUM", &UserParams::POWER_SPECTRUM) - .def_readwrite("N_THREADS", &UserParams::N_THREADS) - .def_readwrite("PERTURB_ON_HIGH_RES", &UserParams::PERTURB_ON_HIGH_RES) - .def_readwrite("NO_RNG", &UserParams::NO_RNG) - .def_readwrite("USE_INTERPOLATION_TABLES", + .def_rw("POWER_SPECTRUM", &UserParams::POWER_SPECTRUM) + .def_rw("N_THREADS", &UserParams::N_THREADS) + .def_rw("PERTURB_ON_HIGH_RES", &UserParams::PERTURB_ON_HIGH_RES) + .def_rw("NO_RNG", &UserParams::NO_RNG) + .def_rw("USE_INTERPOLATION_TABLES", &UserParams::USE_INTERPOLATION_TABLES) - .def_readwrite("INTEGRATION_METHOD_ATOMIC", + .def_rw("INTEGRATION_METHOD_ATOMIC", &UserParams::INTEGRATION_METHOD_ATOMIC) - .def_readwrite("INTEGRATION_METHOD_MINI", + .def_rw("INTEGRATION_METHOD_MINI", &UserParams::INTEGRATION_METHOD_MINI) - .def_readwrite("USE_2LPT", &UserParams::USE_2LPT) - .def_readwrite("MINIMIZE_MEMORY", &UserParams::MINIMIZE_MEMORY) - .def_readwrite("KEEP_3D_VELOCITIES", &UserParams::KEEP_3D_VELOCITIES) - .def_readwrite("SAMPLER_MIN_MASS", &UserParams::SAMPLER_MIN_MASS) - .def_readwrite("SAMPLER_BUFFER_FACTOR", + .def_rw("USE_2LPT", &UserParams::USE_2LPT) + .def_rw("MINIMIZE_MEMORY", &UserParams::MINIMIZE_MEMORY) + .def_rw("KEEP_3D_VELOCITIES", 
&UserParams::KEEP_3D_VELOCITIES) + .def_rw("SAMPLER_MIN_MASS", &UserParams::SAMPLER_MIN_MASS) + .def_rw("SAMPLER_BUFFER_FACTOR", &UserParams::SAMPLER_BUFFER_FACTOR) - .def_readwrite("MAXHALO_FACTOR", &UserParams::MAXHALO_FACTOR) - .def_readwrite("N_COND_INTERP", &UserParams::N_COND_INTERP) - .def_readwrite("N_PROB_INTERP", &UserParams::N_PROB_INTERP) - .def_readwrite("MIN_LOGPROB", &UserParams::MIN_LOGPROB) - .def_readwrite("SAMPLE_METHOD", &UserParams::SAMPLE_METHOD) - .def_readwrite("AVG_BELOW_SAMPLER", &UserParams::AVG_BELOW_SAMPLER) - .def_readwrite("HALOMASS_CORRECTION", &UserParams::HALOMASS_CORRECTION) - .def_readwrite("PARKINSON_G0", &UserParams::PARKINSON_G0) - .def_readwrite("PARKINSON_y1", &UserParams::PARKINSON_y1) - .def_readwrite("PARKINSON_y2", &UserParams::PARKINSON_y2); + .def_rw("MAXHALO_FACTOR", &UserParams::MAXHALO_FACTOR) + .def_rw("N_COND_INTERP", &UserParams::N_COND_INTERP) + .def_rw("N_PROB_INTERP", &UserParams::N_PROB_INTERP) + .def_rw("MIN_LOGPROB", &UserParams::MIN_LOGPROB) + .def_rw("SAMPLE_METHOD", &UserParams::SAMPLE_METHOD) + .def_rw("AVG_BELOW_SAMPLER", &UserParams::AVG_BELOW_SAMPLER) + .def_rw("HALOMASS_CORRECTION", &UserParams::HALOMASS_CORRECTION) + .def_rw("PARKINSON_G0", &UserParams::PARKINSON_G0) + .def_rw("PARKINSON_y1", &UserParams::PARKINSON_y1) + .def_rw("PARKINSON_y2", &UserParams::PARKINSON_y2); // Bind AstroParams nb::class_(m, "AstroParams") - .def_readwrite("HII_EFF_FACTOR", &AstroParams::HII_EFF_FACTOR) - .def_readwrite("F_STAR10", &AstroParams::F_STAR10) - .def_readwrite("ALPHA_STAR", &AstroParams::ALPHA_STAR) - .def_readwrite("ALPHA_STAR_MINI", &AstroParams::ALPHA_STAR_MINI) - .def_readwrite("SIGMA_STAR", &AstroParams::SIGMA_STAR) - .def_readwrite("CORR_STAR", &AstroParams::CORR_STAR) - .def_readwrite("UPPER_STELLAR_TURNOVER_MASS", + .def_rw("HII_EFF_FACTOR", &AstroParams::HII_EFF_FACTOR) + .def_rw("F_STAR10", &AstroParams::F_STAR10) + .def_rw("ALPHA_STAR", &AstroParams::ALPHA_STAR) + .def_rw("ALPHA_STAR_MINI", 
&AstroParams::ALPHA_STAR_MINI) + .def_rw("SIGMA_STAR", &AstroParams::SIGMA_STAR) + .def_rw("CORR_STAR", &AstroParams::CORR_STAR) + .def_rw("UPPER_STELLAR_TURNOVER_MASS", &AstroParams::UPPER_STELLAR_TURNOVER_MASS) - .def_readwrite("UPPER_STELLAR_TURNOVER_INDEX", + .def_rw("UPPER_STELLAR_TURNOVER_INDEX", &AstroParams::UPPER_STELLAR_TURNOVER_INDEX) - .def_readwrite("F_STAR7_MINI", &AstroParams::F_STAR7_MINI) - .def_readwrite("t_STAR", &AstroParams::t_STAR) - .def_readwrite("CORR_SFR", &AstroParams::CORR_SFR) - .def_readwrite("SIGMA_SFR_INDEX", &AstroParams::SIGMA_SFR_INDEX) - .def_readwrite("SIGMA_SFR_LIM", &AstroParams::SIGMA_SFR_LIM) - .def_readwrite("L_X", &AstroParams::L_X) - .def_readwrite("L_X_MINI", &AstroParams::L_X_MINI) - .def_readwrite("SIGMA_LX", &AstroParams::SIGMA_LX) - .def_readwrite("CORR_LX", &AstroParams::CORR_LX) - .def_readwrite("F_ESC10", &AstroParams::F_ESC10) - .def_readwrite("ALPHA_ESC", &AstroParams::ALPHA_ESC) - .def_readwrite("F_ESC7_MINI", &AstroParams::F_ESC7_MINI) - .def_readwrite("M_TURN", &AstroParams::M_TURN) - .def_readwrite("R_BUBBLE_MAX", &AstroParams::R_BUBBLE_MAX) - .def_readwrite("ION_Tvir_MIN", &AstroParams::ION_Tvir_MIN); + .def_rw("F_STAR7_MINI", &AstroParams::F_STAR7_MINI) + .def_rw("t_STAR", &AstroParams::t_STAR) + .def_rw("CORR_SFR", &AstroParams::CORR_SFR) + .def_rw("SIGMA_SFR_INDEX", &AstroParams::SIGMA_SFR_INDEX) + .def_rw("SIGMA_SFR_LIM", &AstroParams::SIGMA_SFR_LIM) + .def_rw("L_X", &AstroParams::L_X) + .def_rw("L_X_MINI", &AstroParams::L_X_MINI) + .def_rw("SIGMA_LX", &AstroParams::SIGMA_LX) + .def_rw("CORR_LX", &AstroParams::CORR_LX) + .def_rw("F_ESC10", &AstroParams::F_ESC10) + .def_rw("ALPHA_ESC", &AstroParams::ALPHA_ESC) + .def_rw("F_ESC7_MINI", &AstroParams::F_ESC7_MINI) + .def_rw("M_TURN", &AstroParams::M_TURN) + .def_rw("R_BUBBLE_MAX", &AstroParams::R_BUBBLE_MAX) + .def_rw("ION_Tvir_MIN", &AstroParams::ION_Tvir_MIN); // Bind FlagOptions nb::class_(m, "FlagOptions") - .def_readwrite("USE_HALO_FIELD", 
&FlagOptions::USE_HALO_FIELD) - .def_readwrite("USE_MINI_HALOS", &FlagOptions::USE_MINI_HALOS) - .def_readwrite("USE_CMB_HEATING", &FlagOptions::USE_CMB_HEATING) - .def_readwrite("USE_LYA_HEATING", &FlagOptions::USE_LYA_HEATING) - .def_readwrite("USE_MASS_DEPENDENT_ZETA", + .def_rw("USE_HALO_FIELD", &FlagOptions::USE_HALO_FIELD) + .def_rw("USE_MINI_HALOS", &FlagOptions::USE_MINI_HALOS) + .def_rw("USE_CMB_HEATING", &FlagOptions::USE_CMB_HEATING) + .def_rw("USE_LYA_HEATING", &FlagOptions::USE_LYA_HEATING) + .def_rw("USE_MASS_DEPENDENT_ZETA", &FlagOptions::USE_MASS_DEPENDENT_ZETA) - .def_readwrite("SUBCELL_RSD", &FlagOptions::SUBCELL_RSD) - .def_readwrite("APPLY_RSDS", &FlagOptions::APPLY_RSDS) - .def_readwrite("INHOMO_RECO", &FlagOptions::INHOMO_RECO) - .def_readwrite("USE_TS_FLUCT", &FlagOptions::USE_TS_FLUCT) - .def_readwrite("M_MIN_in_Mass", &FlagOptions::M_MIN_in_Mass) - .def_readwrite("FIX_VCB_AVG", &FlagOptions::FIX_VCB_AVG) - .def_readwrite("HALO_STOCHASTICITY", &FlagOptions::HALO_STOCHASTICITY) - .def_readwrite("USE_EXP_FILTER", &FlagOptions::USE_EXP_FILTER) - .def_readwrite("FIXED_HALO_GRIDS", &FlagOptions::FIXED_HALO_GRIDS) - .def_readwrite("CELL_RECOMB", &FlagOptions::CELL_RECOMB) - .def_readwrite("PHOTON_CONS_TYPE", &FlagOptions::PHOTON_CONS_TYPE) - .def_readwrite("USE_UPPER_STELLAR_TURNOVER", + .def_rw("SUBCELL_RSD", &FlagOptions::SUBCELL_RSD) + .def_rw("APPLY_RSDS", &FlagOptions::APPLY_RSDS) + .def_rw("INHOMO_RECO", &FlagOptions::INHOMO_RECO) + .def_rw("USE_TS_FLUCT", &FlagOptions::USE_TS_FLUCT) + .def_rw("M_MIN_in_Mass", &FlagOptions::M_MIN_in_Mass) + .def_rw("FIX_VCB_AVG", &FlagOptions::FIX_VCB_AVG) + .def_rw("HALO_STOCHASTICITY", &FlagOptions::HALO_STOCHASTICITY) + .def_rw("USE_EXP_FILTER", &FlagOptions::USE_EXP_FILTER) + .def_rw("FIXED_HALO_GRIDS", &FlagOptions::FIXED_HALO_GRIDS) + .def_rw("CELL_RECOMB", &FlagOptions::CELL_RECOMB) + .def_rw("PHOTON_CONS_TYPE", &FlagOptions::PHOTON_CONS_TYPE) + .def_rw("USE_UPPER_STELLAR_TURNOVER", 
&FlagOptions::USE_UPPER_STELLAR_TURNOVER) - .def_readwrite("HALO_SCALING_RELATIONS_MEDIAN", + .def_rw("HALO_SCALING_RELATIONS_MEDIAN", &FlagOptions::HALO_SCALING_RELATIONS_MEDIAN); nb::class_(m, "GlobalParams") - .def_readwrite("ALPHA_UVB", &GlobalParams::ALPHA_UVB) - .def_readwrite("EVOLVE_DENSITY_LINEARLY", + .def_rw("ALPHA_UVB", &GlobalParams::ALPHA_UVB) + .def_rw("EVOLVE_DENSITY_LINEARLY", &GlobalParams::EVOLVE_DENSITY_LINEARLY) - .def_readwrite("SMOOTH_EVOLVED_DENSITY_FIELD", + .def_rw("SMOOTH_EVOLVED_DENSITY_FIELD", &GlobalParams::SMOOTH_EVOLVED_DENSITY_FIELD) - .def_readwrite("R_smooth_density", &GlobalParams::R_smooth_density) - .def_readwrite("HII_ROUND_ERR", &GlobalParams::HII_ROUND_ERR) - .def_readwrite("FIND_BUBBLE_ALGORITHM", + .def_rw("R_smooth_density", &GlobalParams::R_smooth_density) + .def_rw("HII_ROUND_ERR", &GlobalParams::HII_ROUND_ERR) + .def_rw("FIND_BUBBLE_ALGORITHM", &GlobalParams::FIND_BUBBLE_ALGORITHM) - .def_readwrite("N_POISSON", &GlobalParams::N_POISSON) - .def_readwrite("T_USE_VELOCITIES", &GlobalParams::T_USE_VELOCITIES) - .def_readwrite("MAX_DVDR", &GlobalParams::MAX_DVDR) - .def_readwrite("DELTA_R_HII_FACTOR", &GlobalParams::DELTA_R_HII_FACTOR) - .def_readwrite("DELTA_R_FACTOR", &GlobalParams::DELTA_R_FACTOR) - .def_readwrite("HII_FILTER", &GlobalParams::HII_FILTER) - .def_readwrite("INITIAL_REDSHIFT", &GlobalParams::INITIAL_REDSHIFT) - .def_readwrite("R_OVERLAP_FACTOR", &GlobalParams::R_OVERLAP_FACTOR) - .def_readwrite("DELTA_CRIT_MODE", &GlobalParams::DELTA_CRIT_MODE) - .def_readwrite("HALO_FILTER", &GlobalParams::HALO_FILTER) - .def_readwrite("OPTIMIZE", &GlobalParams::OPTIMIZE) - .def_readwrite("OPTIMIZE_MIN_MASS", &GlobalParams::OPTIMIZE_MIN_MASS) - .def_readwrite("CRIT_DENS_TRANSITION", + .def_rw("N_POISSON", &GlobalParams::N_POISSON) + .def_rw("T_USE_VELOCITIES", &GlobalParams::T_USE_VELOCITIES) + .def_rw("MAX_DVDR", &GlobalParams::MAX_DVDR) + .def_rw("DELTA_R_HII_FACTOR", &GlobalParams::DELTA_R_HII_FACTOR) + 
.def_rw("DELTA_R_FACTOR", &GlobalParams::DELTA_R_FACTOR) + .def_rw("HII_FILTER", &GlobalParams::HII_FILTER) + .def_rw("INITIAL_REDSHIFT", &GlobalParams::INITIAL_REDSHIFT) + .def_rw("R_OVERLAP_FACTOR", &GlobalParams::R_OVERLAP_FACTOR) + .def_rw("DELTA_CRIT_MODE", &GlobalParams::DELTA_CRIT_MODE) + .def_rw("HALO_FILTER", &GlobalParams::HALO_FILTER) + .def_rw("OPTIMIZE", &GlobalParams::OPTIMIZE) + .def_rw("OPTIMIZE_MIN_MASS", &GlobalParams::OPTIMIZE_MIN_MASS) + .def_rw("CRIT_DENS_TRANSITION", &GlobalParams::CRIT_DENS_TRANSITION) - .def_readwrite("MIN_DENSITY_LOW_LIMIT", + .def_rw("MIN_DENSITY_LOW_LIMIT", &GlobalParams::MIN_DENSITY_LOW_LIMIT) - .def_readwrite("RecombPhotonCons", &GlobalParams::RecombPhotonCons) - .def_readwrite("PhotonConsStart", &GlobalParams::PhotonConsStart) - .def_readwrite("PhotonConsEnd", &GlobalParams::PhotonConsEnd) - .def_readwrite("PhotonConsAsymptoteTo", + .def_rw("RecombPhotonCons", &GlobalParams::RecombPhotonCons) + .def_rw("PhotonConsStart", &GlobalParams::PhotonConsStart) + .def_rw("PhotonConsEnd", &GlobalParams::PhotonConsEnd) + .def_rw("PhotonConsAsymptoteTo", &GlobalParams::PhotonConsAsymptoteTo) - .def_readwrite("PhotonConsEndCalibz", &GlobalParams::PhotonConsEndCalibz) - .def_readwrite("PhotonConsSmoothing", &GlobalParams::PhotonConsSmoothing) - .def_readwrite("HEAT_FILTER", &GlobalParams::HEAT_FILTER) - .def_readwrite("CLUMPING_FACTOR", &GlobalParams::CLUMPING_FACTOR) - .def_readwrite("Z_HEAT_MAX", &GlobalParams::Z_HEAT_MAX) - .def_readwrite("R_XLy_MAX", &GlobalParams::R_XLy_MAX) - .def_readwrite("NUM_FILTER_STEPS_FOR_Ts", + .def_rw("PhotonConsEndCalibz", &GlobalParams::PhotonConsEndCalibz) + .def_rw("PhotonConsSmoothing", &GlobalParams::PhotonConsSmoothing) + .def_rw("HEAT_FILTER", &GlobalParams::HEAT_FILTER) + .def_rw("CLUMPING_FACTOR", &GlobalParams::CLUMPING_FACTOR) + .def_rw("Z_HEAT_MAX", &GlobalParams::Z_HEAT_MAX) + .def_rw("R_XLy_MAX", &GlobalParams::R_XLy_MAX) + .def_rw("NUM_FILTER_STEPS_FOR_Ts", 
&GlobalParams::NUM_FILTER_STEPS_FOR_Ts) - .def_readwrite("ZPRIME_STEP_FACTOR", &GlobalParams::ZPRIME_STEP_FACTOR) - .def_readwrite("TK_at_Z_HEAT_MAX", &GlobalParams::TK_at_Z_HEAT_MAX) - .def_readwrite("XION_at_Z_HEAT_MAX", &GlobalParams::XION_at_Z_HEAT_MAX) - .def_readwrite("Pop", &GlobalParams::Pop) - .def_readwrite("Pop2_ion", &GlobalParams::Pop2_ion) - .def_readwrite("Pop3_ion", &GlobalParams::Pop3_ion) - .def_readwrite("NU_X_BAND_MAX", &GlobalParams::NU_X_BAND_MAX) - .def_readwrite("NU_X_MAX", &GlobalParams::NU_X_MAX) - .def_readwrite("NBINS_LF", &GlobalParams::NBINS_LF) - .def_readwrite("P_CUTOFF", &GlobalParams::P_CUTOFF) - .def_readwrite("M_WDM", &GlobalParams::M_WDM) - .def_readwrite("g_x", &GlobalParams::g_x) - .def_readwrite("OMn", &GlobalParams::OMn) - .def_readwrite("OMk", &GlobalParams::OMk) - .def_readwrite("OMr", &GlobalParams::OMr) - .def_readwrite("OMtot", &GlobalParams::OMtot) - .def_readwrite("Y_He", &GlobalParams::Y_He) - .def_readwrite("wl", &GlobalParams::wl) - .def_readwrite("SHETH_b", &GlobalParams::SHETH_b) - .def_readwrite("SHETH_c", &GlobalParams::SHETH_c) - .def_readwrite("Zreion_HeII", &GlobalParams::Zreion_HeII) - .def_readwrite("FILTER", &GlobalParams::FILTER) - .def_readwrite("external_table_path", &GlobalParams::external_table_path) - .def_readwrite("wisdoms_path", &GlobalParams::wisdoms_path) - .def_readwrite("R_BUBBLE_MIN", &GlobalParams::R_BUBBLE_MIN) - .def_readwrite("M_MIN_INTEGRAL", &GlobalParams::M_MIN_INTEGRAL) - .def_readwrite("M_MAX_INTEGRAL", &GlobalParams::M_MAX_INTEGRAL) - .def_readwrite("T_RE", &GlobalParams::T_RE) - .def_readwrite("VAVG", &GlobalParams::VAVG) - .def_readwrite("USE_ADIABATIC_FLUCTUATIONS", + .def_rw("ZPRIME_STEP_FACTOR", &GlobalParams::ZPRIME_STEP_FACTOR) + .def_rw("TK_at_Z_HEAT_MAX", &GlobalParams::TK_at_Z_HEAT_MAX) + .def_rw("XION_at_Z_HEAT_MAX", &GlobalParams::XION_at_Z_HEAT_MAX) + .def_rw("Pop", &GlobalParams::Pop) + .def_rw("Pop2_ion", &GlobalParams::Pop2_ion) + .def_rw("Pop3_ion", 
&GlobalParams::Pop3_ion) + .def_rw("NU_X_BAND_MAX", &GlobalParams::NU_X_BAND_MAX) + .def_rw("NU_X_MAX", &GlobalParams::NU_X_MAX) + .def_rw("NBINS_LF", &GlobalParams::NBINS_LF) + .def_rw("P_CUTOFF", &GlobalParams::P_CUTOFF) + .def_rw("M_WDM", &GlobalParams::M_WDM) + .def_rw("g_x", &GlobalParams::g_x) + .def_rw("OMn", &GlobalParams::OMn) + .def_rw("OMk", &GlobalParams::OMk) + .def_rw("OMr", &GlobalParams::OMr) + .def_rw("OMtot", &GlobalParams::OMtot) + .def_rw("Y_He", &GlobalParams::Y_He) + .def_rw("wl", &GlobalParams::wl) + .def_rw("SHETH_b", &GlobalParams::SHETH_b) + .def_rw("SHETH_c", &GlobalParams::SHETH_c) + .def_rw("Zreion_HeII", &GlobalParams::Zreion_HeII) + .def_rw("FILTER", &GlobalParams::FILTER) + .def_ro("external_table_path", &GlobalParams::external_table_path) + .def_ro("wisdoms_path", &GlobalParams::wisdoms_path) + .def_rw("R_BUBBLE_MIN", &GlobalParams::R_BUBBLE_MIN) + .def_rw("M_MIN_INTEGRAL", &GlobalParams::M_MIN_INTEGRAL) + .def_rw("M_MAX_INTEGRAL", &GlobalParams::M_MAX_INTEGRAL) + .def_rw("T_RE", &GlobalParams::T_RE) + .def_rw("VAVG", &GlobalParams::VAVG) + .def_rw("USE_ADIABATIC_FLUCTUATIONS", &GlobalParams::USE_ADIABATIC_FLUCTUATIONS); // Bind output parameters nb::class_(m, "InitialConditions") - .def_readwrite("lowres_density", &InitialConditions::lowres_density) - .def_readwrite("lowres_vx", &InitialConditions::lowres_vx) - .def_readwrite("lowres_vy", &InitialConditions::lowres_vy) - .def_readwrite("lowres_vz", &InitialConditions::lowres_vz) - .def_readwrite("lowres_vx_2LPT", &InitialConditions::lowres_vx_2LPT) - .def_readwrite("lowres_vy_2LPT", &InitialConditions::lowres_vy_2LPT) - .def_readwrite("lowres_vz_2LPT", &InitialConditions::lowres_vz_2LPT) - .def_readwrite("hires_density", &InitialConditions::hires_density) - .def_readwrite("hires_vx", &InitialConditions::hires_vx) - .def_readwrite("hires_vy", &InitialConditions::hires_vy) - .def_readwrite("hires_vz", &InitialConditions::hires_vz) - .def_readwrite("hires_vx_2LPT", 
&InitialConditions::hires_vx_2LPT) - .def_readwrite("hires_vy_2LPT", &InitialConditions::hires_vy_2LPT) - .def_readwrite("hires_vz_2LPT", &InitialConditions::hires_vz_2LPT) - .def_readwrite("lowres_vcb", &InitialConditions::lowres_vcb); + .def_rw("lowres_density", &InitialConditions::lowres_density) + .def_rw("lowres_vx", &InitialConditions::lowres_vx) + .def_rw("lowres_vy", &InitialConditions::lowres_vy) + .def_rw("lowres_vz", &InitialConditions::lowres_vz) + .def_rw("lowres_vx_2LPT", &InitialConditions::lowres_vx_2LPT) + .def_rw("lowres_vy_2LPT", &InitialConditions::lowres_vy_2LPT) + .def_rw("lowres_vz_2LPT", &InitialConditions::lowres_vz_2LPT) + .def_rw("hires_density", &InitialConditions::hires_density) + .def_rw("hires_vx", &InitialConditions::hires_vx) + .def_rw("hires_vy", &InitialConditions::hires_vy) + .def_rw("hires_vz", &InitialConditions::hires_vz) + .def_rw("hires_vx_2LPT", &InitialConditions::hires_vx_2LPT) + .def_rw("hires_vy_2LPT", &InitialConditions::hires_vy_2LPT) + .def_rw("hires_vz_2LPT", &InitialConditions::hires_vz_2LPT) + .def_rw("lowres_vcb", &InitialConditions::lowres_vcb); nb::class_(m, "PerturbedField") - .def_readwrite("density", &PerturbedField::density) - .def_readwrite("velocity_x", &PerturbedField::velocity_x) - .def_readwrite("velocity_y", &PerturbedField::velocity_y) - .def_readwrite("velocity_z", &PerturbedField::velocity_z); + .def_rw("density", &PerturbedField::density) + .def_rw("velocity_x", &PerturbedField::velocity_x) + .def_rw("velocity_y", &PerturbedField::velocity_y) + .def_rw("velocity_z", &PerturbedField::velocity_z); nb::class_(m, "HaloField") - .def_readwrite("n_halos", &HaloField::n_halos) - .def_readwrite("buffer_size", &HaloField::buffer_size) - .def_readwrite("halo_masses", &HaloField::halo_masses) - .def_readwrite("halo_coords", &HaloField::halo_coords) - .def_readwrite("star_rng", &HaloField::star_rng) - .def_readwrite("sfr_rng", &HaloField::sfr_rng) - .def_readwrite("xray_rng", &HaloField::xray_rng); + 
.def_rw("n_halos", &HaloField::n_halos) + .def_rw("buffer_size", &HaloField::buffer_size) + .def_rw("halo_masses", &HaloField::halo_masses) + .def_rw("halo_coords", &HaloField::halo_coords) + .def_rw("star_rng", &HaloField::star_rng) + .def_rw("sfr_rng", &HaloField::sfr_rng) + .def_rw("xray_rng", &HaloField::xray_rng); nb::class_(m, "PerturbHaloField") - .def_readwrite("n_halos", &PerturbHaloField::n_halos) - .def_readwrite("buffer_size", &PerturbHaloField::buffer_size) - .def_readwrite("halo_masses", &PerturbHaloField::halo_masses) - .def_readwrite("halo_coords", &PerturbHaloField::halo_coords) - .def_readwrite("star_rng", &PerturbHaloField::star_rng) - .def_readwrite("sfr_rng", &PerturbHaloField::sfr_rng) - .def_readwrite("xray_rng", &PerturbHaloField::xray_rng); + .def_rw("n_halos", &PerturbHaloField::n_halos) + .def_rw("buffer_size", &PerturbHaloField::buffer_size) + .def_rw("halo_masses", &PerturbHaloField::halo_masses) + .def_rw("halo_coords", &PerturbHaloField::halo_coords) + .def_rw("star_rng", &PerturbHaloField::star_rng) + .def_rw("sfr_rng", &PerturbHaloField::sfr_rng) + .def_rw("xray_rng", &PerturbHaloField::xray_rng); nb::class_(m, "HaloBox") - .def_readwrite("halo_mass", &HaloBox::halo_mass) - .def_readwrite("halo_stars", &HaloBox::halo_stars) - .def_readwrite("halo_stars_mini", &HaloBox::halo_stars_mini) - .def_readwrite("count", &HaloBox::count) - .def_readwrite("n_ion", &HaloBox::n_ion) - .def_readwrite("halo_sfr", &HaloBox::halo_sfr) - .def_readwrite("halo_xray", &HaloBox::halo_xray) - .def_readwrite("halo_sfr_mini", &HaloBox::halo_sfr_mini) - .def_readwrite("whalo_sfr", &HaloBox::whalo_sfr) - .def_readwrite("log10_Mcrit_ACG_ave", &HaloBox::log10_Mcrit_ACG_ave) - .def_readwrite("log10_Mcrit_MCG_ave", &HaloBox::log10_Mcrit_MCG_ave); + .def_rw("halo_mass", &HaloBox::halo_mass) + .def_rw("halo_stars", &HaloBox::halo_stars) + .def_rw("halo_stars_mini", &HaloBox::halo_stars_mini) + .def_rw("count", &HaloBox::count) + .def_rw("n_ion", &HaloBox::n_ion) + 
.def_rw("halo_sfr", &HaloBox::halo_sfr) + .def_rw("halo_xray", &HaloBox::halo_xray) + .def_rw("halo_sfr_mini", &HaloBox::halo_sfr_mini) + .def_rw("whalo_sfr", &HaloBox::whalo_sfr) + .def_rw("log10_Mcrit_ACG_ave", &HaloBox::log10_Mcrit_ACG_ave) + .def_rw("log10_Mcrit_MCG_ave", &HaloBox::log10_Mcrit_MCG_ave); nb::class_(m, "XraySourceBox") - .def_readwrite("filtered_sfr", &XraySourceBox::filtered_sfr) - .def_readwrite("filtered_xray", &XraySourceBox::filtered_xray) - .def_readwrite("filtered_sfr_mini", &XraySourceBox::filtered_sfr_mini) - .def_readwrite("mean_log10_Mcrit_LW", &XraySourceBox::mean_log10_Mcrit_LW) - .def_readwrite("mean_sfr", &XraySourceBox::mean_sfr) - .def_readwrite("mean_sfr_mini", &XraySourceBox::mean_sfr_mini); + .def_rw("filtered_sfr", &XraySourceBox::filtered_sfr) + .def_rw("filtered_xray", &XraySourceBox::filtered_xray) + .def_rw("filtered_sfr_mini", &XraySourceBox::filtered_sfr_mini) + .def_rw("mean_log10_Mcrit_LW", &XraySourceBox::mean_log10_Mcrit_LW) + .def_rw("mean_sfr", &XraySourceBox::mean_sfr) + .def_rw("mean_sfr_mini", &XraySourceBox::mean_sfr_mini); nb::class_(m, "TsBox") - .def_readwrite("Ts_box", &TsBox::Ts_box) - .def_readwrite("x_e_box", &TsBox::x_e_box) - .def_readwrite("Tk_box", &TsBox::Tk_box) - .def_readwrite("J_21_LW_box", &TsBox::J_21_LW_box); + .def_rw("Ts_box", &TsBox::Ts_box) + .def_rw("x_e_box", &TsBox::x_e_box) + .def_rw("Tk_box", &TsBox::Tk_box) + .def_rw("J_21_LW_box", &TsBox::J_21_LW_box); nb::class_(m, "IonizedBox") - .def_readwrite("mean_f_coll", &IonizedBox::mean_f_coll) - .def_readwrite("mean_f_coll_MINI", &IonizedBox::mean_f_coll_MINI) - .def_readwrite("log10_Mturnover_ave", &IonizedBox::log10_Mturnover_ave) - .def_readwrite("log10_Mturnover_MINI_ave", + .def_rw("mean_f_coll", &IonizedBox::mean_f_coll) + .def_rw("mean_f_coll_MINI", &IonizedBox::mean_f_coll_MINI) + .def_rw("log10_Mturnover_ave", &IonizedBox::log10_Mturnover_ave) + .def_rw("log10_Mturnover_MINI_ave", &IonizedBox::log10_Mturnover_MINI_ave) - 
.def_readwrite("xH_box", &IonizedBox::xH_box) - .def_readwrite("Gamma12_box", &IonizedBox::Gamma12_box) - .def_readwrite("MFP_box", &IonizedBox::MFP_box) - .def_readwrite("z_re_box", &IonizedBox::z_re_box) - .def_readwrite("dNrec_box", &IonizedBox::dNrec_box) - .def_readwrite("temp_kinetic_all_gas", &IonizedBox::temp_kinetic_all_gas) - .def_readwrite("Fcoll", &IonizedBox::Fcoll) - .def_readwrite("Fcoll_MINI", &IonizedBox::Fcoll_MINI); + .def_rw("xH_box", &IonizedBox::xH_box) + .def_rw("Gamma12_box", &IonizedBox::Gamma12_box) + .def_rw("MFP_box", &IonizedBox::MFP_box) + .def_rw("z_re_box", &IonizedBox::z_re_box) + .def_rw("dNrec_box", &IonizedBox::dNrec_box) + .def_rw("temp_kinetic_all_gas", &IonizedBox::temp_kinetic_all_gas) + .def_rw("Fcoll", &IonizedBox::Fcoll) + .def_rw("Fcoll_MINI", &IonizedBox::Fcoll_MINI); nb::class_(m, "BrightnessTemp ") - .def_readwrite("brightness_temp", &BrightnessTemp::brightness_temp); + .def_rw("brightness_temp", &BrightnessTemp::brightness_temp); // Bind functions m.def("ComputeInitialConditions", &ComputeInitialConditions); From d4ba9cd19dec20ce0ae6ae92139fb1de28bc282e Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Wed, 26 Feb 2025 10:52:23 +1100 Subject: [PATCH 099/145] Nanobind wrapper now being built; imports from c_21cmfast disabled because they still expect cffi structure. --- src/py21cmfast/__init__.py | 83 ++++++++++++++++--------------- src/py21cmfast/src/_wrapper.cpp | 19 ++++++- src/py21cmfast/src/meson.build | 14 ++++-- src/py21cmfast/wrapper/inputs.py | 3 +- src/py21cmfast/wrapper/outputs.py | 11 ++-- 5 files changed, 79 insertions(+), 51 deletions(-) diff --git a/src/py21cmfast/__init__.py b/src/py21cmfast/__init__.py index a6939896e..128e2bbe1 100644 --- a/src/py21cmfast/__init__.py +++ b/src/py21cmfast/__init__.py @@ -18,49 +18,50 @@ from os import mkdir as _mkdir from os import path -from . import cache_tools, lightcones, plotting, wrapper +# from .
import cache_tools, lightcones, plotting, wrapper from ._cfg import config from ._logging import configure_logging -from .cache_tools import query_cache -from .drivers.coeval import Coeval, run_coeval -from .drivers.lightcone import LightCone, exhaust_lightcone, run_lightcone -from .drivers.param_config import InputParameters -from .drivers.single_field import ( - brightness_temperature, - compute_halo_grid, - compute_initial_conditions, - compute_ionization_field, - compute_xray_source_field, - determine_halo_list, - perturb_field, - perturb_halo_list, - spin_temperature, -) -from .lightcones import AngularLightconer, RectilinearLightconer -from .utils import get_all_fieldnames -from .wrapper.cfuncs import ( - compute_luminosity_function, - compute_tau, - construct_fftw_wisdoms, -) -from .wrapper.inputs import ( - AstroParams, - CosmoParams, - FlagOptions, - UserParams, - global_params, -) -from .wrapper.outputs import ( - BrightnessTemp, - HaloBox, - HaloField, - InitialConditions, - IonizedBox, - PerturbedField, - PerturbHaloField, - TsBox, - XraySourceBox, -) + +# from .cache_tools import query_cache +# from .drivers.coeval import Coeval, run_coeval +# from .drivers.lightcone import LightCone, exhaust_lightcone, run_lightcone +# from .drivers.param_config import InputParameters +# from .drivers.single_field import ( +# brightness_temperature, +# compute_halo_grid, +# compute_initial_conditions, +# compute_ionization_field, +# compute_xray_source_field, +# determine_halo_list, +# perturb_field, +# perturb_halo_list, +# spin_temperature, +# ) +# from .lightcones import AngularLightconer, RectilinearLightconer +# from .utils import get_all_fieldnames +# from .wrapper.cfuncs import ( +# compute_luminosity_function, +# compute_tau, +# construct_fftw_wisdoms, +# ) +# from .wrapper.inputs import ( +# AstroParams, +# CosmoParams, +# FlagOptions, +# UserParams, +# global_params, +# ) +# from .wrapper.outputs import ( +# BrightnessTemp, +# HaloBox, +# HaloField, +# 
InitialConditions, +# IonizedBox, +# PerturbedField, +# PerturbHaloField, +# TsBox, +# XraySourceBox, +# ) configure_logging() diff --git a/src/py21cmfast/src/_wrapper.cpp b/src/py21cmfast/src/_wrapper.cpp index 801ac4f67..32bb29849 100644 --- a/src/py21cmfast/src/_wrapper.cpp +++ b/src/py21cmfast/src/_wrapper.cpp @@ -10,11 +10,15 @@ extern "C" { #include "indexing.h" } -NB_MODULE(wrapper_module, m) { +NB_MODULE(c_21cmfast, m) { + + m.doc() = "This is the docstring for the 21cmFAST Python extension."; + // Bind input parameters // Bind CosmoParams nb::class_(m, "CosmoParams") + .def(nb::init<>()) .def_rw("SIGMA_8", &CosmoParams::SIGMA_8) .def_rw("hlittle", &CosmoParams::hlittle) .def_rw("OMm", &CosmoParams::OMm) @@ -24,6 +28,7 @@ NB_MODULE(wrapper_module, m) { // Bind UserParams nb::class_(m, "UserParams") + .def(nb::init<>()) .def_rw("HII_DIM", &UserParams::HII_DIM) .def_rw("DIM", &UserParams::DIM) .def_rw("BOX_LEN", &UserParams::BOX_LEN) @@ -61,6 +66,7 @@ NB_MODULE(wrapper_module, m) { // Bind AstroParams nb::class_(m, "AstroParams") + .def(nb::init<>()) .def_rw("HII_EFF_FACTOR", &AstroParams::HII_EFF_FACTOR) .def_rw("F_STAR10", &AstroParams::F_STAR10) .def_rw("ALPHA_STAR", &AstroParams::ALPHA_STAR) @@ -89,6 +95,7 @@ NB_MODULE(wrapper_module, m) { // Bind FlagOptions nb::class_(m, "FlagOptions") + .def(nb::init<>()) .def_rw("USE_HALO_FIELD", &FlagOptions::USE_HALO_FIELD) .def_rw("USE_MINI_HALOS", &FlagOptions::USE_MINI_HALOS) .def_rw("USE_CMB_HEATING", &FlagOptions::USE_CMB_HEATING) @@ -112,6 +119,7 @@ NB_MODULE(wrapper_module, m) { &FlagOptions::HALO_SCALING_RELATIONS_MEDIAN); nb::class_(m, "GlobalParams") + .def(nb::init<>()) .def_rw("ALPHA_UVB", &GlobalParams::ALPHA_UVB) .def_rw("EVOLVE_DENSITY_LINEARLY", &GlobalParams::EVOLVE_DENSITY_LINEARLY) @@ -184,6 +192,7 @@ NB_MODULE(wrapper_module, m) { // Bind output parameters nb::class_(m, "InitialConditions") + .def(nb::init<>()) .def_rw("lowres_density", &InitialConditions::lowres_density) .def_rw("lowres_vx", 
&InitialConditions::lowres_vx) .def_rw("lowres_vy", &InitialConditions::lowres_vy) @@ -201,12 +210,14 @@ NB_MODULE(wrapper_module, m) { .def_rw("lowres_vcb", &InitialConditions::lowres_vcb); nb::class_(m, "PerturbedField") + .def(nb::init<>()) .def_rw("density", &PerturbedField::density) .def_rw("velocity_x", &PerturbedField::velocity_x) .def_rw("velocity_y", &PerturbedField::velocity_y) .def_rw("velocity_z", &PerturbedField::velocity_z); nb::class_(m, "HaloField") + .def(nb::init<>()) .def_rw("n_halos", &HaloField::n_halos) .def_rw("buffer_size", &HaloField::buffer_size) .def_rw("halo_masses", &HaloField::halo_masses) @@ -216,6 +227,7 @@ NB_MODULE(wrapper_module, m) { .def_rw("xray_rng", &HaloField::xray_rng); nb::class_(m, "PerturbHaloField") + .def(nb::init<>()) .def_rw("n_halos", &PerturbHaloField::n_halos) .def_rw("buffer_size", &PerturbHaloField::buffer_size) .def_rw("halo_masses", &PerturbHaloField::halo_masses) @@ -225,6 +237,7 @@ NB_MODULE(wrapper_module, m) { .def_rw("xray_rng", &PerturbHaloField::xray_rng); nb::class_(m, "HaloBox") + .def(nb::init<>()) .def_rw("halo_mass", &HaloBox::halo_mass) .def_rw("halo_stars", &HaloBox::halo_stars) .def_rw("halo_stars_mini", &HaloBox::halo_stars_mini) @@ -238,6 +251,7 @@ NB_MODULE(wrapper_module, m) { .def_rw("log10_Mcrit_MCG_ave", &HaloBox::log10_Mcrit_MCG_ave); nb::class_(m, "XraySourceBox") + .def(nb::init<>()) .def_rw("filtered_sfr", &XraySourceBox::filtered_sfr) .def_rw("filtered_xray", &XraySourceBox::filtered_xray) .def_rw("filtered_sfr_mini", &XraySourceBox::filtered_sfr_mini) @@ -246,12 +260,14 @@ NB_MODULE(wrapper_module, m) { .def_rw("mean_sfr_mini", &XraySourceBox::mean_sfr_mini); nb::class_(m, "TsBox") + .def(nb::init<>()) .def_rw("Ts_box", &TsBox::Ts_box) .def_rw("x_e_box", &TsBox::x_e_box) .def_rw("Tk_box", &TsBox::Tk_box) .def_rw("J_21_LW_box", &TsBox::J_21_LW_box); nb::class_(m, "IonizedBox") + .def(nb::init<>()) .def_rw("mean_f_coll", &IonizedBox::mean_f_coll) .def_rw("mean_f_coll_MINI", 
&IonizedBox::mean_f_coll_MINI) .def_rw("log10_Mturnover_ave", &IonizedBox::log10_Mturnover_ave) @@ -267,6 +283,7 @@ NB_MODULE(wrapper_module, m) { .def_rw("Fcoll_MINI", &IonizedBox::Fcoll_MINI); nb::class_(m, "BrightnessTemp ") + .def(nb::init<>()) .def_rw("brightness_temp", &BrightnessTemp::brightness_temp); // Bind functions diff --git a/src/py21cmfast/src/meson.build b/src/py21cmfast/src/meson.build index 927f45f9c..09700ecda 100644 --- a/src/py21cmfast/src/meson.build +++ b/src/py21cmfast/src/meson.build @@ -1,3 +1,4 @@ +# Define the source files that contribute to the 21cmFast build source_files = [ 'BrightnessTemperatureBox.c', 'HaloBox.c', @@ -26,26 +27,31 @@ source_files = [ 'thermochem.c', 'MapMass_cpu.c', 'rng.c', + '_functionprototypes_wrapper.h', + '_inputparams_wrapper.h', + '_outputstructs_wrapper.h', '_wrapper.cpp', ] +# Define the 21cmFast dependencies omp = dependency('openmp') gsl = dependency('gsl') nanobind = dependency('nanobind', static: true) +# If/when fftw gets added to Meson WrapDB, we'll be able to use this: # fftw = dependency('fftw3f_threads') +# ... 
but until then, we need to jump through some hoops: cc = meson.get_compiler ('c') search_paths = [ '/usr/lib', '/usr/local/lib', '/opt/homebrew/lib' ] fftw = cc.find_library ('fftw3f', required: true, dirs: search_paths) fftw_threads = cc.find_library ('fftw3f_threads', required: true, dirs: search_paths) - +# Define the Python extension module deps = [omp,gsl,fftw,fftw_threads,nanobind] -target_dir = 'py21cmfast/c_21cmfast' py.extension_module( + 'c_21cmfast', source_files, dependencies: deps, install: true, - subdir: target_dir + subdir:'py21cmfast', ) -# library('21cmFAST', source_files, dependencies: [omp,gsl,fftw,fftw_threads]) diff --git a/src/py21cmfast/wrapper/inputs.py b/src/py21cmfast/wrapper/inputs.py index b4ae5006f..705bf42e0 100644 --- a/src/py21cmfast/wrapper/inputs.py +++ b/src/py21cmfast/wrapper/inputs.py @@ -25,7 +25,8 @@ from .._cfg import config from .._data import DATA_PATH -from ..c_21cmfast import ffi, lib + +# from ..c_21cmfast import ffi, lib from .globals import global_params from .structs import InputStruct diff --git a/src/py21cmfast/wrapper/outputs.py b/src/py21cmfast/wrapper/outputs.py index 18f9e8d90..04c127ff9 100644 --- a/src/py21cmfast/wrapper/outputs.py +++ b/src/py21cmfast/wrapper/outputs.py @@ -19,7 +19,7 @@ from cached_property import cached_property from .. 
import __version__ -from ..c_21cmfast import ffi, lib +from ..c_21cmfast import lib from ..drivers.param_config import InputParameters from .inputs import AstroParams, CosmoParams, FlagOptions, UserParams, global_params from .structs import OutputStruct as _BaseOutputStruct @@ -263,7 +263,8 @@ def get_required_input_arrays(self, input_box: _BaseOutputStruct) -> list[str]: required += ["hires_vx_2LPT", "hires_vy_2LPT", "hires_vz_2LPT"] else: - required += ["lowres_density", "lowres_vx", "lowres_vy", "lowres_vz"] + required += ["lowres_density", + "lowres_vx", "lowres_vy", "lowres_vz"] if self.user_params.USE_2LPT: required += [ @@ -493,7 +494,8 @@ def get_required_input_arrays(self, input_box: _BaseOutputStruct) -> list[str]: if self.user_params.USE_RELATIVE_VELOCITIES: required += ["lowres_vcb"] else: - raise ValueError(f"{type(input_box)} is not an input required for HaloBox!") + raise ValueError( + f"{type(input_box)} is not an input required for HaloBox!") return required @@ -558,7 +560,8 @@ def get_required_input_arrays(self, input_box: _BaseOutputStruct) -> list[str]: if self.flag_options.USE_MINI_HALOS: required += ["halo_sfr_mini"] else: - raise ValueError(f"{type(input_box)} is not an input required for HaloBox!") + raise ValueError( + f"{type(input_box)} is not an input required for HaloBox!") return required From 8a7e0360378c56e52f325f66e4c7a1e50cc24476 Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Wed, 26 Feb 2025 14:47:56 +1100 Subject: [PATCH 100/145] Import the C-extension nanobind module extension correctly; all the ffi casts etc still need to be removed. 
--- devel/TestFindHalos.py | 2 +- docs/tutorials/halosampler.ipynb | 2 +- src/py21cmfast/drivers/coeval.py | 36 +++++++++++++-------- src/py21cmfast/drivers/lightcone.py | 25 +++++++++------ src/py21cmfast/wrapper/_utils.py | 8 ++--- src/py21cmfast/wrapper/cfuncs.py | 18 +++++++---- src/py21cmfast/wrapper/globals.py | 2 +- src/py21cmfast/wrapper/inputs.py | 43 ++++++++++++++++--------- src/py21cmfast/wrapper/outputs.py | 2 +- src/py21cmfast/wrapper/photoncons.py | 23 ++++++++----- src/py21cmfast/wrapper/structs.py | 48 ++++++++++++++++++---------- tests/test_c_interpolation_tables.py | 41 ++++++++++++++++-------- tests/test_exceptions.py | 5 +-- tests/test_filtering.py | 13 +++++--- tests/test_halo_sampler.py | 17 ++++++---- 15 files changed, 180 insertions(+), 105 deletions(-) diff --git a/devel/TestFindHalos.py b/devel/TestFindHalos.py index 564bddefd..72b00c0da 100644 --- a/devel/TestFindHalos.py +++ b/devel/TestFindHalos.py @@ -9,7 +9,7 @@ initial_conditions, perturb_field, ) -from py21cmfast.c_21cmfast import ffi, lib +import py21cmfast.c_21cmfast as lib from py21cmfast.wrapper._utils import StructInstanceWrapper global_params = StructInstanceWrapper(lib.global_params, ffi) diff --git a/docs/tutorials/halosampler.ipynb b/docs/tutorials/halosampler.ipynb index 815007458..c28accbc0 100644 --- a/docs/tutorials/halosampler.ipynb +++ b/docs/tutorials/halosampler.ipynb @@ -46,7 +46,7 @@ "import py21cmfast as p21c\n", "from py21cmfast import plotting\n", "from py21cmfast import cache_tools\n", - "from py21cmfast.c_21cmfast import ffi, lib\n", + "import py21cmfast.c_21cmfast as lib\n", "\n", "from matplotlib import gridspec\n", "from matplotlib.lines import Line2D\n", diff --git a/src/py21cmfast/drivers/coeval.py b/src/py21cmfast/drivers/coeval.py index cf38252fc..828f4f45d 100644 --- a/src/py21cmfast/drivers/coeval.py +++ b/src/py21cmfast/drivers/coeval.py @@ -12,7 +12,7 @@ from .. 
import __version__ from .._cfg import config -from ..c_21cmfast import lib +from py21cmfast.c_21cmfast import lib from ..wrapper._utils import camel_to_snake from ..wrapper.globals import global_params from ..wrapper.inputs import AstroParams, CosmoParams, FlagOptions, UserParams @@ -114,7 +114,8 @@ def gather( "brightness_temp", ] - clean = kinds if clean and not hasattr(clean, "__len__") else clean or [] + clean = kinds if clean and not hasattr( + clean, "__len__") else clean or [] if any(c not in kinds for c in clean): raise ValueError( "You are trying to clean cached items that you will not be gathering." @@ -134,10 +135,12 @@ def gather( obj = self.get_cached_data(kind, redshift=z, load_data=True) with h5py.File(fname, "a") as fl: cache = ( - fl.create_group("cache") if "cache" not in fl else fl["cache"] + fl.create_group( + "cache") if "cache" not in fl else fl["cache"] ) kind_group = ( - cache.create_group(kind) if kind not in cache else cache[kind] + cache.create_group( + kind) if kind not in cache else cache[kind] ) zstr = f"z{z:.2f}" @@ -170,7 +173,8 @@ def _input_rep(self): def get_unique_filename(self): """Generate a unique hash filename for this instance.""" return self._get_prefix().format( - hash=md5((self._input_rep() + self._particular_rep()).encode()).hexdigest() + hash=md5((self._input_rep() + self._particular_rep() + ).encode()).hexdigest() ) def _write(self, direc=None, fname=None, clobber=False): @@ -304,7 +308,8 @@ def _read_inputs(cls, fname, safe=True): if "photon_nonconservation_data" in fl.keys(): d = fl["photon_nonconservation_data"] - kwargs["photon_nonconservation_data"] = {k: d[k][...] for k in d.keys()} + kwargs["photon_nonconservation_data"] = { + k: d[k][...] for k in d.keys()} return kwargs, glbls @@ -364,7 +369,6 @@ def __init__( photon_nonconservation_data=None, _globals=None, ): - # Check that all the fields have the same input parameters. 
# TODO: use this instead of all the parameter methods input_struct = InputParameters.from_output_structs( @@ -603,7 +607,8 @@ def run_coeval( if out_redshifts is None and perturbed_field is None: raise ValueError("Either out_redshifts or perturb must be given") - direc, regenerate, hooks = _get_config_options(direc, regenerate, write, hooks) + direc, regenerate, hooks = _get_config_options( + direc, regenerate, write, hooks) singleton = False # Ensure perturb is a list of boxes, not just one. @@ -645,7 +650,8 @@ def run_coeval( if out_redshifts is not None and any( p.redshift != z for p, z in zip(perturbed_field, out_redshifts) ): - raise ValueError("Input redshifts do not match perturb field redshifts") + raise ValueError( + "Input redshifts do not match perturb field redshifts") else: out_redshifts = [p.redshift for p in perturbed_field] @@ -669,7 +675,8 @@ def run_coeval( out_redshifts = out_redshifts.tolist() # Get the list of redshift we need to scroll through. - node_redshifts = _get_required_redshifts_coeval(flag_options, out_redshifts) + node_redshifts = _get_required_redshifts_coeval( + flag_options, out_redshifts) # Get all the perturb boxes early. We need to get the perturb at every # redshift. 
@@ -677,7 +684,8 @@ def run_coeval( perturb_ = [] for z in node_redshifts: p = ( - sf.perturb_field(redshift=z, initial_conditions=initial_conditions, **iokw) + sf.perturb_field( + redshift=z, initial_conditions=initial_conditions, **iokw) if z not in pz else perturbed_field[pz.index(z)] ) @@ -923,12 +931,14 @@ def _get_coeval_callbacks( "some of the coeval_callback_redshifts refer to the same node_redshift" ) elif ( - isinstance(coeval_callback_redshifts, int) and coeval_callback_redshifts > 0 + isinstance(coeval_callback_redshifts, + int) and coeval_callback_redshifts > 0 ): compute_coeval_callback = [ not i % coeval_callback_redshifts for i in range(len(scrollz)) ] else: - raise ValueError("coeval_callback_redshifts has to be list or integer > 0.") + raise ValueError( + "coeval_callback_redshifts has to be list or integer > 0.") return compute_coeval_callback diff --git a/src/py21cmfast/drivers/lightcone.py b/src/py21cmfast/drivers/lightcone.py index 970b8ab70..22a717623 100644 --- a/src/py21cmfast/drivers/lightcone.py +++ b/src/py21cmfast/drivers/lightcone.py @@ -13,7 +13,7 @@ from pathlib import Path from typing import Sequence -from ..c_21cmfast import lib +import py21cmfast.c_21cmfast as lib from ..cache_tools import get_boxes_at_redshift from ..lightcones import Lightconer, RectilinearLightconer from ..wrapper.globals import global_params @@ -173,14 +173,14 @@ def make_checkpoint(self, fname, index: int, redshift: float): current_index = fl.attrs.get("current_index", 0) for k, v in self.lightcones.items(): - fl["lightcones"][k][..., -index : v.shape[-1] - current_index] = v[ - ..., -index : v.shape[-1] - current_index + fl["lightcones"][k][..., -index: v.shape[-1] - current_index] = v[ + ..., -index: v.shape[-1] - current_index ] global_q = fl["global_quantities"] for k, v in self.global_quantities.items(): - global_q[k][-index : v.shape[-1] - current_index] = v[ - -index : v.shape[-1] - current_index + global_q[k][-index: v.shape[-1] - current_index] = v[ 
+ -index: v.shape[-1] - current_index ] fl.attrs["current_index"] = index @@ -284,9 +284,11 @@ def compute_rsds(self, n_subcells: int = 4, fname: str | Path | None = None): return self.lightcones["brightness_temp_with_rsds"] H0 = self.cosmo_params.cosmo.H(self.lightcone_redshifts) - los_displacement = self.lightcones["los_velocity"] * units.Mpc / units.s / H0 + los_displacement = self.lightcones["los_velocity"] * \ + units.Mpc / units.s / H0 equiv = units.pixel_scale(self.user_params.cell_size / units.pixel) - los_displacement = -los_displacement.to(units.pixel, equivalencies=equiv) + los_displacement = - \ + los_displacement.to(units.pixel, equivalencies=equiv) lcd = self.lightcone_distances.to(units.pixel, equiv) dvdx_on_h = np.gradient(los_displacement, lcd, axis=1) @@ -307,7 +309,8 @@ def compute_rsds(self, n_subcells: int = 4, fname: str | Path | None = None): Trad = Tcmb * (1 + self.lightcone_redshifts) tb_with_rsds = np.where( gradient_component < 1e-7, - 1000.0 * (self.Ts_box - Trad) / (1.0 + self.lightcone_redshifts), + 1000.0 * (self.Ts_box - Trad) / + (1.0 + self.lightcone_redshifts), (1.0 - np.exp(self.brightness_temp / gradient_component)) * 1000.0 * (self.Ts_box - Trad) @@ -633,7 +636,8 @@ def _run_lightcone_from_perturbed_fields( for iz, z in enumerate(scrollz): if iz < start_idx: continue - logger.info(f"Computing Redshift {z} ({iz + 1}/{len(scrollz)}) iterations.") + logger.info( + f"Computing Redshift {z} ({iz + 1}/{len(scrollz)}) iterations.") # Best to get a perturb for this redshift, to pass to brightness_temperature pf2 = perturbed_fields[iz] @@ -874,7 +878,8 @@ def run_lightcone( regenerate, write, direc, random_seed, hooks See docs of :func:`initial_conditions` for more information. 
""" - direc, regenerate, hooks = _get_config_options(direc, regenerate, write, hooks) + direc, regenerate, hooks = _get_config_options( + direc, regenerate, write, hooks) pf_given = any(perturbed_fields) if pf_given and initial_conditions is None: diff --git a/src/py21cmfast/wrapper/_utils.py b/src/py21cmfast/wrapper/_utils.py index 85bfa542f..9680bbb79 100644 --- a/src/py21cmfast/wrapper/_utils.py +++ b/src/py21cmfast/wrapper/_utils.py @@ -2,14 +2,11 @@ import logging import numpy as np -from cffi import FFI from .. import __version__ -from ..c_21cmfast import ffi, lib +import py21cmfast.c_21cmfast as lib from .exceptions import _process_exitcode -_ffi = FFI() - logger = logging.getLogger(__name__) @@ -38,7 +35,8 @@ def asarray(ptr, shape): ) array = np.frombuffer( - _ffi.buffer(ptr, _ffi.sizeof(ctype) * np.prod(shape)), ctype2dtype[ctype] + _ffi.buffer(ptr, _ffi.sizeof(ctype) * + np.prod(shape)), ctype2dtype[ctype] ) array.shape = shape return array diff --git a/src/py21cmfast/wrapper/cfuncs.py b/src/py21cmfast/wrapper/cfuncs.py index d8c36647d..0f303f047 100644 --- a/src/py21cmfast/wrapper/cfuncs.py +++ b/src/py21cmfast/wrapper/cfuncs.py @@ -6,7 +6,7 @@ from scipy.interpolate import interp1d from typing import Literal, Sequence -from ..c_21cmfast import ffi, lib +import py21cmfast.c_21cmfast as lib from ..drivers.param_config import InputParameters from ._utils import _process_exitcode from .globals import global_params @@ -110,7 +110,8 @@ def compute_tau( # Run the C code return lib.ComputeTau( - inputs.user_params.cstruct, inputs.cosmo_params.cstruct, len(redshifts), z, xHI + inputs.user_params.cstruct, inputs.cosmo_params.cstruct, len( + redshifts), z, xHI ) @@ -197,7 +198,8 @@ def compute_luminosity_function( else: mturnovers = ( - np.zeros(len(redshifts), dtype=np.float32) + 10**astro_params.M_TURN + np.zeros(len(redshifts), dtype=np.float32) + + 10**astro_params.M_TURN ) component = "acg" @@ -330,7 +332,6 @@ def compute_luminosity_function( 
lfunc_all[lfunc_all <= -30] = np.nan return Muvfunc_all, Mhfunc_all, lfunc_all elif component == "acg": - lfunc[lfunc <= -30] = np.nan return Muvfunc, Mhfunc, lfunc elif component == "mcg": @@ -394,7 +395,8 @@ def evaluate_sigma( Uses the 21cmfast backend """ - lib.Broadcast_struct_global_noastro(user_params.cstruct, cosmo_params.cstruct) + lib.Broadcast_struct_global_noastro( + user_params.cstruct, cosmo_params.cstruct) lib.init_ps() if user_params.USE_INTERPOLATION_TABLES: @@ -1092,7 +1094,8 @@ def evaluate_Nion_cond( cond_mass, sigma_cond, densities[:, None] if flag_options.USE_MINI_HALOS else densities, - 10 ** l10mturns[None, :] if flag_options.USE_MINI_HALOS else mcrit_atom, + 10 ** l10mturns[None, + :] if flag_options.USE_MINI_HALOS else mcrit_atom, ap_c["ALPHA_STAR"], ap_c["ALPHA_ESC"], ap_c["F_STAR10"], @@ -1203,7 +1206,8 @@ def halo_sample_test( if from_cat: z_prev = (1 + redshift) / global_params.ZPRIME_STEP_FACTOR - 1 - buffer_size = int(3e7) # about 500MB total 2e7 * 4 (float) * 4 (mass + 3crd) + # about 500MB total 2e7 * 4 (float) * 4 (mass + 3crd) + buffer_size = int(3e7) nhalo_out = np.zeros(1).astype("i4") N_out = np.zeros(n_cond).astype("i4") M_out = np.zeros(n_cond).astype("f8") diff --git a/src/py21cmfast/wrapper/globals.py b/src/py21cmfast/wrapper/globals.py index 38cdeada7..4c051180c 100644 --- a/src/py21cmfast/wrapper/globals.py +++ b/src/py21cmfast/wrapper/globals.py @@ -6,7 +6,7 @@ from .._cfg import config from .._data import DATA_PATH -from ..c_21cmfast import ffi, lib +import py21cmfast.c_21cmfast as lib from .structs import StructInstanceWrapper diff --git a/src/py21cmfast/wrapper/inputs.py b/src/py21cmfast/wrapper/inputs.py index 705bf42e0..74e4ce0ab 100644 --- a/src/py21cmfast/wrapper/inputs.py +++ b/src/py21cmfast/wrapper/inputs.py @@ -26,7 +26,6 @@ from .._cfg import config from .._data import DATA_PATH -# from ..c_21cmfast import ffi, lib from .globals import global_params from .structs import InputStruct @@ -113,11 +112,16 @@ 
class CosmoParams(InputStruct): _base_cosmo = field( default=Planck18, validator=validators.instance_of(FLRW), eq=False, repr=False ) - SIGMA_8 = field(default=0.8102, converter=float, validator=validators.gt(0)) - hlittle = field(default=Planck18.h, converter=float, validator=validators.gt(0)) - OMm = field(default=Planck18.Om0, converter=float, validator=validators.gt(0)) - OMb = field(default=Planck18.Ob0, converter=float, validator=validators.gt(0)) - POWER_INDEX = field(default=0.9665, converter=float, validator=validators.gt(0)) + SIGMA_8 = field(default=0.8102, converter=float, + validator=validators.gt(0)) + hlittle = field(default=Planck18.h, converter=float, + validator=validators.gt(0)) + OMm = field(default=Planck18.Om0, converter=float, + validator=validators.gt(0)) + OMb = field(default=Planck18.Ob0, converter=float, + validator=validators.gt(0)) + POWER_INDEX = field(default=0.9665, converter=float, + validator=validators.gt(0)) @property def OMl(self): @@ -274,13 +278,15 @@ class UserParams(InputStruct): _hmf_models = ["PS", "ST", "WATSON", "WATSON-Z", "DELOS"] _power_models = ["EH", "BBKS", "EFSTATHIOU", "PEEBLES", "WHITE", "CLASS"] - _sample_methods = ["MASS-LIMITED", "NUMBER-LIMITED", "PARTITION", "BINARY-SPLIT"] + _sample_methods = ["MASS-LIMITED", + "NUMBER-LIMITED", "PARTITION", "BINARY-SPLIT"] _integral_methods = ["GSL-QAG", "GAUSS-LEGENDRE", "GAMMA-APPROX"] BOX_LEN = field(default=300.0, converter=float, validator=validators.gt(0)) HII_DIM = field(default=200, converter=int, validator=validators.gt(0)) DIM = field(converter=int) - NON_CUBIC_FACTOR = field(default=1.0, converter=float, validator=validators.gt(0)) + NON_CUBIC_FACTOR = field( + default=1.0, converter=float, validator=validators.gt(0)) USE_FFTW_WISDOM = field(default=False, converter=bool) HMF = field( default="ST", @@ -313,7 +319,8 @@ class UserParams(InputStruct): USE_2LPT = field(default=True, converter=bool) MINIMIZE_MEMORY = field(default=False, converter=bool) 
KEEP_3D_VELOCITIES = field(default=False, converter=bool) - SAMPLER_MIN_MASS = field(default=1e8, converter=float, validator=validators.gt(0)) + SAMPLER_MIN_MASS = field( + default=1e8, converter=float, validator=validators.gt(0)) SAMPLER_BUFFER_FACTOR = field(default=2.0, converter=float) MAXHALO_FACTOR = field(default=2.0, converter=float) N_COND_INTERP = field(default=200, converter=int) @@ -328,7 +335,8 @@ class UserParams(InputStruct): HALOMASS_CORRECTION = field( default=0.9, converter=float, validator=validators.gt(0) ) - PARKINSON_G0 = field(default=1.0, converter=float, validator=validators.gt(0)) + PARKINSON_G0 = field(default=1.0, converter=float, + validator=validators.gt(0)) PARKINSON_y1 = field(default=0.0, converter=float) PARKINSON_y2 = field(default=0.0, converter=float) @@ -550,7 +558,8 @@ def _PHOTON_CONS_TYPE_vld(self, att, val): def _HALO_STOCHASTICITY_vld(self, att, val): """Raise an error if HALO_STOCHASTICITY is True and USE_HALO_FIELD is False.""" if val and not self.USE_HALO_FIELD: - raise ValueError("HALO_STOCHASTICITY is True but USE_HALO_FIELD is False") + raise ValueError( + "HALO_STOCHASTICITY is True but USE_HALO_FIELD is False") @USE_EXP_FILTER.validator def _USE_EXP_FILTER_vld(self, att, val): @@ -697,7 +706,8 @@ class AstroParams(InputStruct): repr=False, ) - HII_EFF_FACTOR = field(default=30.0, converter=float, validator=validators.gt(0)) + HII_EFF_FACTOR = field(default=30.0, converter=float, + validator=validators.gt(0)) F_STAR10 = field( default=-1.3, converter=float, @@ -742,7 +752,8 @@ class AstroParams(InputStruct): L_X_MINI = field( converter=float, validator=validators.gt(0), transformer=logtransformer ) - NU_X_THRESH = field(default=500.0, converter=float, validator=validators.gt(0)) + NU_X_THRESH = field(default=500.0, converter=float, + validator=validators.gt(0)) X_RAY_SPEC_INDEX = field(default=1.0, converter=float) X_RAY_Tvir_MIN = field( converter=float, validator=validators.gt(0), transformer=logtransformer @@ 
-758,8 +769,10 @@ class AstroParams(InputStruct): default=11.447, converter=float, transformer=logtransformer ) UPPER_STELLAR_TURNOVER_INDEX = field(default=-0.6, converter=float) - SIGMA_STAR = field(default=0.25, converter=float, transformer=dex2exp_transformer) - SIGMA_LX = field(default=0.5, converter=float, transformer=dex2exp_transformer) + SIGMA_STAR = field(default=0.25, converter=float, + transformer=dex2exp_transformer) + SIGMA_LX = field(default=0.5, converter=float, + transformer=dex2exp_transformer) SIGMA_SFR_LIM = field( default=0.19, converter=float, transformer=dex2exp_transformer ) diff --git a/src/py21cmfast/wrapper/outputs.py b/src/py21cmfast/wrapper/outputs.py index 04c127ff9..9e4e9427b 100644 --- a/src/py21cmfast/wrapper/outputs.py +++ b/src/py21cmfast/wrapper/outputs.py @@ -19,7 +19,7 @@ from cached_property import cached_property from .. import __version__ -from ..c_21cmfast import lib +import py21cmfast.c_21cmfast as lib from ..drivers.param_config import InputParameters from .inputs import AstroParams, CosmoParams, FlagOptions, UserParams, global_params from .structs import OutputStruct as _BaseOutputStruct diff --git a/src/py21cmfast/wrapper/photoncons.py b/src/py21cmfast/wrapper/photoncons.py index 23530faea..eb07aff9a 100644 --- a/src/py21cmfast/wrapper/photoncons.py +++ b/src/py21cmfast/wrapper/photoncons.py @@ -29,7 +29,7 @@ from copy import deepcopy from scipy.optimize import curve_fit -from ..c_21cmfast import ffi, lib +import py21cmfast.c_21cmfast as lib from ._utils import _process_exitcode from .inputs import AstroParams, CosmoParams, FlagOptions, UserParams, global_params @@ -233,7 +233,8 @@ def setup_photon_cons( """ from ..drivers.single_field import _get_config_options - direc, regenerate, hooks = _get_config_options(direc, regenerate, None, hooks) + direc, regenerate, hooks = _get_config_options( + direc, regenerate, None, hooks) if flag_options.PHOTON_CONS_TYPE == "no-photoncons": return @@ -394,7 +395,8 @@ def 
calibrate_photon_cons( prev_perturb = this_perturb z_for_photon_cons = np.array(z_for_photon_cons[::-1]) - neutral_fraction_photon_cons = np.array(neutral_fraction_photon_cons[::-1]) + neutral_fraction_photon_cons = np.array( + neutral_fraction_photon_cons[::-1]) # Construct the spline for the calibration curve logger.info("Calibrating photon conservation correction") @@ -452,7 +454,8 @@ def photoncons_alpha(cosmo_params, user_params, astro_params, flag_options): alpha_arr = ( np.linspace(-2.0, 1.0, num=31) + astro_params.ALPHA_ESC ) # roughly -0.1 steps for an extended range of alpha - test_pc_data = np.zeros((alpha_arr.size, ref_pc_data["z_calibration"].size)) + test_pc_data = np.zeros( + (alpha_arr.size, ref_pc_data["z_calibration"].size)) # fit to the same z-array ref_interp = np.interp( @@ -493,7 +496,8 @@ def photoncons_alpha(cosmo_params, user_params, astro_params, flag_options): # ratio of given alpha with calibration ratio_ref = (1 - ref_pc_data["nf_calibration"]) / ref_interp - ratio_diff = ratio_test - 1 / ratio_ref[None, :] # find N(alpha)/ref == ref/cal + # find N(alpha)/ref == ref/cal + ratio_diff = ratio_test - 1 / ratio_ref[None, :] diff_test = ( (test_pc_data) + (1 - ref_pc_data["nf_calibration"])[None, ...] 
@@ -564,7 +568,8 @@ def photoncons_alpha(cosmo_params, user_params, astro_params, flag_options): # fit to the curve # make sure there's an estimate and Q isn't too high/low fit_alpha = alpha_estimate_ratio - sel = np.isfinite(fit_alpha) & (ref_interp < max_q_fit) & (ref_interp > min_q_fit) + sel = np.isfinite(fit_alpha) & ( + ref_interp < max_q_fit) & (ref_interp > min_q_fit) # if there are no alpha roots found, it's likely this is a strange reionisation history # but we can't apply the alpha correction so throw an error @@ -595,7 +600,8 @@ def photoncons_alpha(cosmo_params, user_params, astro_params, flag_options): popt, pcov = curve_fit(alpha_func, ref_interp[sel], fit_alpha[sel]) # pass to C logger.info(f"ALPHA_ESC Original = {astro_params.ALPHA_ESC:.3f}") - logger.info(f"Running with ALPHA_ESC = {popt[0]:.2f} + {popt[1]:.2f} * Q") + logger.info( + f"Running with ALPHA_ESC = {popt[0]:.2f} + {popt[1]:.2f} * Q") results["fit_yint"] = popt[0] results["fit_slope"] = popt[1] @@ -638,7 +644,8 @@ def photoncons_fesc(cosmo_params, user_params, astro_params, flag_options): ratio_ref = ref_interp / (1 - ref_pc_data["nf_calibration"]) fit_fesc = ratio_ref * 10**astro_params.F_ESC10 - sel = np.isfinite(fit_fesc) & (ref_interp < max_q_fit) & (ref_interp > min_q_fit) + sel = np.isfinite(fit_fesc) & ( + ref_interp < max_q_fit) & (ref_interp > min_q_fit) popt, pcov = curve_fit(alpha_func, ref_interp[sel], fit_fesc[sel]) # pass to C diff --git a/src/py21cmfast/wrapper/structs.py b/src/py21cmfast/wrapper/structs.py index c33e06731..5a00262da 100644 --- a/src/py21cmfast/wrapper/structs.py +++ b/src/py21cmfast/wrapper/structs.py @@ -17,7 +17,7 @@ from .. 
import __version__ from .._cfg import config -from ..c_21cmfast import ffi, lib +import py21cmfast.c_21cmfast as lib from ._utils import ( asarray, float_to_string_precision, @@ -211,7 +211,8 @@ def __str__(self): """Human-readable string representation of the object.""" d = self.asdict() biggest_k = max(len(k) for k in d) - params = "\n ".join(sorted(f"{k:<{biggest_k}}: {v}" for k, v in d.items())) + params = "\n ".join( + sorted(f"{k:<{biggest_k}}: {v}" for k, v in d.items())) return f"""{self.__class__.__name__}:{params} """ @classmethod @@ -228,7 +229,8 @@ def from_subdict(cls, dct, safe=True): for field in attrs.fields(cls) if field.name not in dct.keys() and field.name in fieldnames ] - extra_items = [(k, v) for k, v in dct.items() if k not in fieldnames] + extra_items = [(k, v) + for k, v in dct.items() if k not in fieldnames] message = ( f"There are extra or missing {cls.__name__} in the file to be read.\n" f"EXTRAS: {extra_items}\n" @@ -270,7 +272,8 @@ class OutputStruct(metaclass=ABCMeta): _c_based_pointers = () _c_compute_function = None - _TYPEMAP = bidict({"float32": "float *", "float64": "double *", "int32": "int *"}) + _TYPEMAP = bidict( + {"float32": "float *", "float64": "double *", "int32": "int *"}) def __init__(self, *, dummy=False, initial=False, **kwargs): """ @@ -321,7 +324,8 @@ def __init__(self, *, dummy=False, initial=False, **kwargs): } for k in self._array_structure: if k not in self.struct.pointer_fields: - raise TypeError(f"Key {k} in {self} not a defined pointer field in C.") + raise TypeError( + f"Key {k} in {self} not a defined pointer field in C.") @cached_property def struct(self) -> StructWrapper: @@ -348,7 +352,8 @@ def path(self) -> tuple[None, Path]: if pth.exists(): return pth - logger.info(f"All paths that defined {self} have been deleted on disk.") + logger.info( + f"All paths that defined {self} have been deleted on disk.") return None @abstractmethod @@ -422,7 +427,8 @@ def _init_cstruct(self): # to unnecessarily load 
things in. We leave it to the user to ensure that all # required arrays are loaded into memory before calling this function. if state.initialized: - setattr(self.struct.cstruct, k, self._ary2buf(getattr(self, k))) + setattr(self.struct.cstruct, k, + self._ary2buf(getattr(self, k))) for k in self.struct.primitive_fields: with contextlib.suppress(AttributeError): @@ -432,7 +438,8 @@ def _ary2buf(self, ary): if not isinstance(ary, np.ndarray): raise ValueError("ary must be a numpy array") return self.struct._ffi.cast( - OutputStruct._TYPEMAP[ary.dtype.name], self.struct._ffi.from_buffer(ary) + OutputStruct._TYPEMAP[ary.dtype.name], self.struct._ffi.from_buffer( + ary) ) def __call__(self): @@ -500,7 +507,8 @@ def _remove_array(self, k, force=False): state = self._array_state[k] if not state.initialized and k in self._array_structure: - warnings.warn(f"Trying to remove array that isn't yet created: {k}") + warnings.warn( + f"Trying to remove array that isn't yet created: {k}") return if state.computed_in_mem and not state.on_disk and not force: @@ -546,7 +554,8 @@ def load_all(self): def filename(self): """The base filename of this object.""" if self._random_seed is None: - raise AttributeError("filename not defined until random_seed has been set") + raise AttributeError( + "filename not defined until random_seed has been set") return self._fname_skeleton.format(seed=self.random_seed) @@ -609,7 +618,8 @@ def _check_parameters(self, fname): and f.attrs[kfile] != q ): if not isinstance(q, (float, np.float32)) or not ( - float_to_string_precision(q, config["cache_param_sigfigs"]) + float_to_string_precision( + q, config["cache_param_sigfigs"]) == float_to_string_precision( f.attrs[kfile], config["cache_param_sigfigs"] ) @@ -724,7 +734,8 @@ def write( try: fl.attrs[kfile] = q except TypeError as e: - logger.info(f"name {k} val {q}, type {type(q)}") + logger.info( + f"name {k} val {q}, type {type(q)}") raise e # Write 21cmFAST version to the file @@ -809,7 +820,8 @@ def 
_get_path( pth = self.find_existing(direc) if pth is None: - raise OSError(f"No boxes exist for these parameters. {pth} {direc}") + raise OSError( + f"No boxes exist for these parameters. {pth} {direc}") else: direc = Path(direc or config["direc"]).expanduser() fname = Path(fname) @@ -1114,7 +1126,8 @@ def summarize(self, indent=0) -> str: @classmethod def _log_call_arguments(cls, *args): - logger.debug(f"Calling {cls._c_compute_function.__name__} with following args:") + logger.debug( + f"Calling {cls._c_compute_function.__name__} with following args:") for arg in args: if isinstance(arg, OutputStruct): @@ -1160,7 +1173,9 @@ def _compute( ( arg() if isinstance(arg, OutputStruct) - else arg.cstruct if isinstance(arg, InputStruct) else arg + else arg.cstruct + if isinstance(arg, InputStruct) + else arg ) for arg in args ] @@ -1208,7 +1223,8 @@ def _call_hooks(self, hooks): def __memory_map(self): shapes = self._c_shape(self.cstruct) for item in self._c_based_pointers: - setattr(self, item, asarray(getattr(self.cstruct, item), shapes[item])) + setattr(self, item, asarray( + getattr(self.cstruct, item), shapes[item])) self._array_state[item].c_memory = True self._array_state[item].computed_in_mem = True diff --git a/tests/test_c_interpolation_tables.py b/tests/test_c_interpolation_tables.py index 469a087f3..4ac912b08 100644 --- a/tests/test_c_interpolation_tables.py +++ b/tests/test_c_interpolation_tables.py @@ -6,7 +6,7 @@ from astropy import units as u from py21cmfast import AstroParams, CosmoParams, FlagOptions, UserParams, global_params -from py21cmfast.c_21cmfast import ffi, lib +import py21cmfast.c_21cmfast as lib from py21cmfast.wrapper import cfuncs as cf from . 
import produce_integration_test_data as prd @@ -153,7 +153,8 @@ def test_inverse_cmf_tables(name, from_cat, delta_range, mass_range, plt): .to("M_sun") .value ) - inputs_cond, inputs_mass = np.meshgrid(delta_range, mmin_range, indexing="ij") + inputs_cond, inputs_mass = np.meshgrid( + delta_range, mmin_range, indexing="ij") z_desc = None inputs_delta = inputs_cond else: @@ -230,7 +231,8 @@ def test_inverse_cmf_tables(name, from_cat, delta_range, mass_range, plt): # TODO: the bound should be over MAX_DELTAC_FRAC*delta_crit, and we should interpolate # instead of setting the integral to its limit at delta crit. if not from_cat: - delta_crit = float(cf.get_delta_crit(up, cp, np.array([M_cond]), redshift)) + delta_crit = float(cf.get_delta_crit( + up, cp, np.array([M_cond]), redshift)) sel_delta = delta_range < 0.98 * delta_crit delta_range = delta_range[sel_delta] cmf_integral = cmf_integral[sel_delta, ...] @@ -360,7 +362,8 @@ def test_FgtrM_conditional_tables(R, delta_range, plt): M_max = 1e20 cond_mass = ( - (4.0 / 3.0 * np.pi * (R * u.Mpc) ** 3 * cp.cosmo.critical_density(0) * cp.OMm) + (4.0 / 3.0 * np.pi * (R * u.Mpc) ** 3 * + cp.cosmo.critical_density(0) * cp.OMm) .to("M_sun") .value ) @@ -402,7 +405,8 @@ def test_FgtrM_conditional_tables(R, delta_range, plt): # and interpolating across the sharp gap results in errors # TODO: the bound should be over MAX_DELTAC_FRAC*delta_crit, and we should interpolate # instead of setting the integral to its limit at delta crit. - delta_crit = float(cf.get_delta_crit(up, cp, np.array([cond_mass]), redshift)) + delta_crit = float(cf.get_delta_crit( + up, cp, np.array([cond_mass]), redshift)) sel_delta = np.fabs((delta_range - delta_crit) / delta_crit) > 0.02 delta_range = delta_range[sel_delta] fcoll_integrals = fcoll_integrals[sel_delta, ...] 
@@ -637,7 +641,8 @@ def test_Nion_conditional_tables( M_max = 1e20 cond_mass = ( - (4.0 / 3.0 * np.pi * (R * u.Mpc) ** 3 * cp.cosmo.critical_density(0) * cp.OMm) + (4.0 / 3.0 * np.pi * (R * u.Mpc) ** 3 * + cp.cosmo.critical_density(0) * cp.OMm) .to("M_sun") .value ) @@ -701,7 +706,8 @@ def test_Nion_conditional_tables( # and interpolating across the sharp gap results in errors # TODO: the bound should be over MAX_DELTAC_FRAC*delta_crit, and we should interpolate # instead of setting the integral to its limit at delta crit. - delta_crit = float(cf.get_delta_crit(up, cp, np.array([cond_mass]), redshift)) + delta_crit = float(cf.get_delta_crit( + up, cp, np.array([cond_mass]), redshift)) sel_delta = np.fabs((delta_range - delta_crit) / delta_crit) > 0.02 delta_range = delta_range[sel_delta] Nion_integrals = Nion_integrals[sel_delta, ...] @@ -747,7 +753,8 @@ def test_SFRD_conditional_table( if name != "PS": pytest.skip("FAST FFCOLL INTEGRALS WORK ONLY WITH EPS") else: - pytest.xfail("FFCOLL TABLES drop sharply at high Mturn, causing failure") + pytest.xfail( + "FFCOLL TABLES drop sharply at high Mturn, causing failure") redshift, kwargs = OPTIONS_HMF[name] opts = prd.get_all_options(redshift, **kwargs) @@ -767,7 +774,8 @@ def test_SFRD_conditional_table( M_max = 1e20 cond_mass = ( - (4.0 / 3.0 * np.pi * (R * u.Mpc) ** 3 * cp.cosmo.critical_density(0) * cp.OMm) + (4.0 / 3.0 * np.pi * (R * u.Mpc) ** 3 * + cp.cosmo.critical_density(0) * cp.OMm) .to("M_sun") .value ) @@ -822,7 +830,8 @@ def test_SFRD_conditional_table( # and interpolating across the sharp gap results in errors # TODO: the bound should be over MAX_DELTAC_FRAC*delta_crit, and we should interpolate # instead of setting the integral to its limit at delta crit. 
- delta_crit = float(cf.get_delta_crit(up, cp, np.array([cond_mass]), redshift)) + delta_crit = float(cf.get_delta_crit( + up, cp, np.array([cond_mass]), redshift)) sel_delta = np.fabs((delta_range - delta_crit) / delta_crit) > 0.02 delta_range = delta_range[sel_delta] SFRD_integrals = SFRD_integrals[sel_delta, ...] @@ -881,7 +890,8 @@ def test_conditional_integral_methods( M_min = global_params.M_MIN_INTEGRAL M_max = 1e20 cond_mass = ( - (4.0 / 3.0 * np.pi * (R * u.Mpc) ** 3 * cp.cosmo.critical_density(0) * cp.OMm) + (4.0 / 3.0 * np.pi * (R * u.Mpc) ** 3 * + cp.cosmo.critical_density(0) * cp.OMm) .to("M_sun") .value ) @@ -981,7 +991,8 @@ def make_table_comparison_plot( zlab = zlabels[j] + f" = {z[i]:.2e}" if z is not None else "" # allow single arrays x_plot = x[j][:, i] if len(x[j].shape) > 1 else x[j] - i_plot = integrals[j][:, i] if len(integrals[j].shape) > 1 else integrals[j] + i_plot = integrals[j][:, i] if len( + integrals[j].shape) > 1 else integrals[j] t_plot = tables[j][:, i] if len(tables[j].shape) > 1 else tables[j] make_comparison_plot( x_plot, @@ -1022,7 +1033,8 @@ def make_integral_comparison_plot(x1, x2, integral_list, integral_list_second, p ) for j in range(i_second.shape[1]): - axs[0, 1].semilogy(x1, i_second[:, j], color=f"C{j:d}", linestyle=styles[i]) + axs[0, 1].semilogy(x1, i_second[:, j], + color=f"C{j:d}", linestyle=styles[i]) axs[1, 1].semilogy( x1, i_second[:, j] / integral_list_second[0][:, j], @@ -1113,7 +1125,8 @@ def print_failure_stats(test, truth, inputs, abs_tol, rel_tol, name): print("----- First 10 -----") for j in range(min(10, sel_failed.sum())): - input_arr = [f"{failed_inp[i][j]:.2e}" for i, finp in enumerate(failed_inp)] + input_arr = [f"{failed_inp[i][j]:.2e}" for i, + finp in enumerate(failed_inp)] print( f"CRD {input_arr}" + f" {truth[sel_failed].flatten()[j]:.4e} {test[sel_failed].flatten()[j]:.4e}" diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index 9e8c77d42..43143abff 100644 --- 
a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -2,7 +2,7 @@ import numpy as np -from py21cmfast.c_21cmfast import ffi, lib +import py21cmfast.c_21cmfast as lib from py21cmfast.wrapper.exceptions import ( PHOTONCONSERROR, ParameterError, @@ -32,5 +32,6 @@ def test_simple(subfunc): def test_pass(): answer = np.array([0], dtype="f8") - lib.FunctionThatCatches(True, True, ffi.cast("double *", ffi.from_buffer(answer))) + lib.FunctionThatCatches(True, True, ffi.cast( + "double *", ffi.from_buffer(answer))) assert answer == 5.0 diff --git a/tests/test_filtering.py b/tests/test_filtering.py index 93e45b84b..08d10c5bc 100644 --- a/tests/test_filtering.py +++ b/tests/test_filtering.py @@ -13,7 +13,7 @@ UserParams, global_params, ) -from py21cmfast.c_21cmfast import ffi, lib +import py21cmfast.c_21cmfast as lib from . import produce_integration_test_data as prd from .test_c_interpolation_tables import print_failure_stats @@ -84,8 +84,10 @@ def get_binned_stats(x_arr, y_arr, bins, stats): } for stat in stats: - spstatkey = statistic_dict[stat] if stat in statistic_dict.keys() else stat - result[stat], _, _ = binstat(x_in, y_in, bins=bins, statistic=spstatkey) + spstatkey = statistic_dict[stat] if stat in statistic_dict.keys( + ) else stat + result[stat], _, _ = binstat( + x_in, y_in, bins=bins, statistic=spstatkey) return result @@ -128,7 +130,7 @@ def test_filters(filter_flag, R, plt): R_cells = R / up.BOX_LEN * up.HII_DIM Rp_cells = R_param / up.BOX_LEN * up.HII_DIM r_from_centre = np.linalg.norm( - np.mgrid[0 : up.HII_DIM, 0 : up.HII_DIM, 0 : up.HII_DIM] + np.mgrid[0: up.HII_DIM, 0: up.HII_DIM, 0: up.HII_DIM] - np.array([up.HII_DIM // 2, up.HII_DIM // 2, up.HII_DIM // 2])[ :, None, None, None ], @@ -293,7 +295,8 @@ def filter_plot( ) lns.append( - axs[idx, 2].plot(r_cen, t, "m:", linewidth=2, label="Expected", zorder=3)[0] + axs[idx, 2].plot(r_cen, t, "m:", linewidth=2, + label="Expected", zorder=3)[0] ) axs[idx, 2].grid() axs[idx, 2].set_xlabel("dist from 
centre") diff --git a/tests/test_halo_sampler.py b/tests/test_halo_sampler.py index f060f3eb1..5aa292a35 100644 --- a/tests/test_halo_sampler.py +++ b/tests/test_halo_sampler.py @@ -12,7 +12,7 @@ UserParams, global_params, ) -from py21cmfast.c_21cmfast import ffi, lib +import py21cmfast.c_21cmfast as lib from py21cmfast.wrapper import cfuncs as cf from . import produce_integration_test_data as prd @@ -97,7 +97,8 @@ def test_sampler(name, cond, from_cat, plt): hist, _ = np.histogram(sample_dict["halo_masses"], edges) - mass_dens = cp.cosmo.Om0 * cp.cosmo.critical_density(0).to("Mpc-3 M_sun").value + mass_dens = cp.cosmo.Om0 * \ + cp.cosmo.critical_density(0).to("Mpc-3 M_sun").value volume_total_m = mass * n_cond / mass_dens mf_out = hist / volume_total_m / dlnm binned_cmf = binned_cmf / dlnm * mass_dens @@ -191,7 +192,8 @@ def test_halo_scaling_relations(ic, default_input_struct): halo_stars_out = out_dict["halo_stars"].reshape( (halo_mass_vals.size, n_halo_per_mass) ) - halo_sfr_out = out_dict["halo_sfr"].reshape((halo_mass_vals.size, n_halo_per_mass)) + halo_sfr_out = out_dict["halo_sfr"].reshape( + (halo_mass_vals.size, n_halo_per_mass)) halo_xray_out = out_dict["halo_xray"].reshape( (halo_mass_vals.size, n_halo_per_mass) ) @@ -210,14 +212,16 @@ def test_halo_scaling_relations(ic, default_input_struct): ) sim_SHMR = halo_stars_out / halo_mass_out sel_stars = exp_SHMR > 1e-10 - np.testing.assert_allclose(exp_SHMR, sim_SHMR.mean(axis=1), atol=1e-10, rtol=1e-1) + np.testing.assert_allclose( + exp_SHMR, sim_SHMR.mean(axis=1), atol=1e-10, rtol=1e-1) np.testing.assert_allclose( ap.SIGMA_STAR, np.log10(sim_SHMR).std(axis=1)[sel_stars], rtol=1e-1 ) exp_SSFR = ic.cosmo_params.cosmo.H(redshift).to("s-1").value / (ap.t_STAR) sim_SSFR = halo_sfr_out / halo_stars_out - np.testing.assert_allclose(exp_SSFR, sim_SSFR.mean(axis=1)[sel_stars], rtol=1e-1) + np.testing.assert_allclose(exp_SSFR, sim_SSFR.mean(axis=1)[ + sel_stars], rtol=1e-1) np.testing.assert_allclose( 
ap.SIGMA_SFR_LIM, np.log10(sim_SSFR).std(axis=1)[sel_stars], @@ -259,7 +263,8 @@ def plot_sampler_comparison( # log-spaced bins dlnm = np.log(bin_edges[1:]) - np.log(bin_edges[:-1]) bin_centres = (bin_edges[:-1] * np.exp(dlnm / 2)).astype("f4") - edges_n = np.linspace(0, max(N_array.max(), 1), min(100, max(N_array.max(), 1) + 1)) + edges_n = np.linspace(0, max(N_array.max(), 1), + min(100, max(N_array.max(), 1) + 1)) centres_n = (edges_n[:-1] + edges_n[1:]) / 2 hist_n, _ = np.histogram(N_array, edges_n) From e493f18a2f36b6a061ab6d3714f39cd4909a2e3d Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Tue, 4 Mar 2025 16:09:46 +1100 Subject: [PATCH 101/145] Remove some CFFI-related comments. --- src/py21cmfast/src/InputParameters.h | 3 +-- src/py21cmfast/src/OutputStructs.h | 3 +-- src/py21cmfast/src/_functionprototypes_wrapper.h | 4 ---- src/py21cmfast/src/_inputparams_wrapper.h | 13 ------------- src/py21cmfast/src/_outputstructs_wrapper.h | 12 ------------ src/py21cmfast/wrapper/photoncons.py | 2 ++ 6 files changed, 4 insertions(+), 33 deletions(-) diff --git a/src/py21cmfast/src/InputParameters.h b/src/py21cmfast/src/InputParameters.h index 2a18768e5..e5b291c4a 100644 --- a/src/py21cmfast/src/InputParameters.h +++ b/src/py21cmfast/src/InputParameters.h @@ -2,8 +2,7 @@ #define _PARAMSTRUCTURES_H #include -//since ffi.cdef() cannot include directives, we store the types and globals in another file -// Since it is unguarded, make sure to ONLY include this file from here +// Since it is unguarded, make sure to ONLY include this file from here #include "_inputparams_wrapper.h" #ifdef __cplusplus diff --git a/src/py21cmfast/src/OutputStructs.h b/src/py21cmfast/src/OutputStructs.h index f87b25c25..9e8c1c1ee 100644 --- a/src/py21cmfast/src/OutputStructs.h +++ b/src/py21cmfast/src/OutputStructs.h @@ -6,8 +6,7 @@ #include "InputParameters.h" -//since ffi.cdef() cannot include directives, we store the types and globals in another file -// Since it is unguarded, make sure to 
ONLY include this file from here +// Since it is unguarded, make sure to ONLY include this file from here #include "_outputstructs_wrapper.h" #endif diff --git a/src/py21cmfast/src/_functionprototypes_wrapper.h b/src/py21cmfast/src/_functionprototypes_wrapper.h index 2f8f84770..7a21881d9 100644 --- a/src/py21cmfast/src/_functionprototypes_wrapper.h +++ b/src/py21cmfast/src/_functionprototypes_wrapper.h @@ -1,7 +1,3 @@ -/* This file contains the repeated function prototypes which are needed by CFFI - to be included explicitly via ffi.cdef(), These are the only functions which - are visible to the python wrapper */ - /* OutputStruct COMPUTE FUNCTIONS */ int ComputeInitialConditions(unsigned long long random_seed, UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes); diff --git a/src/py21cmfast/src/_inputparams_wrapper.h b/src/py21cmfast/src/_inputparams_wrapper.h index fff64ac98..49ca36060 100644 --- a/src/py21cmfast/src/_inputparams_wrapper.h +++ b/src/py21cmfast/src/_inputparams_wrapper.h @@ -1,12 +1,3 @@ -/*We need to explicitly define the types used by the warpper using ffi.cdef() - However, that function does not take directives, so we separate the types here -*/ -//WARNING: DO NOT #include THIS FILE IN THE C CODE EXCEPT FOR IN InputParameters.h - - -// #ifdef __cplusplus -// extern "C" { -// #endif typedef struct CosmoParams{ @@ -215,7 +206,3 @@ extern AstroParams *astro_params_global; extern FlagOptions *flag_options_global; extern GlobalParams global_params; - -// #ifdef __cplusplus -// } -// #endif diff --git a/src/py21cmfast/src/_outputstructs_wrapper.h b/src/py21cmfast/src/_outputstructs_wrapper.h index 1d84aaf06..bad3180b1 100644 --- a/src/py21cmfast/src/_outputstructs_wrapper.h +++ b/src/py21cmfast/src/_outputstructs_wrapper.h @@ -1,11 +1,3 @@ -/*We need to explicitly define the types used by the warpper using ffi.cdef() - However, that function does not take directives, so we separate the types here -*/ -//WARNING: DO NOT 
#include THIS FILE IN THE C CODE EXCEPT FOR IN OutputStructs.h - -// #ifdef __cplusplus -// extern "C" { -// #endif typedef struct InitialConditions{ float *lowres_density, *lowres_vx, *lowres_vy, *lowres_vz, *lowres_vx_2LPT, *lowres_vy_2LPT, *lowres_vz_2LPT; float *hires_density, *hires_vx, *hires_vy, *hires_vz, *hires_vx_2LPT, *hires_vy_2LPT, *hires_vz_2LPT; //cw addition @@ -94,7 +86,3 @@ typedef struct IonizedBox{ typedef struct BrightnessTemp{ float *brightness_temp; } BrightnessTemp; - -// #ifdef __cplusplus -// } -// #endif diff --git a/src/py21cmfast/wrapper/photoncons.py b/src/py21cmfast/wrapper/photoncons.py index eb07aff9a..005495a4d 100644 --- a/src/py21cmfast/wrapper/photoncons.py +++ b/src/py21cmfast/wrapper/photoncons.py @@ -149,6 +149,8 @@ def _get_photon_nonconservation_data(): c_int_NC = ffi.cast("int *", ffi.from_buffer(IntVal2)) c_int_NP = ffi.cast("int *", ffi.from_buffer(IntVal3)) + c_int_NP = IntVal3.ctypes.data_as(c_void_p) + # Run the C code errcode = lib.ObtainPhotonConsData( c_z_at_Q, From af24ee8795c157ef2263b21dc9ccc0263bb78204 Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Fri, 21 Mar 2025 14:33:22 +1100 Subject: [PATCH 102/145] All FFI code removed but lots still NotImplemented. Build works and tests run but nearly all fail. 
--- src/py21cmfast/__init__.py | 80 +++++++++++++------------- src/py21cmfast/drivers/coeval.py | 2 +- src/py21cmfast/src/InputParameters.c | 23 ++++++++ src/py21cmfast/src/InputParameters.h | 9 ++- src/py21cmfast/src/_wrapper.cpp | 11 +++- src/py21cmfast/wrapper/cfuncs.py | 78 +++++++++++++++++-------- src/py21cmfast/wrapper/globals.py | 31 +++++++--- src/py21cmfast/wrapper/photoncons.py | 59 +++++++++++++------ src/py21cmfast/wrapper/structs.py | 65 +++++++++++++++------ tests/produce_integration_test_data.py | 15 +++-- tests/test_exceptions.py | 15 +++-- tests/test_filtering.py | 8 ++- tests/test_tables.py | 2 +- 13 files changed, 273 insertions(+), 125 deletions(-) diff --git a/src/py21cmfast/__init__.py b/src/py21cmfast/__init__.py index 128e2bbe1..fd4be26a2 100644 --- a/src/py21cmfast/__init__.py +++ b/src/py21cmfast/__init__.py @@ -22,46 +22,46 @@ from ._cfg import config from ._logging import configure_logging -# from .cache_tools import query_cache -# from .drivers.coeval import Coeval, run_coeval -# from .drivers.lightcone import LightCone, exhaust_lightcone, run_lightcone -# from .drivers.param_config import InputParameters -# from .drivers.single_field import ( -# brightness_temperature, -# compute_halo_grid, -# compute_initial_conditions, -# compute_ionization_field, -# compute_xray_source_field, -# determine_halo_list, -# perturb_field, -# perturb_halo_list, -# spin_temperature, -# ) -# from .lightcones import AngularLightconer, RectilinearLightconer -# from .utils import get_all_fieldnames -# from .wrapper.cfuncs import ( -# compute_luminosity_function, -# compute_tau, -# construct_fftw_wisdoms, -# ) -# from .wrapper.inputs import ( -# AstroParams, -# CosmoParams, -# FlagOptions, -# UserParams, -# global_params, -# ) -# from .wrapper.outputs import ( -# BrightnessTemp, -# HaloBox, -# HaloField, -# InitialConditions, -# IonizedBox, -# PerturbedField, -# PerturbHaloField, -# TsBox, -# XraySourceBox, -# ) +from .cache_tools import query_cache +from 
.drivers.coeval import Coeval, run_coeval +from .drivers.lightcone import LightCone, exhaust_lightcone, run_lightcone +from .drivers.param_config import InputParameters +from .drivers.single_field import ( + brightness_temperature, + compute_halo_grid, + compute_initial_conditions, + compute_ionization_field, + compute_xray_source_field, + determine_halo_list, + perturb_field, + perturb_halo_list, + spin_temperature, +) +from .lightcones import AngularLightconer, RectilinearLightconer +from .utils import get_all_fieldnames +from .wrapper.cfuncs import ( + compute_luminosity_function, + compute_tau, + construct_fftw_wisdoms, +) +from .wrapper.inputs import ( + AstroParams, + CosmoParams, + FlagOptions, + UserParams, + global_params, +) +from .wrapper.outputs import ( + BrightnessTemp, + HaloBox, + HaloField, + InitialConditions, + IonizedBox, + PerturbedField, + PerturbHaloField, + TsBox, + XraySourceBox, +) configure_logging() diff --git a/src/py21cmfast/drivers/coeval.py b/src/py21cmfast/drivers/coeval.py index 828f4f45d..db232bee6 100644 --- a/src/py21cmfast/drivers/coeval.py +++ b/src/py21cmfast/drivers/coeval.py @@ -12,7 +12,7 @@ from .. 
import __version__ from .._cfg import config -from py21cmfast.c_21cmfast import lib +import py21cmfast.c_21cmfast as lib from ..wrapper._utils import camel_to_snake from ..wrapper.globals import global_params from ..wrapper.inputs import AstroParams, CosmoParams, FlagOptions, UserParams diff --git a/src/py21cmfast/src/InputParameters.c b/src/py21cmfast/src/InputParameters.c index 3e47064d1..5f77b2e21 100644 --- a/src/py21cmfast/src/InputParameters.c +++ b/src/py21cmfast/src/InputParameters.c @@ -1,3 +1,5 @@ +#include +#include #include "InputParameters.h" void Broadcast_struct_global_all(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options){ @@ -89,3 +91,24 @@ GlobalParams global_params = { .USE_ADIABATIC_FLUCTUATIONS = 1, }; + +void set_external_table_path(GlobalParams *params, const char *value) { + if (params->external_table_path != 0) { + free(params->external_table_path); + } + params->external_table_path = (char *)malloc(strlen(value) + 1); + strcpy(params->external_table_path, value); +} +char* get_external_table_path(GlobalParams *params) { + return params->external_table_path ? params->external_table_path : ""; +} +void set_wisdoms_path(GlobalParams *params, const char *value) { + if (params->wisdoms_path != 0) { + free(params->wisdoms_path); + } + params->wisdoms_path = (char *)malloc(strlen(value) + 1); + strcpy(params->wisdoms_path, value); +} +char* get_wisdoms_path(GlobalParams *params) { + return params->wisdoms_path ? 
params->wisdoms_path : ""; +} diff --git a/src/py21cmfast/src/InputParameters.h b/src/py21cmfast/src/InputParameters.h index e5b291c4a..ee0f9144e 100644 --- a/src/py21cmfast/src/InputParameters.h +++ b/src/py21cmfast/src/InputParameters.h @@ -9,9 +9,12 @@ extern "C" { #endif - void Broadcast_struct_global_all(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options); - void Broadcast_struct_global_noastro(UserParams *user_params, CosmoParams *cosmo_params); - +void set_external_table_path(GlobalParams *params, const char *value); +char* get_external_table_path(GlobalParams *params); +void set_wisdoms_path(GlobalParams *params, const char *value); +char* get_wisdoms_path(GlobalParams *params); +void Broadcast_struct_global_all(UserParams *user_params, CosmoParams *cosmo_params, AstroParams *astro_params, FlagOptions *flag_options); +void Broadcast_struct_global_noastro(UserParams *user_params, CosmoParams *cosmo_params); #ifdef __cplusplus } #endif diff --git a/src/py21cmfast/src/_wrapper.cpp b/src/py21cmfast/src/_wrapper.cpp index 32bb29849..5f4461375 100644 --- a/src/py21cmfast/src/_wrapper.cpp +++ b/src/py21cmfast/src/_wrapper.cpp @@ -1,3 +1,4 @@ +#include "InputParameters.h" #include #include // #include @@ -188,7 +189,15 @@ NB_MODULE(c_21cmfast, m) { .def_rw("T_RE", &GlobalParams::T_RE) .def_rw("VAVG", &GlobalParams::VAVG) .def_rw("USE_ADIABATIC_FLUCTUATIONS", - &GlobalParams::USE_ADIABATIC_FLUCTUATIONS); + &GlobalParams::USE_ADIABATIC_FLUCTUATIONS) + .def("set_external_table_path", &set_external_table_path) + .def("get_external_table_path", &get_external_table_path) + .def("set_wisdoms_path", &set_wisdoms_path) + .def("get_wisdoms_path", &get_wisdoms_path); + + m.def("get_global_params", []() -> GlobalParams& { + return global_params; + }, nb::rv_policy::reference); // Bind output parameters nb::class_(m, "InitialConditions") diff --git a/src/py21cmfast/wrapper/cfuncs.py b/src/py21cmfast/wrapper/cfuncs.py index 
0f303f047..0d2c04ba7 100644 --- a/src/py21cmfast/wrapper/cfuncs.py +++ b/src/py21cmfast/wrapper/cfuncs.py @@ -105,8 +105,11 @@ def compute_tau( redshifts = np.array(redshifts, dtype="float32") global_xHI = np.array(global_xHI, dtype="float32") - z = ffi.cast("float *", ffi.from_buffer(redshifts)) - xHI = ffi.cast("float *", ffi.from_buffer(global_xHI)) + # WIP: CFFI Refactor + # z = ffi.cast("float *", ffi.from_buffer(redshifts)) + # xHI = ffi.cast("float *", ffi.from_buffer(global_xHI)) + z = redshifts + xHI = global_xHI # Run the C code return lib.ComputeTau( @@ -211,9 +214,13 @@ def compute_luminosity_function( Muvfunc.shape = (len(redshifts), nbins) Mhfunc.shape = (len(redshifts), nbins) - c_Muvfunc = ffi.cast("double *", ffi.from_buffer(Muvfunc)) - c_Mhfunc = ffi.cast("double *", ffi.from_buffer(Mhfunc)) - c_lfunc = ffi.cast("double *", ffi.from_buffer(lfunc)) + # WIP: CFFI Refactor + # c_Muvfunc = ffi.cast("double *", ffi.from_buffer(Muvfunc)) + # c_Mhfunc = ffi.cast("double *", ffi.from_buffer(Mhfunc)) + # c_lfunc = ffi.cast("double *", ffi.from_buffer(lfunc)) + c_Muvfunc = Muvfunc + c_Mhfunc = Mhfunc + c_lfunc = lfunc lfunc_MINI = np.zeros(len(redshifts) * nbins) Muvfunc_MINI = np.zeros(len(redshifts) * nbins) @@ -223,9 +230,13 @@ def compute_luminosity_function( Muvfunc_MINI.shape = (len(redshifts), nbins) Mhfunc_MINI.shape = (len(redshifts), nbins) - c_Muvfunc_MINI = ffi.cast("double *", ffi.from_buffer(Muvfunc_MINI)) - c_Mhfunc_MINI = ffi.cast("double *", ffi.from_buffer(Mhfunc_MINI)) - c_lfunc_MINI = ffi.cast("double *", ffi.from_buffer(lfunc_MINI)) + # WIP: CFFI Refactor + # c_Muvfunc_MINI = ffi.cast("double *", ffi.from_buffer(Muvfunc_MINI)) + # c_Mhfunc_MINI = ffi.cast("double *", ffi.from_buffer(Mhfunc_MINI)) + # c_lfunc_MINI = ffi.cast("double *", ffi.from_buffer(lfunc_MINI)) + c_Muvfunc_MINI = Muvfunc_MINI + c_Mhfunc_MINI = Mhfunc_MINI + c_lfunc_MINI = lfunc_MINI if component in ("both", "acg"): # Run the C code @@ -237,8 +248,11 @@ def 
compute_luminosity_function( flag_options.cstruct, 1, len(redshifts), - ffi.cast("float *", ffi.from_buffer(redshifts)), - ffi.cast("float *", ffi.from_buffer(mturnovers)), + # WIP: CFFI Refactor + # ffi.cast("float *", ffi.from_buffer(redshifts)), + # ffi.cast("float *", ffi.from_buffer(mturnovers)), + redshifts, + mturnovers, c_Muvfunc, c_Mhfunc, c_lfunc, @@ -268,8 +282,11 @@ def compute_luminosity_function( flag_options.cstruct, 2, len(redshifts), - ffi.cast("float *", ffi.from_buffer(redshifts)), - ffi.cast("float *", ffi.from_buffer(mturnovers_mini)), + # WIP: CFFI Refactor + # ffi.cast("float *", ffi.from_buffer(redshifts)), + # ffi.cast("float *", ffi.from_buffer(mturnovers_mini)), + redshifts, + mturnovers_mini, c_Muvfunc_MINI, c_Mhfunc_MINI, c_lfunc_MINI, @@ -1223,17 +1240,28 @@ def halo_sample_test( flag_options.cstruct, 12345, n_cond, - ffi.cast("float *", cond_array.ctypes.data), - ffi.cast("int *", crd_in.ctypes.data), + # WIP: CFFI Refactor + # ffi.cast("float *", cond_array.ctypes.data), + # ffi.cast("int *", crd_in.ctypes.data), + cond_array.ctypes.data, + crd_in.ctypes.data, redshift, z_prev, - ffi.cast("int *", nhalo_out.ctypes.data), - ffi.cast("int *", N_out.ctypes.data), - ffi.cast("double *", exp_N.ctypes.data), - ffi.cast("double *", M_out.ctypes.data), - ffi.cast("double *", exp_M.ctypes.data), - ffi.cast("float *", halomass_out.ctypes.data), - ffi.cast("int *", halocrd_out.ctypes.data), + # WIP: CFFI Refactor + # ffi.cast("int *", nhalo_out.ctypes.data), + # ffi.cast("int *", N_out.ctypes.data), + # ffi.cast("double *", exp_N.ctypes.data), + # ffi.cast("double *", M_out.ctypes.data), + # ffi.cast("double *", exp_M.ctypes.data), + # ffi.cast("float *", halomass_out.ctypes.data), + # ffi.cast("int *", halocrd_out.ctypes.data), + nhalo_out, + N_out, + exp_N, + M_out, + exp_M, + halomass_out, + halocrd_out, ) return { @@ -1309,7 +1337,9 @@ def convert_halo_properties( fake_pthalos._init_cstruct() # single element zero array to act as the grids 
(vcb, J_21_LW, z_reion, Gamma12) - zero_array = ffi.cast("float *", np.zeros(1).ctypes.data) + # WIP: CFFI Refactor + # zero_array = ffi.cast("float *", np.zeros(1).ctypes.data) + zero_array = np.zeros(1) out_buffer = np.zeros(12 * halo_masses.size).astype("f4") lib.test_halo_props( @@ -1323,7 +1353,9 @@ def convert_halo_properties( zero_array, # z_re zero_array, # Gamma12 fake_pthalos(), - ffi.cast("float *", out_buffer.ctypes.data), + # WIP: CFFI Refactor + # ffi.cast("float *", out_buffer.ctypes.data), + out_buffer, ) out_buffer = out_buffer.reshape(fake_pthalos.n_halos, 12) diff --git a/src/py21cmfast/wrapper/globals.py b/src/py21cmfast/wrapper/globals.py index 4c051180c..daf72c385 100644 --- a/src/py21cmfast/wrapper/globals.py +++ b/src/py21cmfast/wrapper/globals.py @@ -7,9 +7,12 @@ from .._cfg import config from .._data import DATA_PATH import py21cmfast.c_21cmfast as lib + +# WIP: CFFI Refactor from .structs import StructInstanceWrapper +# WIP: CFFI Refactor class GlobalParams(StructInstanceWrapper): """ Global parameters for 21cmFAST. @@ -262,12 +265,19 @@ class GlobalParams(StructInstanceWrapper): Avg value of the DM-b relative velocity [im km/s], ~0.9*SIGMAVCB (=25.86 km/s) normally. 
""" - def __init__(self, wrapped, ffi): - super().__init__(wrapped, ffi) + # def __init__(self, wrapped, ffi): + # super().__init__(wrapped, ffi) + # + # self.external_table_path = ffi.new("char[]", str(DATA_PATH).encode()) + # self._wisdoms_path = Path(config["direc"]) / "wisdoms" + # self.wisdoms_path = ffi.new("char[]", str(self._wisdoms_path).encode()) + def __init__(self, wrapped): + super().__init__(wrapped) - self.external_table_path = ffi.new("char[]", str(DATA_PATH).encode()) - self._wisdoms_path = Path(config["direc"]) / "wisdoms" - self.wisdoms_path = ffi.new("char[]", str(self._wisdoms_path).encode()) + self._cobj.set_external_table_path(str(DATA_PATH)) + _wisdoms_path = Path(config["direc"]) / "wisdoms" + self._cobj.set_wisdoms_path(str(_wisdoms_path)) + self.wisdoms_path = str(_wisdoms_path).encode() @property def external_table_path(self): @@ -281,10 +291,11 @@ def external_table_path(self, val): @property def wisdoms_path(self): """An ffi char pointer to the path to which external tables are kept.""" - if not self._wisdoms_path.exists(): - self._wisdoms_path.mkdir(parents=True) + wisdoms_path = Path(self._cobj.get_wisdoms_path()) + if not wisdoms_path.exists(): + wisdoms_path.mkdir(parents=True) - return self._wisdom_path + return wisdoms_path @wisdoms_path.setter def wisdoms_path(self, val): @@ -332,4 +343,6 @@ def validate(self): ) -global_params = GlobalParams(lib.global_params, ffi) +# WIP: CFFI Refactor +global_params = GlobalParams(lib.get_global_params()) +# global_params = lib.GlobalParams() diff --git a/src/py21cmfast/wrapper/photoncons.py b/src/py21cmfast/wrapper/photoncons.py index 005495a4d..ca72249c9 100644 --- a/src/py21cmfast/wrapper/photoncons.py +++ b/src/py21cmfast/wrapper/photoncons.py @@ -91,8 +91,11 @@ def _calibrate_photon_conservation_correction( redshifts_estimate = np.array(redshifts_estimate, dtype="float64") nf_estimate = np.array(nf_estimate, dtype="float64") - z = ffi.cast("double *", ffi.from_buffer(redshifts_estimate)) 
- xHI = ffi.cast("double *", ffi.from_buffer(nf_estimate)) + # WIP: CFFI Refactor + # z = ffi.cast("double *", ffi.from_buffer(redshifts_estimate)) + # xHI = ffi.cast("double *", ffi.from_buffer(nf_estimate)) + z = redshifts_estimate + xHI = nf_estimate logger.debug(f"PhotonCons nf estimates: {nf_estimate}") return lib.PhotonCons_Calibration(z, xHI, NSpline) @@ -138,18 +141,28 @@ def _get_photon_nonconservation_data(): IntVal2 = np.array(np.zeros(1), dtype="int32") IntVal3 = np.array(np.zeros(1), dtype="int32") - c_z_at_Q = ffi.cast("double *", ffi.from_buffer(data[0])) - c_Qval = ffi.cast("double *", ffi.from_buffer(data[1])) - c_z_cal = ffi.cast("double *", ffi.from_buffer(data[2])) - c_nf_cal = ffi.cast("double *", ffi.from_buffer(data[3])) - c_PC_nf = ffi.cast("double *", ffi.from_buffer(data[4])) - c_PC_deltaz = ffi.cast("double *", ffi.from_buffer(data[5])) - - c_int_NQ = ffi.cast("int *", ffi.from_buffer(IntVal1)) - c_int_NC = ffi.cast("int *", ffi.from_buffer(IntVal2)) - c_int_NP = ffi.cast("int *", ffi.from_buffer(IntVal3)) - - c_int_NP = IntVal3.ctypes.data_as(c_void_p) + # WIP: CFFI Refactor + # c_z_at_Q = ffi.cast("double *", ffi.from_buffer(data[0])) + # c_Qval = ffi.cast("double *", ffi.from_buffer(data[1])) + # c_z_cal = ffi.cast("double *", ffi.from_buffer(data[2])) + # c_nf_cal = ffi.cast("double *", ffi.from_buffer(data[3])) + # c_PC_nf = ffi.cast("double *", ffi.from_buffer(data[4])) + # c_PC_deltaz = ffi.cast("double *", ffi.from_buffer(data[5])) + # + # c_int_NQ = ffi.cast("int *", ffi.from_buffer(IntVal1)) + # c_int_NC = ffi.cast("int *", ffi.from_buffer(IntVal2)) + # c_int_NP = ffi.cast("int *", ffi.from_buffer(IntVal3)) + + c_z_at_Q = data[0] + c_Qval = data[1] + c_z_cal = data[2] + c_nf_cal = data[3] + c_PC_nf = data[4] + c_PC_deltaz = data[5] + + c_int_NQ = IntVal1 + c_int_NC = IntVal2 + c_int_NP = IntVal3 # Run the C code errcode = lib.ObtainPhotonConsData( @@ -419,9 +432,13 @@ def get_photoncons_dz(astro_params, flag_options, redshift): 
lib.adjust_redshifts_for_photoncons( astro_params.cstruct, flag_options.cstruct, - ffi.cast("float *", redshift_pc_in.ctypes.data), - ffi.cast("float *", stored_redshift_pc_in.ctypes.data), - ffi.cast("float *", deltaz.ctypes.data), + # WIP: CFFI Refactor + # ffi.cast("float *", redshift_pc_in.ctypes.data), + # ffi.cast("float *", stored_redshift_pc_in.ctypes.data), + # ffi.cast("float *", deltaz.ctypes.data), + redshift_pc_in, + stored_redshift_pc_in, + deltaz, ) return redshift_pc_in[0], stored_redshift_pc_in[0], deltaz[0] @@ -445,7 +462,9 @@ def photoncons_alpha(cosmo_params, user_params, astro_params, flag_options): # TODO: Move the deltaz interp tables to python if not lib.photon_cons_allocated: lib.determine_deltaz_for_photoncons() - lib.photon_cons_allocated = ffi.cast("bool", True) + # WIP: CFFI Refactor + # lib.photon_cons_allocated = ffi.cast("bool", True) + lib.photon_cons_allocated = True # Q(analytic) limits to fit the curve max_q_fit = 0.99 @@ -622,7 +641,9 @@ def photoncons_fesc(cosmo_params, user_params, astro_params, flag_options): # HACK: I need to allocate the deltaz arrays so I can return the other ones properly, this isn't a great solution if not lib.photon_cons_allocated: lib.determine_deltaz_for_photoncons() - lib.photon_cons_allocated = ffi.cast("bool", True) + # WIP: CFFI Refactor + # lib.photon_cons_allocated = ffi.cast("bool", True) + lib.photon_cons_allocated = True # Q(analytic) limits to fit the curve max_q_fit = 0.99 diff --git a/src/py21cmfast/wrapper/structs.py b/src/py21cmfast/wrapper/structs.py index 5a00262da..16e06541d 100644 --- a/src/py21cmfast/wrapper/structs.py +++ b/src/py21cmfast/wrapper/structs.py @@ -44,7 +44,8 @@ class StructWrapper: _name: str = attrs.field(converter=str) cstruct = attrs.field(default=None) - _ffi = attrs.field(default=ffi) + # WIP: CFFI Refactor + # _ffi = attrs.field(default=ffi) @_name.default def _name_default(self): @@ -60,12 +61,16 @@ def __init__(self, *args): def _new(self): """Return a new 
empty C structure corresponding to this class.""" - return self._ffi.new(f"struct {self._name}*") + # WIP: CFFI Refactor + # return self._ffi.new(f"struct {self._name}*") + raise NotImplementedError @property def fields(self) -> dict[str, Any]: """A list of fields of the underlying C struct (a list of tuples of "name, type").""" - return dict(self._ffi.typeof(self.cstruct[0]).fields) + # WIP: CFFI Refactor + # return dict(self._ffi.typeof(self.cstruct[0]).fields) + raise NotImplementedError @property def fieldnames(self) -> list[str]: @@ -87,7 +92,9 @@ def __getstate__(self): return { k: v for k, v in self.__dict__.items() - if k not in ["_strings", "cstruct", "_ffi"] + # WIP: CFFI Refactor + # if k not in ["_strings", "cstruct", "_ffi"] + if k not in ["_strings", "cstruct"] } @@ -156,7 +163,9 @@ def cstruct(self) -> StructWrapper: if isinstance(val, str): # If it is a string, need to convert it to C string ourselves. - val = self.ffi.new("char[]", val.encode()) + # WIP: CFFI Refactor + # val = self.ffi.new("char[]", val.encode()) + raise NotImplementedError setattr(self.struct.cstruct, k, val) @@ -437,10 +446,12 @@ def _init_cstruct(self): def _ary2buf(self, ary): if not isinstance(ary, np.ndarray): raise ValueError("ary must be a numpy array") - return self.struct._ffi.cast( - OutputStruct._TYPEMAP[ary.dtype.name], self.struct._ffi.from_buffer( - ary) - ) + # WIP: CFFI Refactor + # return self.struct._ffi.cast( + # OutputStruct._TYPEMAP[ary.dtype.name], self.struct._ffi.from_buffer( + # ary) + # ) + raise NotImplementedError def __call__(self): """Return the C structure, will initialise if not already initialised.""" @@ -1250,15 +1261,29 @@ class StructInstanceWrapper: The ``cffi.ffi`` object. 
""" - def __init__(self, wrapped, ffi): + # WIP: CFFI Refactor + # def __init__(self, wrapped, ffi): + def __init__(self, wrapped): self._cobj = wrapped - self._ffi = ffi - - for nm, tp in self._ffi.typeof(self._cobj).fields: - setattr(self, nm, getattr(self._cobj, nm)) + # WIP: CFFI Refactor + # self._ffi = ffi + + # WIP: CFFI Refactor + # for nm, tp in self._ffi.typeof(self._cobj).fields: + # setattr(self, nm, getattr(self._cobj, nm)) + # nanobind does not supply a list of fileds like CFFI does, so we do + # this instead to return a list of members + for attr in dir(self._cobj): + if not attr.startswith("__") and not callable(getattr(self._cobj, attr)): + print("CCC:", attr, getattr(self._cobj, attr)) + setattr(self, attr, getattr(self._cobj, attr)) # Get the name of the structure - self._ctype = self._ffi.typeof(self._cobj).cname.split()[-1] + # WIP: CFFI Refactor + # self._ctype = self._ffi.typeof(self._cobj).cname.split()[-1] + self._ctype = type(self._cobj).__name__ + + print("WWWWWWWWWWW:", self) def __setattr__(self, name, value): """Set an attribute of the instance, attempting to change it in the C struct as well.""" @@ -1268,8 +1293,14 @@ def __setattr__(self, name, value): def items(self): """Yield (name, value) pairs for each element of the struct.""" - for nm, tp in self._ffi.typeof(self._cobj).fields: - yield nm, getattr(self, nm) + # WIP: CFFI Refactor + # for nm, tp in self._ffi.typeof(self._cobj).fields: + # yield nm, getattr(self, nm) + # nanobind does not supply a list of fileds like CFFI does, so we do + # this instead to return a list of members + for attr in dir(self._cobj): + if not attr.startswith("__") and not callable(getattr(self._cobj, attr)): + yield attr, getattr(self, attr) def keys(self): """Return a list of names of elements in the struct.""" diff --git a/tests/produce_integration_test_data.py b/tests/produce_integration_test_data.py index f2ee4a2b2..40e2c1289 100644 --- a/tests/produce_integration_test_data.py +++ 
b/tests/produce_integration_test_data.py @@ -294,9 +294,11 @@ def get_input_struct(kwargs, cls): def get_all_input_structs(kwargs): - flag_options = get_input_struct({**DEFAULT_FLAG_OPTIONS, **kwargs}, FlagOptions) + flag_options = get_input_struct( + {**DEFAULT_FLAG_OPTIONS, **kwargs}, FlagOptions) cosmo_params = get_input_struct(kwargs, CosmoParams) - user_params = get_input_struct({**DEFAULT_USER_PARAMS, **kwargs}, UserParams) + user_params = get_input_struct( + {**DEFAULT_USER_PARAMS, **kwargs}, UserParams) kwargs_a = kwargs.copy() kwargs_a.update({"flag_options": flag_options}) @@ -663,19 +665,22 @@ def go( kwargs = OPTIONS[name][1] fnames.append( - produce_power_spectra_for_tests(name, redshift, force, direc, **kwargs) + produce_power_spectra_for_tests( + name, redshift, force, direc, **kwargs) ) if not no_pt: for name, (redshift, kwargs) in OPTIONS_PT.items(): fnames.append( - produce_data_for_perturb_field_tests(name, redshift, force, **kwargs) + produce_data_for_perturb_field_tests( + name, redshift, force, **kwargs) ) if not no_halo: for name, (redshift, kwargs) in OPTIONS_HALO.items(): fnames.append( - produce_data_for_halo_field_tests(name, redshift, force, **kwargs) + produce_data_for_halo_field_tests( + name, redshift, force, **kwargs) ) # Remove extra files that diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index 43143abff..44d46886d 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -21,17 +21,24 @@ def test_simple(subfunc): answer = np.array([0], dtype="f8") with pytest.raises(ParameterError): status = lib.FunctionThatCatches( - subfunc, False, ffi.cast("double *", ffi.from_buffer(answer)) + # WIP: CFFI Refactor + # subfunc, False, ffi.cast("double *", ffi.from_buffer(answer)) + subfunc, + False, + answer, ) _process_exitcode( status, lib.FunctionThatCatches, - (False, ffi.cast("double *", ffi.from_buffer(answer))), + # WIP: CFFI Refactor + # (False, ffi.cast("double *", ffi.from_buffer(answer))), + (False, 
answer), ) def test_pass(): answer = np.array([0], dtype="f8") - lib.FunctionThatCatches(True, True, ffi.cast( - "double *", ffi.from_buffer(answer))) + # WIP: CFFI Refactor + # lib.FunctionThatCatches(True, True, ffi.cast( "double *", ffi.from_buffer(answer))) + lib.FunctionThatCatches(True, True, answer) assert answer == 5.0 diff --git a/tests/test_filtering.py b/tests/test_filtering.py index 08d10c5bc..a22ff9809 100644 --- a/tests/test_filtering.py +++ b/tests/test_filtering.py @@ -119,11 +119,15 @@ def test_filters(filter_flag, R, plt): cp.cstruct, ap.cstruct, fo.cstruct, - ffi.cast("float *", input_box_centre.ctypes.data), + # WIP: CFFI Refactor + # ffi.cast("float *", input_box_centre.ctypes.data), + input_box_centre.ctypes.data, R, R_param, filter_flag, - ffi.cast("double *", output_box_centre.ctypes.data), + # WIP: CFFI Refactor + # ffi.cast("double *", output_box_centre.ctypes.data), + output_box_centre.ctypes.data, ) # expected outputs given in cell units diff --git a/tests/test_tables.py b/tests/test_tables.py index 23271c2f5..7a9265573 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -1,4 +1,4 @@ -from py21cmfast.c_21cmfast import lib +import py21cmfast.c_21cmfast as lib def test_init_heat(): From 442e05ad40f92218ffc64e58e472f68d07b56004 Mon Sep 17 00:00:00 2001 From: Gregory Poole Date: Fri, 11 Apr 2025 12:11:50 +1000 Subject: [PATCH 103/145] First attempt at implementing fields method of StructWrapper --- src/py21cmfast/wrapper/structs.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/py21cmfast/wrapper/structs.py b/src/py21cmfast/wrapper/structs.py index 16e06541d..07bc36a6b 100644 --- a/src/py21cmfast/wrapper/structs.py +++ b/src/py21cmfast/wrapper/structs.py @@ -57,20 +57,30 @@ def __init__(self, *args): This instantiates the memory associated with the C struct, attached to this inst. 
""" self.__attrs_init__(*args) + if args[0] == "InitialConditions": + self._cobj = lib.InitialConditions + else: + raise NotImplementedError( + "Wrapped class {args[0]} not listed as an option in StructWrapper." + ) self.cstruct = self._new() def _new(self): """Return a new empty C structure corresponding to this class.""" # WIP: CFFI Refactor # return self._ffi.new(f"struct {self._name}*") - raise NotImplementedError + return self._cobj() @property def fields(self) -> dict[str, Any]: """A list of fields of the underlying C struct (a list of tuples of "name, type").""" # WIP: CFFI Refactor # return dict(self._ffi.typeof(self.cstruct[0]).fields) - raise NotImplementedError + result = {} + for attr in dir(self._cobj): + if not attr.startswith("__") and not callable(getattr(self._cobj, attr)): + result[attr] = type(getattr(self._cobj, attr)) + return result @property def fieldnames(self) -> list[str]: @@ -80,12 +90,16 @@ def fieldnames(self) -> list[str]: @property def pointer_fields(self) -> list[str]: """A list of names of fields which have pointer type in the C struct.""" - return [f for f, t in self.fields.items() if t.type.kind == "pointer"] + # WIP: CFFI Refactor + # return [f for f, t in self.fields.items() if t.type.kind == "pointer"] + raise NotImplementedError @property def primitive_fields(self) -> list[str]: """The list of names of fields which have primitive type in the C struct.""" - return [f for f, t in self.fields.items() if t.type.kind == "primitive"] + # WIP: CFFI Refactor + # return [f for f, t in self.fields.items() if t.type.kind == "primitive"] + raise NotImplementedError def __getstate__(self): """Return the current state of the class without pointers.""" @@ -1275,7 +1289,6 @@ def __init__(self, wrapped): # this instead to return a list of members for attr in dir(self._cobj): if not attr.startswith("__") and not callable(getattr(self._cobj, attr)): - print("CCC:", attr, getattr(self._cobj, attr)) setattr(self, attr, getattr(self._cobj, attr)) # 
Get the name of the structure @@ -1283,8 +1296,6 @@ def __init__(self, wrapped): # self._ctype = self._ffi.typeof(self._cobj).cname.split()[-1] self._ctype = type(self._cobj).__name__ - print("WWWWWWWWWWW:", self) - def __setattr__(self, name, value): """Set an attribute of the instance, attempting to change it in the C struct as well.""" with contextlib.suppress(AttributeError): From e07a8a6930c4e462a01e995e08df15cb4f4f48ea Mon Sep 17 00:00:00 2001 From: JHu Date: Mon, 14 Apr 2025 15:29:27 +1000 Subject: [PATCH 104/145] fix building error raised by newer version of setuptools --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 235264bae..ca4cd83e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools>=42", "wheel", "setuptools_scm[toml]>=3.4"] +requires = ["setuptools>=42,<76", "wheel", "setuptools_scm[toml]>=3.4"] build-backend = "setuptools.build_meta" [tool.setuptools_scm] From ee1e0bfe67f77701e9320151141d3f65c9da211a Mon Sep 17 00:00:00 2001 From: JHu Date: Tue, 15 Apr 2025 06:22:19 +1000 Subject: [PATCH 105/145] add use_cuda condition to initialcondition and stochasticity --- src/py21cmfast/src/InitialConditions.c | 8 +- src/py21cmfast/src/Stochasticity.c | 316 ++++++++++++------------- 2 files changed, 152 insertions(+), 172 deletions(-) diff --git a/src/py21cmfast/src/InitialConditions.c b/src/py21cmfast/src/InitialConditions.c index 9c948d015..492331af3 100644 --- a/src/py21cmfast/src/InitialConditions.c +++ b/src/py21cmfast/src/InitialConditions.c @@ -24,7 +24,6 @@ #include "InitialConditions.h" #include "hello_world.h" -#include "device_rng.cuh" void adj_complex_conj(fftwf_complex *HIRES_box, UserParams *user_params, CosmoParams *cosmo_params){ /***** Adjust the complex conjugate relations for a real array *****/ @@ -91,8 +90,11 @@ int ComputeInitialConditions( int status; - printf("Start computing initial conditions\n"); - 
call_cuda(); + // tmp: ensure hello_world works on GPU + bool use_cuda=true; + if (use_cuda){ + printf("Start computing initial conditions\n"); + call_cuda();} Try{ // This Try wraps the entire function so we don't indent. diff --git a/src/py21cmfast/src/Stochasticity.c b/src/py21cmfast/src/Stochasticity.c index 145ef9ee8..a4ae1dd18 100644 --- a/src/py21cmfast/src/Stochasticity.c +++ b/src/py21cmfast/src/Stochasticity.c @@ -931,159 +931,132 @@ int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloFi double corr_arr[3] = {hs_constants->corr_star,hs_constants->corr_sfr,hs_constants->corr_xray}; - // get parameters needed for sigma calculation - double x_min = sigma_table->x_min; - double x_width = sigma_table->x_width; - int sigma_bin = sigma_table->n_bin; - float *sigma_y_arr = sigma_table->y_arr; - - // Create a copy of hs_constants for passing to cuda - struct HaloSamplingConstants d_hs_constants; - d_hs_constants = *hs_constants; - - // get in halo data - float *halo_m = halofield_in->halo_masses; - float *halo_star_rng = halofield_in->star_rng; - float *halo_sfr_rng = halofield_in->sfr_rng; - float *halo_xray_rng = halofield_in->xray_rng; - int *halo_c = halofield_in->halo_coords; - - // call cuda function here - printf("Start cuda calculation for progenitors. "); - print_current_time(); - updateHaloOut(halo_m, halo_star_rng, halo_sfr_rng, - halo_xray_rng,halo_c,nhalo_in, sigma_y_arr, - sigma_bin, x_min, x_width, d_hs_constants, - arraysize_total, halofield_out); - printf("End cuda calculation for progenitors. 
"); - print_current_time(); + // use cuda function if use_cuda is true + bool use_cuda = true; // pass this as a parameter later + if (use_cuda){ + // get parameters needed for sigma calculation + double x_min = sigma_table->x_min; + double x_width = sigma_table->x_width; + int sigma_bin = sigma_table->n_bin; + float *sigma_y_arr = sigma_table->y_arr; + + // Create a copy of hs_constants for passing to cuda + struct HaloSamplingConstants d_hs_constants; + d_hs_constants = *hs_constants; + + // get in halo data + float *halo_m = halofield_in->halo_masses; + float *halo_star_rng = halofield_in->star_rng; + float *halo_sfr_rng = halofield_in->sfr_rng; + float *halo_xray_rng = halofield_in->xray_rng; + int *halo_c = halofield_in->halo_coords; + + printf("Start cuda calculation for progenitors. "); + print_current_time(); + updateHaloOut(halo_m, halo_star_rng, halo_sfr_rng, + halo_xray_rng,halo_c,nhalo_in, sigma_y_arr, + sigma_bin, x_min, x_width, d_hs_constants, + arraysize_total, halofield_out); + printf("End cuda calculation for progenitors. 
"); + print_current_time(); + }else{ // CPU fallback +#pragma omp parallel num_threads(user_params_global->N_THREADS) + { + float prog_buf[MAX_HALO_CELL]; + int n_prog; + double M_prog; - // 2025-01-23 tmp: original processing in C only (start) -// // get max halo mass -// double max_halo_m = get_max_nhalo(hs_constants, halofield_in->halo_masses, nhalo_in); -// printf("The evaluated N halo is : %f \n", max_halo_m); - -// // check n_prog > 4 -// int nprog_check = 0; - -// // // tiger tmp: debug (start) -// // double res1, res2, res3, res4; -// // res1 = EvaluateNhaloInv(18.694414138793945, 0.0046723012881037529); -// // printf("tmp res1: %.17f \n", res1); -// // res2 = EvaluateNhaloInv(20.084152221679688, 0.32153863360286256); -// // printf("tmp res2: %.17f \n", res2); -// // res3 = EvaluateNhaloInv(26.806314468383789, 0.8698794976081996); -// // printf("tmp res3: %.17f \n", res3); -// // res4 = EvaluateNhaloInv(19.00053596496582, 0.83130413049947305); -// // printf("tmp res4: %.17f \n", res4); -// // // tiger tmp: debug (end) - -// #pragma omp parallel num_threads(user_params_global->N_THREADS) -// { -// float prog_buf[MAX_HALO_CELL]= {0}; -// int n_prog; -// double M_prog; - -// double propbuf_in[3]; -// double propbuf_out[3]; - -// int threadnum = omp_get_thread_num(); -// double M2; -// int jj; -// unsigned long long int ii; -// unsigned long long int count=0; -// unsigned long long int istart = threadnum * arraysize_local; - -// //we need a private version -// //also the naming convention should be better between structs/struct pointers -// struct HaloSamplingConstants hs_constants_priv; -// hs_constants_priv = *hs_constants; - -// #pragma omp for -// for(ii=0;iihalo_masses[ii]; -// if(M2 < Mmin || M2 > Mmax_tb){ -// printf("got an outlier."); -// LOG_ERROR("Input Mass = %.2e at %llu of %llu, something went wrong in the input catalogue",M2,ii,nhalo_in); -// Throw(ValueError); -// } -// //set condition-dependent variables for sampling -// 
stoc_set_consts_cond(&hs_constants_priv,M2); -// // tiger tmp debug (start) -// if (ii == 160 || ii == 680 || ii == 10792){ -// printf("temp check.\n"); -// } -// // tiger tmp dubug (end) - -// //Sample the CMF set by the descendant -// stoc_sample(&hs_constants_priv,rng_arr[threadnum],&n_prog,prog_buf); - -// if (n_prog >=100){ -// printf("The number of progenitors at z %.1f and halo %llu: %d \n", z_in, ii, n_prog); -// } - -// if (n_prog == 2){ -// nprog_check += 1; -// } - -// propbuf_in[0] = halofield_in->star_rng[ii]; -// propbuf_in[1] = halofield_in->sfr_rng[ii]; -// propbuf_in[2] = halofield_in->xray_rng[ii]; - -// //place progenitors in local list -// M_prog = 0; -// for(jj=0;jjSAMPLER_MIN_MASS) continue; - -// if(count >= arraysize_local){ -// LOG_ERROR("More than %llu halos (expected %.1e) with buffer size factor %.1f", -// arraysize_local,arraysize_local/user_params_global->MAXHALO_FACTOR,user_params_global->MAXHALO_FACTOR); -// LOG_ERROR("If you expected to have an above average halo number try raising user_params_global->MAXHALO_FACTOR"); -// Throw(ValueError); -// } - -// set_prop_rng(rng_arr[threadnum], true, corr_arr, propbuf_in, propbuf_out); - -// halofield_out->halo_masses[istart + count] = prog_buf[jj]; -// halofield_out->halo_coords[3*(istart + count) + 0] = halofield_in->halo_coords[3*ii+0]; -// halofield_out->halo_coords[3*(istart + count) + 1] = halofield_in->halo_coords[3*ii+1]; -// halofield_out->halo_coords[3*(istart + count) + 2] = halofield_in->halo_coords[3*ii+2]; - -// halofield_out->star_rng[istart + count] = propbuf_out[0]; -// halofield_out->sfr_rng[istart + count] = propbuf_out[1]; -// halofield_out->xray_rng[istart + count] = propbuf_out[2]; -// // printf("prop out: %f, %f, %f \n", propbuf_out[0], propbuf_out[1], propbuf_out[2]); -// count++; - -// if(ii==0){ -// M_prog += prog_buf[jj]; - -// LOG_ULTRA_DEBUG("First Halo Prog %d: Mass %.2e Stellar %.2e SFR %.2e XRAY %.2e e_d %.3f", -// 
jj,prog_buf[jj],propbuf_out[0],propbuf_out[1],propbuf_out[2], -// Deltac*hs_constants->growth_out/hs_constants->growth_in); -// } -// } -// if(ii==0){ -// LOG_ULTRA_DEBUG(" HMF %d delta %.3f delta_coll %.3f delta_desc %.3f adjusted %.3f",user_params_global->HMF, -// hs_constants_priv.delta, -// get_delta_crit(user_params_global->HMF,hs_constants_priv.sigma_cond,hs_constants->growth_out), -// get_delta_crit(user_params_global->HMF,hs_constants_priv.sigma_cond,hs_constants->growth_in), -// get_delta_crit(user_params_global->HMF,hs_constants_priv.sigma_cond,hs_constants->growth_in) -// *hs_constants->growth_out/hs_constants->growth_in); - // print_hs_consts(&hs_constants_priv); - // LOG_SUPER_DEBUG("First Halo: Mass %.2f | N %d (exp. %.2e) | Total M %.2e (exp. %.2e)", - // M2,n_prog,hs_constants_priv.expected_N,M_prog,hs_constants_priv.expected_M); - // } - // } - // istart_threads[threadnum] = istart; - // nhalo_threads[threadnum] = count; - // } - // printf("The number of halos with nprog == 2 is: %llu\n", nprog_check); - // condense_sparse_halolist(halofield_out, istart_threads, nhalo_threads); + double propbuf_in[3]; + double propbuf_out[3]; + + int threadnum = omp_get_thread_num(); + double M2; + int jj; + unsigned long long int ii; + unsigned long long int count = 0; + unsigned long long int istart = threadnum * arraysize_local; + + // we need a private version + // also the naming convention should be better between structs/struct pointers + struct HaloSamplingConstants hs_constants_priv; + hs_constants_priv = *hs_constants; + +#pragma omp for + for (ii = 0; ii < nhalo_in; ii++) + { + M2 = halofield_in->halo_masses[ii]; + if (M2 < Mmin || M2 > Mmax_tb) + { + LOG_ERROR("Input Mass = %.2e at %llu of %llu, something went wrong in the input catalogue", M2, ii, nhalo_in); + Throw(ValueError); + } + // set condition-dependent variables for sampling + stoc_set_consts_cond(&hs_constants_priv, M2); + + // Sample the CMF set by the descendant + 
stoc_sample(&hs_constants_priv, rng_arr[threadnum], &n_prog, prog_buf); + + propbuf_in[0] = halofield_in->star_rng[ii]; + propbuf_in[1] = halofield_in->sfr_rng[ii]; + propbuf_in[2] = halofield_in->xray_rng[ii]; + + // place progenitors in local list + M_prog = 0; + for (jj = 0; jj < n_prog; jj++) + { + // sometimes halos are subtracted from the sample (set to zero) + // we do not want to save these + if (prog_buf[jj] < user_params_global->SAMPLER_MIN_MASS) + continue; + + if (count >= arraysize_local) + { + LOG_ERROR("More than %llu halos (expected %.1e) with buffer size factor %.1f", + arraysize_local, arraysize_local / user_params_global->MAXHALO_FACTOR, user_params_global->MAXHALO_FACTOR); + LOG_ERROR("If you expected to have an above average halo number try raising user_params_global->MAXHALO_FACTOR"); + Throw(ValueError); + } + + set_prop_rng(rng_arr[threadnum], true, corr_arr, propbuf_in, propbuf_out); + + halofield_out->halo_masses[istart + count] = prog_buf[jj]; + halofield_out->halo_coords[3 * (istart + count) + 0] = halofield_in->halo_coords[3 * ii + 0]; + halofield_out->halo_coords[3 * (istart + count) + 1] = halofield_in->halo_coords[3 * ii + 1]; + halofield_out->halo_coords[3 * (istart + count) + 2] = halofield_in->halo_coords[3 * ii + 2]; + + halofield_out->star_rng[istart + count] = propbuf_out[0]; + halofield_out->sfr_rng[istart + count] = propbuf_out[1]; + halofield_out->xray_rng[istart + count] = propbuf_out[2]; + count++; + + if (ii == 0) + { + M_prog += prog_buf[jj]; + + LOG_ULTRA_DEBUG("First Halo Prog %d: Mass %.2e Stellar %.2e SFR %.2e XRAY %.2e e_d %.3f", + jj, prog_buf[jj], propbuf_out[0], propbuf_out[1], propbuf_out[2], + Deltac * hs_constants->growth_out / hs_constants->growth_in); + } + } + if (ii == 0) + { + LOG_ULTRA_DEBUG(" HMF %d delta %.3f delta_coll %.3f delta_desc %.3f adjusted %.3f", user_params_global->HMF, + hs_constants_priv.delta, + get_delta_crit(user_params_global->HMF, hs_constants_priv.sigma_cond, 
hs_constants->growth_out), + get_delta_crit(user_params_global->HMF, hs_constants_priv.sigma_cond, hs_constants->growth_in), + get_delta_crit(user_params_global->HMF, hs_constants_priv.sigma_cond, hs_constants->growth_in) * hs_constants->growth_out / hs_constants->growth_in); + print_hs_consts(&hs_constants_priv); + LOG_SUPER_DEBUG("First Halo: Mass %.2f | N %d (exp. %.2e) | Total M %.2e (exp. %.2e)", + M2, n_prog, hs_constants_priv.expected_N, M_prog, hs_constants_priv.expected_M); + } + } + istart_threads[threadnum] = istart; + nhalo_threads[threadnum] = count; + } + condense_sparse_halolist(halofield_out, istart_threads, nhalo_threads); + return 0; + } -// 2025-01-23 tmp: original processing in C only (end) return 0; } @@ -1105,18 +1078,21 @@ int stochastic_halofield(UserParams *user_params, CosmoParams *cosmo_params, struct HaloSamplingConstants hs_constants; stoc_set_consts_z(&hs_constants,redshift,redshift_desc); - // tmp: confirm we could access sigma table + // get interp tables needed for sampling progenitors RGTable1D *nhalo_table = GetNhaloTable(); RGTable1D *mcoll_table = GetMcollTable(); RGTable2D *nhalo_inv_table = GetNhaloInvTable(); RGTable1D_f *sigma_table = GetSigmaInterpTable(); - - // copy relevant tables to the device - copyTablesToDevice(*nhalo_table, *mcoll_table, *nhalo_inv_table); - // copy global variables to the device - updateGlobalParams(user_params_global, cosmo_params_global, astro_params_global); + bool use_cuda=true; + if (use_cuda){ + // copy the tables to the device + copyTablesToDevice(*nhalo_table, *mcoll_table, *nhalo_inv_table); + // copy global variables to the device + // todo: move the following operation to InitialConditions.c + updateGlobalParams(user_params_global, cosmo_params_global, astro_params_global);} + // Fill them // NOTE:Halos prev in the first box corresponds to the large DexM halos if (redshift_desc <= 0.) 
@@ -1124,24 +1100,26 @@ int stochastic_halofield(UserParams *user_params, CosmoParams *cosmo_params, LOG_DEBUG("building first halo field at z=%.1f", redshift); sample_halo_grids(rng_stoc,redshift,dens_field,halo_overlap_box,halos_desc,halos,&hs_constants); - // todo: add use_cuda/cuda_found condition here - // initiate rand states on the device - unsigned long long int nhalo_first = halos->n_halos; - int buffer_scale = HALO_CUDA_THREAD_FACTOR + 1; - unsigned long long int n_rstates = nhalo_first * buffer_scale; - printf("initializing %llu random states on the device... \n", n_rstates); - print_current_time(); - - init_rand_states(seed, n_rstates); - printf("finish initializing \n"); - print_current_time(); + if (use_cuda) { + // initiate rand states on the device + unsigned long long int nhalo_first = halos->n_halos; + int buffer_scale = HALO_CUDA_THREAD_FACTOR + 1; + unsigned long long int n_rstates = nhalo_first * buffer_scale; + printf("initializing %llu random states on the device... 
\n", n_rstates); + print_current_time(); + + init_rand_states(seed, n_rstates); + + printf("finish initializing \n");} + } else{ LOG_DEBUG("Calculating halo progenitors from z=%.1f to z=%.1f | %llu", redshift_desc,redshift,halos_desc->n_halos); sample_halo_progenitors(rng_stoc,redshift_desc,redshift,halos_desc,halos,&hs_constants, sigma_table); } - + printf("Found %llu Halos \n", halos->n_halos); + print_current_time(); LOG_DEBUG("Found %llu Halos", halos->n_halos); if(halos->n_halos >= 3){ From 4df15402ee62f589e301d0584d9eb7bfac76ecd7 Mon Sep 17 00:00:00 2001 From: JHu Date: Tue, 15 Apr 2025 08:35:32 +1000 Subject: [PATCH 106/145] condense other star, sfr, xray, coords array as well (reserve commenting out) --- src/py21cmfast/src/Stochasticity.cu | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/py21cmfast/src/Stochasticity.cu b/src/py21cmfast/src/Stochasticity.cu index 8b4f750da..f183787fb 100644 --- a/src/py21cmfast/src/Stochasticity.cu +++ b/src/py21cmfast/src/Stochasticity.cu @@ -863,6 +863,7 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr float *d_star_rng_out; CALL_CUDA(cudaMalloc(&d_star_rng_out, buffer_size)); CALL_CUDA(cudaMemset(d_star_rng_out, 0, buffer_size)); + // initializeArray(d_halo_masses_out, d_n_buffer, -1.2f); float *d_sfr_rng_out; CALL_CUDA(cudaMalloc(&d_sfr_rng_out, buffer_size)); @@ -948,17 +949,17 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr printf("The number of progenitors written in out halo field so far: %d \n", n_processed_prog); // condense other halo field arrays on the device - // unsigned long long int n_processed_star_rng = condenseDeviceArray(d_star_rng_out, d_n_buffer, 0.0f); - // printf("The number of star prop rng written in out halo field so far: %d \n", n_processed_star_rng); + unsigned long long int n_processed_star_rng = condenseDeviceArray(d_star_rng_out, d_n_buffer, 0.0f); + printf("The number of star prop 
rng written in out halo field so far: %d \n", n_processed_star_rng); - // unsigned long long int n_processed_sfr_rng = condenseDeviceArray(d_sfr_rng_out, d_n_buffer, 0.0f); - // printf("The number of sfr prop rng written in out halo field so far: %d \n", n_processed_sfr_rng); + unsigned long long int n_processed_sfr_rng = condenseDeviceArray(d_sfr_rng_out, d_n_buffer, 0.0f); + printf("The number of sfr prop rng written in out halo field so far: %d \n", n_processed_sfr_rng); - // unsigned long long int n_processed_xray_rng = condenseDeviceArray(d_xray_rng_out, d_n_buffer, 0.0f); - // printf("The number of xray prop rng written in out halo field so far: %d \n", n_processed_xray_rng); + unsigned long long int n_processed_xray_rng = condenseDeviceArray(d_xray_rng_out, d_n_buffer, 0.0f); + printf("The number of xray prop rng written in out halo field so far: %d \n", n_processed_xray_rng); - // unsigned long long int n_processed_coords = condenseDeviceArray(d_halo_coords_out, d_n_buffer*3, -1000); - // printf("The number of halo coords written in out halo field so far: %d \n", n_processed_coords); + unsigned long long int n_processed_coords = condenseDeviceArray(d_halo_coords_out, d_n_buffer*3, -1000); + printf("The number of halo coords written in out halo field so far: %d \n", n_processed_coords); // tmp: the following is just needed for debugging purpose // float *h_filter_halos; @@ -1044,6 +1045,6 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr CALL_CUDA(cudaGetLastError()); CALL_CUDA(cudaDeviceSynchronize()); - + printf("After synchronization. 
\n"); return 0; } From 14b97c090ba174c179c48b6185755b899e8b01f3 Mon Sep 17 00:00:00 2001 From: JHu Date: Tue, 15 Apr 2025 09:38:40 +1000 Subject: [PATCH 107/145] move get device prop function to hello_world; add device check in initialcondition --- src/py21cmfast/src/InitialConditions.c | 14 +++++++---- src/py21cmfast/src/Stochasticity.cu | 19 --------------- src/py21cmfast/src/cuda_hello_world.cu | 31 +++++++++++++++++++++++++ src/py21cmfast/src/cuda_hello_world.cuh | 14 +++++++++++ src/py21cmfast/src/hello_world.cu | 11 --------- src/py21cmfast/src/hello_world.h | 6 ----- 6 files changed, 55 insertions(+), 40 deletions(-) create mode 100644 src/py21cmfast/src/cuda_hello_world.cu create mode 100644 src/py21cmfast/src/cuda_hello_world.cuh delete mode 100644 src/py21cmfast/src/hello_world.cu delete mode 100644 src/py21cmfast/src/hello_world.h diff --git a/src/py21cmfast/src/InitialConditions.c b/src/py21cmfast/src/InitialConditions.c index 492331af3..d2593de5e 100644 --- a/src/py21cmfast/src/InitialConditions.c +++ b/src/py21cmfast/src/InitialConditions.c @@ -23,7 +23,7 @@ #include "InitialConditions.h" -#include "hello_world.h" +#include "cuda_hello_world.cuh" void adj_complex_conj(fftwf_complex *HIRES_box, UserParams *user_params, CosmoParams *cosmo_params){ /***** Adjust the complex conjugate relations for a real array *****/ @@ -90,11 +90,17 @@ int ComputeInitialConditions( int status; - // tmp: ensure hello_world works on GPU + bool use_cuda=true; if (use_cuda){ - printf("Start computing initial conditions\n"); - call_cuda();} + printf("Check GPU device ...\n\n"); + + // print key device properites + print_key_device_properties(); + + // tmp: ensure hello_world works on GPU + call_cuda(); + } Try{ // This Try wraps the entire function so we don't indent. 
diff --git a/src/py21cmfast/src/Stochasticity.cu b/src/py21cmfast/src/Stochasticity.cu index f183787fb..6fb5216ac 100644 --- a/src/py21cmfast/src/Stochasticity.cu +++ b/src/py21cmfast/src/Stochasticity.cu @@ -20,7 +20,6 @@ #include "interpolation_types.h" #include "Stochasticity.h" -// #include "tiger_checks.h" #include "cuda_utils.cuh" #include "Stochasticity.cuh" #include "DeviceConstants.cuh" @@ -274,22 +273,6 @@ void initializeArray(int *d_array, int n_elements, int value){ thrust::device_ptr d_array_ptr(d_array); thrust::fill(d_array_ptr, d_array_ptr + n_elements, value); } -// more members of deviceprop can be found in cura_runtime_api documentation -void getDeviceProperties(){ - int device; - CALL_CUDA(cudaGetDevice(&device)); - cudaDeviceProp deviceProp; - CALL_CUDA(cudaGetDeviceProperties(&deviceProp, device)); - printf("name: %s\n", deviceProp.name); - // printf("uuid: %s\n", deviceProp.uuid); - printf("total global memory: %zu bytes \n", deviceProp.totalGlobalMem); - printf("Shared memory per block: %zu bytes\n", deviceProp.sharedMemPerBlock); - printf("registers per block: %d\n", deviceProp.regsPerBlock); - printf("warp size: %d \n", deviceProp.warpSize); - printf("memory pitch: %zu bytes \n", deviceProp.memPitch); - printf("max threads per block: %d \n", deviceProp.maxThreadsPerBlock); - printf("total constant memory: %zu bytes \n", deviceProp.totalConstMem); -} // void getKernelAttr(){ // cudaFuncAttributes attr; @@ -889,8 +872,6 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr // initialize number of progenitors processed unsigned long long int n_processed_prog; - getDeviceProperties(); - cudaFuncAttributes attr; cudaFuncGetAttributes(&attr, update_halo_constants); printf("Kernel Shared Memory per Block: %zu bytes\n", attr.sharedSizeBytes); diff --git a/src/py21cmfast/src/cuda_hello_world.cu b/src/py21cmfast/src/cuda_hello_world.cu new file mode 100644 index 000000000..b6bf6298b --- /dev/null +++ 
b/src/py21cmfast/src/cuda_hello_world.cu @@ -0,0 +1,31 @@ +#include +#include + +#include "cuda_utils.cuh" +#include "cuda_hello_world.cuh" + +__global__ void hello_kernel() { + printf("Hello World from GPU! BlockIdx: %d, ThreadIdx: %d\n", blockIdx.x, threadIdx.x); +} + +int call_cuda() { + hello_kernel<<<3, 3>>>(); + cudaDeviceSynchronize(); + return 0; +} + +// more members of deviceprop can be found in cura_runtime_api documentation +void print_key_device_properties(){ + int device; + CALL_CUDA(cudaGetDevice(&device)); + cudaDeviceProp deviceProp; + CALL_CUDA(cudaGetDeviceProperties(&deviceProp, device)); + printf("Device name: %s\n", deviceProp.name); + printf("Total global memory: %zu bytes \n", deviceProp.totalGlobalMem); + printf("Shared memory per block: %zu bytes\n", deviceProp.sharedMemPerBlock); + printf("Registers per block: %d\n", deviceProp.regsPerBlock); + printf("Warp size: %d \n", deviceProp.warpSize); + printf("Memory pitch: %zu bytes \n", deviceProp.memPitch); + printf("Max threads per block: %d \n", deviceProp.maxThreadsPerBlock); + printf("Total constant memory: %zu bytes \n", deviceProp.totalConstMem); +} \ No newline at end of file diff --git a/src/py21cmfast/src/cuda_hello_world.cuh b/src/py21cmfast/src/cuda_hello_world.cuh new file mode 100644 index 000000000..27cf56995 --- /dev/null +++ b/src/py21cmfast/src/cuda_hello_world.cuh @@ -0,0 +1,14 @@ +#ifndef _CUDA_HELLO_WORLD_CUH +#define _CUDA_HELLO_WORLD_CUH + +#ifdef __cplusplus +extern "C" +{ +#endif + int call_cuda(); + void print_key_device_properties(); +#ifdef __cplusplus +} +#endif + +#endif // _CUDA_HELLO_WORLD_CUH \ No newline at end of file diff --git a/src/py21cmfast/src/hello_world.cu b/src/py21cmfast/src/hello_world.cu deleted file mode 100644 index ab1a5595b..000000000 --- a/src/py21cmfast/src/hello_world.cu +++ /dev/null @@ -1,11 +0,0 @@ -#include - -__global__ void hello_kernel() { - printf("Hello World from GPU! 
BlockIdx: %d, ThreadIdx: %d\n", blockIdx.x, threadIdx.x); -} - -extern "C" int call_cuda() { - hello_kernel<<<3, 3>>>(); - cudaDeviceSynchronize(); - return 0; -} \ No newline at end of file diff --git a/src/py21cmfast/src/hello_world.h b/src/py21cmfast/src/hello_world.h deleted file mode 100644 index d11e30a1a..000000000 --- a/src/py21cmfast/src/hello_world.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _HELLO_WORLD_H -#define _HELLO_WORLD_H - -int call_cuda(); - -#endif // HELLO_WORLD_H \ No newline at end of file From 7b4ed3f644846fda91748f920cb18bb2503b5aea Mon Sep 17 00:00:00 2001 From: JHu Date: Tue, 15 Apr 2025 12:23:39 +1000 Subject: [PATCH 108/145] clean up --- src/py21cmfast/src/Stochasticity.c | 16 ++++----- src/py21cmfast/src/Stochasticity.cu | 50 ++++++++++++++--------------- 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/src/py21cmfast/src/Stochasticity.c b/src/py21cmfast/src/Stochasticity.c index a4ae1dd18..434775096 100644 --- a/src/py21cmfast/src/Stochasticity.c +++ b/src/py21cmfast/src/Stochasticity.c @@ -895,12 +895,6 @@ int sample_halo_grids(gsl_rng **rng_arr, double redshift, float *dens_field, flo return 0; } -void print_current_time() -{ - time_t now = time(NULL); - printf("Current time: %s \n", ctime(&now)); -} - //NOTE: there's a lot of repeated code here and in build_halo_cats, find a way to merge int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloField *halofield_in, HaloField *halofield_out, struct HaloSamplingConstants *hs_constants, RGTable1D_f *sigma_table) @@ -952,13 +946,13 @@ int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloFi int *halo_c = halofield_in->halo_coords; printf("Start cuda calculation for progenitors. "); - print_current_time(); + updateHaloOut(halo_m, halo_star_rng, halo_sfr_rng, halo_xray_rng,halo_c,nhalo_in, sigma_y_arr, sigma_bin, x_min, x_width, d_hs_constants, arraysize_total, halofield_out); printf("End cuda calculation for progenitors. 
"); - print_current_time(); + }else{ // CPU fallback #pragma omp parallel num_threads(user_params_global->N_THREADS) { @@ -1107,11 +1101,13 @@ int stochastic_halofield(UserParams *user_params, CosmoParams *cosmo_params, int buffer_scale = HALO_CUDA_THREAD_FACTOR + 1; unsigned long long int n_rstates = nhalo_first * buffer_scale; printf("initializing %llu random states on the device... \n", n_rstates); - print_current_time(); + init_rand_states(seed, n_rstates); printf("finish initializing \n");} + + // todo: add a signal to free rand states once all iterations are done } else{ @@ -1119,7 +1115,7 @@ int stochastic_halofield(UserParams *user_params, CosmoParams *cosmo_params, sample_halo_progenitors(rng_stoc,redshift_desc,redshift,halos_desc,halos,&hs_constants, sigma_table); } printf("Found %llu Halos \n", halos->n_halos); - print_current_time(); + LOG_DEBUG("Found %llu Halos", halos->n_halos); if(halos->n_halos >= 3){ diff --git a/src/py21cmfast/src/Stochasticity.cu b/src/py21cmfast/src/Stochasticity.cu index 6fb5216ac..f226578a8 100644 --- a/src/py21cmfast/src/Stochasticity.cu +++ b/src/py21cmfast/src/Stochasticity.cu @@ -133,6 +133,7 @@ int condenseDeviceArray(T *d_array, int original_size, T mask_value) return valid_size; } +// todo: maybe add python wrapper for test functions void testCondenseDeviceArray() { // Input data @@ -169,6 +170,7 @@ void testCondenseDeviceArray() cudaFree(d_array); } +// todo: add more tests to check with large number of input; fix the type mismatch (int, ull) int filterWithMask(float *d_data, int *d_mask, int original_size) { // Wrap the raw pointers into thrust device pointers @@ -265,6 +267,9 @@ int getSparsity(int n_buffer, int n_halo){ int sparsity = 1 << power; return sparsity; } + else{ + return -1; + } } @@ -634,15 +639,15 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_star_rng_in // } // tmp: just to verify the tables have been copied correctly - if (ind == 0) - { - printf("The first element of Nhalo 
y_arr: %e (%e) \n", d_Nhalo_yarr[0], d_Nhalo_table.y_arr[0]); - printf("The nhalo table n_bin: %d\n", d_Nhalo_table.n_bin); - printf("The nhalo_inv table nx_bin: %d\n", d_Nhalo_inv_table.nx_bin); - printf("HII_DIM: %d \n", d_user_params.HII_DIM); - printf("test params: %f \n", d_test_params); - printf("A_VCB: %f \n", d_astro_params.A_VCB); - printf("SIGMA_8: %f \n", d_cosmo_params.SIGMA_8); + // if (ind == 0) + // { + // printf("The first element of Nhalo y_arr: %e (%e) \n", d_Nhalo_yarr[0], d_Nhalo_table.y_arr[0]); + // printf("The nhalo table n_bin: %d\n", d_Nhalo_table.n_bin); + // printf("The nhalo_inv table nx_bin: %d\n", d_Nhalo_inv_table.nx_bin); + // printf("HII_DIM: %d \n", d_user_params.HII_DIM); + // printf("test params: %f \n", d_test_params); + // printf("A_VCB: %f \n", d_astro_params.A_VCB); + // printf("SIGMA_8: %f \n", d_cosmo_params.SIGMA_8); // printf("number of rng states: %d\n", g_numRNGStates); // // tiger tmp: debug (start) // double res1, res2, res3, res4; @@ -655,7 +660,7 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_star_rng_in // res4 = EvaluateNhaloInv(19.00053596496582, 0.83130413049947305); // printf("tmp res4 on gpu: %.17f \n", res4); // // tiger tmp: debug (end) - } + // } curandState local_state = d_randStates[ind]; // if (blockIdx.x > 100000){ @@ -861,7 +866,7 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr initializeArray(d_halo_coords_out, d_n_buffer * 3, -1000); // initiate n_halo check - unsigned long long int n_halo_check = n_halos; + // unsigned long long int n_halo_check = n_halos; // initiate offset for writing output data unsigned long long int write_offset = 0; @@ -872,11 +877,12 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr // initialize number of progenitors processed unsigned long long int n_processed_prog; + // todo: add the following to debug cudaFuncAttributes attr; cudaFuncGetAttributes(&attr, update_halo_constants); 
- printf("Kernel Shared Memory per Block: %zu bytes\n", attr.sharedSizeBytes); - printf("Kernel Registers per Thread: %d\n", attr.numRegs); - printf("Kernel Max Threads per Block: %d\n", attr.maxThreadsPerBlock); + // printf("Kernel Shared Memory per Block: %zu bytes\n", attr.sharedSizeBytes); + // printf("Kernel Registers per Thread: %d\n", attr.numRegs); + // printf("Kernel Max Threads per Block: %d\n", attr.maxThreadsPerBlock); // start with 4 threads work with one halo int sparsity = 4; @@ -919,11 +925,12 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr CALL_CUDA(cudaHostAlloc((void **)&h_n_prog, sizeof(int)*n_halos, cudaHostAllocDefault)); CALL_CUDA(cudaMemcpy(h_n_prog, d_n_prog, sizeof(int)*n_halos, cudaMemcpyDeviceToHost)); - // Values to count - std::vector values_to_count = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 100,32}; + // debug only + // // Values to count + // std::vector values_to_count = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 100,32}; - // Count and display occurrences - countElements(h_n_prog, n_halos, values_to_count); + // // Count and display occurrences + // countElements(h_n_prog, n_halos, values_to_count); // condense halo mass array on the device n_processed_prog = condenseDeviceArray(d_halo_masses_out, d_n_buffer, 0.0f); @@ -956,12 +963,6 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr unsigned long long int available_n_buffer = d_n_buffer - n_processed_prog; sparsity = getSparsity(available_n_buffer, n_halos_tbp); - // check max threadblock size - int device; - CALL_CUDA(cudaGetDevice(&device)); - cudaDeviceProp deviceProp; - CALL_CUDA(cudaGetDeviceProperties(&deviceProp, device)); - int max_threads_pb = deviceProp.maxThreadsPerBlock; // sparsity should not exceed the max threads per block // sparsity = 256; @@ -1026,6 +1027,5 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr CALL_CUDA(cudaGetLastError()); CALL_CUDA(cudaDeviceSynchronize()); - 
printf("After synchronization. \n"); return 0; } From 045f263efd80d0a8bc646ff6246122716540db2d Mon Sep 17 00:00:00 2001 From: JHu Date: Tue, 15 Apr 2025 12:43:38 +1000 Subject: [PATCH 109/145] remove tmp file --- src/py21cmfast/src/tiger_checks.h | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 src/py21cmfast/src/tiger_checks.h diff --git a/src/py21cmfast/src/tiger_checks.h b/src/py21cmfast/src/tiger_checks.h deleted file mode 100644 index 8fb9b2e73..000000000 --- a/src/py21cmfast/src/tiger_checks.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _TIGER_CHECK_H -#define _TIGER_CHECK_H -#include - -#include "Stochasticity.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - int checkComplextype(fftwf_complex *box, int total_elements, int xy_dim, int z_dim, int midpoint, int RES, int filter_type, float R, float R_param); - // int updateHaloOut(float *halo_masses, unsigned long long int n_halos, float *y_arr, int n_bin_y, double x_min, double x_width, struct HaloSamplingConstants hs_constants); -#ifdef __cplusplus -} -#endif - -#endif // TIGER_CHECK_H From dc35cc2318799f2038be9f675e495752c42c343c Mon Sep 17 00:00:00 2001 From: JHu Date: Wed, 7 May 2025 09:53:23 +1000 Subject: [PATCH 110/145] Place proper CUDA compile guards around code in Stochasticity.c --- src/py21cmfast/src/Stochasticity.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/src/py21cmfast/src/Stochasticity.c b/src/py21cmfast/src/Stochasticity.c index 434775096..d3ad3bb63 100644 --- a/src/py21cmfast/src/Stochasticity.c +++ b/src/py21cmfast/src/Stochasticity.c @@ -926,7 +926,7 @@ int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloFi double corr_arr[3] = {hs_constants->corr_star,hs_constants->corr_sfr,hs_constants->corr_xray}; // use cuda function if use_cuda is true - bool use_cuda = true; // pass this as a parameter later + bool use_cuda = false; // pass this as a parameter later if (use_cuda){ // get parameters 
needed for sigma calculation double x_min = sigma_table->x_min; @@ -947,13 +947,18 @@ int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloFi printf("Start cuda calculation for progenitors. "); +#if CUDA_FOUND updateHaloOut(halo_m, halo_star_rng, halo_sfr_rng, halo_xray_rng,halo_c,nhalo_in, sigma_y_arr, sigma_bin, x_min, x_width, d_hs_constants, arraysize_total, halofield_out); printf("End cuda calculation for progenitors. "); - - }else{ // CPU fallback + +#else + LOG_ERROR("CUDA function updateHaloOut() called but code was not compiled for CUDA."); +#endif + } + else{ // CPU fallback #pragma omp parallel num_threads(user_params_global->N_THREADS) { float prog_buf[MAX_HALO_CELL]; @@ -1078,14 +1083,19 @@ int stochastic_halofield(UserParams *user_params, CosmoParams *cosmo_params, RGTable2D *nhalo_inv_table = GetNhaloInvTable(); RGTable1D_f *sigma_table = GetSigmaInterpTable(); - bool use_cuda=true; + bool use_cuda=false; if (use_cuda){ +#if CUDA_FOUND // copy the tables to the device copyTablesToDevice(*nhalo_table, *mcoll_table, *nhalo_inv_table); // copy global variables to the device // todo: move the following operation to InitialConditions.c - updateGlobalParams(user_params_global, cosmo_params_global, astro_params_global);} + updateGlobalParams(user_params_global, cosmo_params_global, astro_params_global); +#else + LOG_ERROR("CUDA function copyTablesToDevice called but code was not compiled for CUDA."); +#endif + } // Fill them // NOTE:Halos prev in the first box corresponds to the large DexM halos @@ -1102,13 +1112,17 @@ int stochastic_halofield(UserParams *user_params, CosmoParams *cosmo_params, unsigned long long int n_rstates = nhalo_first * buffer_scale; printf("initializing %llu random states on the device... 
\n", n_rstates); - +#if CUDA_FOUND init_rand_states(seed, n_rstates); - printf("finish initializing \n");} + printf("finish initializing \n"); // todo: add a signal to free rand states once all iterations are done - +#else + LOG_ERROR("CUDA function init_rand_states() called but code was not compiled for CUDA."); +#endif + } + } else{ LOG_DEBUG("Calculating halo progenitors from z=%.1f to z=%.1f | %llu", redshift_desc,redshift,halos_desc->n_halos); From c0887eae18e8abb74dbba9ab80c1a2bae33c1247 Mon Sep 17 00:00:00 2001 From: JHu Date: Wed, 7 May 2025 09:59:07 +1000 Subject: [PATCH 111/145] Place proper CUDA compile guards around code in InitialConditions.c --- src/py21cmfast/src/InitialConditions.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/py21cmfast/src/InitialConditions.c b/src/py21cmfast/src/InitialConditions.c index d2593de5e..56eac3edf 100644 --- a/src/py21cmfast/src/InitialConditions.c +++ b/src/py21cmfast/src/InitialConditions.c @@ -91,15 +91,18 @@ int ComputeInitialConditions( int status; - bool use_cuda=true; + bool use_cuda=false; if (use_cuda){ printf("Check GPU device ...\n\n"); - - // print key device properites +#if CUDA_FOUND + // print key device properties print_key_device_properties(); // tmp: ensure hello_world works on GPU call_cuda(); +#else + LOG_ERROR("CUDA function print_key_device_properties() and call_cuda() called but code was not compiled for CUDA."); +#endif } Try{ // This Try wraps the entire function so we don't indent. 
From ef8785ade4dced19ded2ed43a5b42fe0e3235850 Mon Sep 17 00:00:00 2001 From: James Davies Date: Thu, 8 May 2025 14:58:48 +0200 Subject: [PATCH 112/145] some minor cleanup --- environment_dev.yml | 8 ++++---- pyproject.toml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/environment_dev.yml b/environment_dev.yml index 5e7f4aee5..83fac283d 100644 --- a/environment_dev.yml +++ b/environment_dev.yml @@ -1,4 +1,4 @@ -name: tiger21_dev_module +name: 21cmfast channels: - defaults dependencies: @@ -113,7 +113,7 @@ dependencies: - h5py - jupyter - nb_conda -# - pytest-plt + - pytest-plt - questionary -# - pip: -# - pre-commit + - pip: + - pre-commit diff --git a/pyproject.toml b/pyproject.toml index 1045d2ad7..8af4a35a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name="21cmFAST" dynamic = ["version"] -license="MIT license" +license="MIT" license-files = ["LICENSE"] description="A semi-numerical cosmological simulation code for the 21cm signal" # long_description="%s\n%s" @@ -65,7 +65,7 @@ docs = [ "sphinx>=1.3", "sphinx-rtd-theme", ] -# When the min python version supports PEP 735, this can be simplified +# When the min python version supports PEP 735, this can be simplified # as dev = test_req + doc_req again (as it was implemented in setup.py) dev = [ "pre-commit", From 205b867b2ff6ade1571aeec9004b4e5d44ea55da Mon Sep 17 00:00:00 2001 From: James Davies Date: Thu, 8 May 2025 15:12:17 +0200 Subject: [PATCH 113/145] delete cffi files --- build_cffi.py | 161 - environment_dev.yml | 2 - py21cmfast/c_21cmfast.c | 7507 --------------------------------------- py21cmfast/c_21cmfast.o | Bin 430416 -> 0 bytes 4 files changed, 7670 deletions(-) delete mode 100755 build_cffi.py delete mode 100644 py21cmfast/c_21cmfast.c delete mode 100644 py21cmfast/c_21cmfast.o diff --git a/build_cffi.py b/build_cffi.py deleted file mode 100755 index 97f59bded..000000000 --- a/build_cffi.py +++ /dev/null @@ -1,161 +0,0 @@ -"""Build the C code 
with CFFI.""" - -import os -import sys -import sysconfig -from cffi import FFI - -# Get the compiler. We support gcc and clang. -_compiler = sysconfig.get_config_var("CC") - -if "gcc" in _compiler: - compiler = "gcc" -elif "clang" in _compiler: - compiler = "clang" -else: - raise ValueError(f"Compiler {_compiler} not supported for 21cmFAST") - -ffi = FFI() - -LOCATION = os.path.dirname(os.path.abspath(__file__)) -CLOC = os.path.join(LOCATION, "src", "py21cmfast", "src") -include_dirs = [CLOC] - -c_files = [ - os.path.join("src", "py21cmfast", "src", f) - for f in os.listdir(CLOC) - if f.endswith(".c") -] - -# Compiled CUDA code -extra_objects = [ - os.path.join(CLOC, "hello_world.o"), - os.path.join(CLOC, "Stochasticity_cuda.o"), - os.path.join(CLOC, "HaloField_cuda.o"), - os.path.join(CLOC, "device_rng_cuda.o"), - os.path.join(CLOC, "combined_cuda.o"), - os.path.join(CLOC, "filtering_cuda.o"), - os.path.join(CLOC, "PerturbField_cuda.o"), - os.path.join(CLOC, "SpinTemperatureBox_cuda.o"), - os.path.join(CLOC, "IonisationBox_cuda.o"), -] -extra_link_args = ["-lcudart", "-lcudadevrt", "-lstdc++"] - -# # compiled cuda code -# extra_objects = [os.path.join(CLOC, "hello_world.o"), os.path.join(CLOC, "filtering_cuda.o"), os.path.join(CLOC, "Stochasticity_cuda.o") -# , os.path.join(CLOC, "HaloField_cuda.o"), os.path.join(CLOC, "combined_cuda.o"), os.path.join(CLOC, "device_rng_cuda.o")] -# # os.path.join(CLOC, "interp_tables_cuda.o")] -# extra_link_args = ["-lcudart", "-lcudadevrt"] - -# Set the C-code logging level. -# If DEBUG is set, we default to the highest level, but if not, -# we set it to the level just above no logging at all. 
-log_level = os.environ.get("LOG_LEVEL", 4 if "DEBUG" in os.environ else 1) -available_levels = [ - "NONE", - "ERROR", - "WARNING", - "INFO", - "DEBUG", - "SUPER_DEBUG", - "ULTRA_DEBUG", -] - - -if isinstance(log_level, str) and log_level.upper() in available_levels: - log_level = available_levels.index(log_level.upper()) - -try: - log_level = int(log_level) -except ValueError: - # note: for py35 support, can't use f strings. - raise ValueError( - "LOG_LEVEL must be specified as a positive integer, or one " "of {}".format( - available_levels - ) - ) - -# ================================================== -# Set compilation arguments dependent on environment -# ================================================== - -extra_compile_args = ["-Wall", "--verbose", f"-DLOG_LEVEL={log_level:d}"] - -if "DEBUG" in os.environ: - extra_compile_args += ["-g", "-O0"] -else: - extra_compile_args += ["-Ofast"] - -if sys.platform == "darwin": - extra_compile_args += ["-Xpreprocessor"] - -extra_compile_args += ["-fopenmp"] - -libraries = ["m", "gsl", "gslcblas", "fftw3f_omp", "fftw3f"] - -# GPU fft libraries -# if True: -# libraries += ["cufft", "cufftw"] - -# stuff for gperftools -if "PROFILE" in os.environ: - # libraries += ["profiler", "tcmalloc"] - libraries += ["profiler"] - # we need this even if DEBUG is off - extra_compile_args += ["-g"] - # extra_compile_args += ["-g", "-pg"] - -if compiler == "clang": - libraries += ["omp"] - -library_dirs = [] -for k, v in os.environ.items(): - if "inc" in k.lower(): - include_dirs += [v] - elif "lib" in k.lower(): - library_dirs += [v] - -# ================================================================= -# NOTES FOR DEVELOPERS: -# The CFFI implementation works as follows: -# - All function prototypes, global variables and type definitions *directly* used -# in the python wrapper must be declared via ffi.cdef("""C CODE"""). 
-# There must be no compiler directives in this code (#include, #define, etc) -# - All implementations of global variables and types present in the cdef() calls -# must also be present in the second argument of set_source. -# This is passed to the compiler. -# - The `sources` kwarg then contains all the .c files in the library which are to be compiled - -# This is the overall C code. -ffi.set_source( - "py21cmfast.c_21cmfast", # Name/Location of shared library module - """ - #include "21cmFAST.h" - """, - sources=c_files, - include_dirs=include_dirs, - library_dirs=library_dirs, - libraries=libraries, - extra_compile_args=extra_compile_args, - extra_objects=extra_objects, - extra_link_args=extra_link_args, -) - -# Header files containing types, globals and function prototypes -with open(os.path.join(CLOC, "_inputparams_wrapper.h")) as f: - ffi.cdef(f.read()) -with open(os.path.join(CLOC, "_outputstructs_wrapper.h")) as f: - ffi.cdef(f.read()) -with open(os.path.join(CLOC, "_functionprototypes_wrapper.h")) as f: - ffi.cdef(f.read()) - -# CFFI needs to be able to access a free function to make the __del__ method for OutputStruct fields -# This will expose the standard free() function to the wrapper so it can be used -ffi.cdef( - """ - void free(void *ptr); - """ -) - -if __name__ == "__main__": - ffi.compile(verbose=False) diff --git a/environment_dev.yml b/environment_dev.yml index 83fac283d..3c8a95795 100644 --- a/environment_dev.yml +++ b/environment_dev.yml @@ -7,7 +7,6 @@ dependencies: - zlib - pip - libxml2 - - libffi - zipp - click - scipy @@ -46,7 +45,6 @@ dependencies: - qt - packaging - ipython_genutils - - cffi - pytest-remotedata - nomkl - chardet diff --git a/py21cmfast/c_21cmfast.c b/py21cmfast/c_21cmfast.c deleted file mode 100644 index 38c90c1ab..000000000 --- a/py21cmfast/c_21cmfast.c +++ /dev/null @@ -1,7507 +0,0 @@ -#define _CFFI_ - -/* We try to define Py_LIMITED_API before including Python.h. 
- - Mess: we can only define it if Py_DEBUG, Py_TRACE_REFS and - Py_REF_DEBUG are not defined. This is a best-effort approximation: - we can learn about Py_DEBUG from pyconfig.h, but it is unclear if - the same works for the other two macros. Py_DEBUG implies them, - but not the other way around. - - The implementation is messy (issue #350): on Windows, with _MSC_VER, - we have to define Py_LIMITED_API even before including pyconfig.h. - In that case, we guess what pyconfig.h will do to the macros above, - and check our guess after the #include. - - Note that on Windows, with CPython 3.x, you need >= 3.5 and virtualenv - version >= 16.0.0. With older versions of either, you don't get a - copy of PYTHON3.DLL in the virtualenv. We can't check the version of - CPython *before* we even include pyconfig.h. ffi.set_source() puts - a ``#define _CFFI_NO_LIMITED_API'' at the start of this file if it is - running on Windows < 3.5, as an attempt at fixing it, but that's - arguably wrong because it may not be the target version of Python. - Still better than nothing I guess. As another workaround, you can - remove the definition of Py_LIMITED_API here. - - See also 'py_limited_api' in cffi/setuptools_ext.py. -*/ -#if !defined(_CFFI_USE_EMBEDDING) && !defined(Py_LIMITED_API) -# ifdef _MSC_VER -# if !defined(_DEBUG) && !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) && !defined(_CFFI_NO_LIMITED_API) -# define Py_LIMITED_API -# endif -# include - /* sanity-check: Py_LIMITED_API will cause crashes if any of these - are also defined. Normally, the Python file PC/pyconfig.h does not - cause any of these to be defined, with the exception that _DEBUG - causes Py_DEBUG. Double-check that. 
*/ -# ifdef Py_LIMITED_API -# if defined(Py_DEBUG) -# error "pyconfig.h unexpectedly defines Py_DEBUG, but Py_LIMITED_API is set" -# endif -# if defined(Py_TRACE_REFS) -# error "pyconfig.h unexpectedly defines Py_TRACE_REFS, but Py_LIMITED_API is set" -# endif -# if defined(Py_REF_DEBUG) -# error "pyconfig.h unexpectedly defines Py_REF_DEBUG, but Py_LIMITED_API is set" -# endif -# endif -# else -# include -# if !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) && !defined(_CFFI_NO_LIMITED_API) -# define Py_LIMITED_API -# endif -# endif -#endif - -#include -#ifdef __cplusplus -extern "C" { -#endif -#include - -/* This part is from file 'cffi/parse_c_type.h'. It is copied at the - beginning of C sources generated by CFFI's ffi.set_source(). */ - -typedef void *_cffi_opcode_t; - -#define _CFFI_OP(opcode, arg) (_cffi_opcode_t)(opcode | (((uintptr_t)(arg)) << 8)) -#define _CFFI_GETOP(cffi_opcode) ((unsigned char)(uintptr_t)cffi_opcode) -#define _CFFI_GETARG(cffi_opcode) (((intptr_t)cffi_opcode) >> 8) - -#define _CFFI_OP_PRIMITIVE 1 -#define _CFFI_OP_POINTER 3 -#define _CFFI_OP_ARRAY 5 -#define _CFFI_OP_OPEN_ARRAY 7 -#define _CFFI_OP_STRUCT_UNION 9 -#define _CFFI_OP_ENUM 11 -#define _CFFI_OP_FUNCTION 13 -#define _CFFI_OP_FUNCTION_END 15 -#define _CFFI_OP_NOOP 17 -#define _CFFI_OP_BITFIELD 19 -#define _CFFI_OP_TYPENAME 21 -#define _CFFI_OP_CPYTHON_BLTN_V 23 // varargs -#define _CFFI_OP_CPYTHON_BLTN_N 25 // noargs -#define _CFFI_OP_CPYTHON_BLTN_O 27 // O (i.e. 
a single arg) -#define _CFFI_OP_CONSTANT 29 -#define _CFFI_OP_CONSTANT_INT 31 -#define _CFFI_OP_GLOBAL_VAR 33 -#define _CFFI_OP_DLOPEN_FUNC 35 -#define _CFFI_OP_DLOPEN_CONST 37 -#define _CFFI_OP_GLOBAL_VAR_F 39 -#define _CFFI_OP_EXTERN_PYTHON 41 - -#define _CFFI_PRIM_VOID 0 -#define _CFFI_PRIM_BOOL 1 -#define _CFFI_PRIM_CHAR 2 -#define _CFFI_PRIM_SCHAR 3 -#define _CFFI_PRIM_UCHAR 4 -#define _CFFI_PRIM_SHORT 5 -#define _CFFI_PRIM_USHORT 6 -#define _CFFI_PRIM_INT 7 -#define _CFFI_PRIM_UINT 8 -#define _CFFI_PRIM_LONG 9 -#define _CFFI_PRIM_ULONG 10 -#define _CFFI_PRIM_LONGLONG 11 -#define _CFFI_PRIM_ULONGLONG 12 -#define _CFFI_PRIM_FLOAT 13 -#define _CFFI_PRIM_DOUBLE 14 -#define _CFFI_PRIM_LONGDOUBLE 15 - -#define _CFFI_PRIM_WCHAR 16 -#define _CFFI_PRIM_INT8 17 -#define _CFFI_PRIM_UINT8 18 -#define _CFFI_PRIM_INT16 19 -#define _CFFI_PRIM_UINT16 20 -#define _CFFI_PRIM_INT32 21 -#define _CFFI_PRIM_UINT32 22 -#define _CFFI_PRIM_INT64 23 -#define _CFFI_PRIM_UINT64 24 -#define _CFFI_PRIM_INTPTR 25 -#define _CFFI_PRIM_UINTPTR 26 -#define _CFFI_PRIM_PTRDIFF 27 -#define _CFFI_PRIM_SIZE 28 -#define _CFFI_PRIM_SSIZE 29 -#define _CFFI_PRIM_INT_LEAST8 30 -#define _CFFI_PRIM_UINT_LEAST8 31 -#define _CFFI_PRIM_INT_LEAST16 32 -#define _CFFI_PRIM_UINT_LEAST16 33 -#define _CFFI_PRIM_INT_LEAST32 34 -#define _CFFI_PRIM_UINT_LEAST32 35 -#define _CFFI_PRIM_INT_LEAST64 36 -#define _CFFI_PRIM_UINT_LEAST64 37 -#define _CFFI_PRIM_INT_FAST8 38 -#define _CFFI_PRIM_UINT_FAST8 39 -#define _CFFI_PRIM_INT_FAST16 40 -#define _CFFI_PRIM_UINT_FAST16 41 -#define _CFFI_PRIM_INT_FAST32 42 -#define _CFFI_PRIM_UINT_FAST32 43 -#define _CFFI_PRIM_INT_FAST64 44 -#define _CFFI_PRIM_UINT_FAST64 45 -#define _CFFI_PRIM_INTMAX 46 -#define _CFFI_PRIM_UINTMAX 47 -#define _CFFI_PRIM_FLOATCOMPLEX 48 -#define _CFFI_PRIM_DOUBLECOMPLEX 49 -#define _CFFI_PRIM_CHAR16 50 -#define _CFFI_PRIM_CHAR32 51 - -#define _CFFI__NUM_PRIM 52 -#define _CFFI__UNKNOWN_PRIM (-1) -#define _CFFI__UNKNOWN_FLOAT_PRIM (-2) -#define 
_CFFI__UNKNOWN_LONG_DOUBLE (-3) - -#define _CFFI__IO_FILE_STRUCT (-1) - - -struct _cffi_global_s { - const char *name; - void *address; - _cffi_opcode_t type_op; - void *size_or_direct_fn; // OP_GLOBAL_VAR: size, or 0 if unknown - // OP_CPYTHON_BLTN_*: addr of direct function -}; - -struct _cffi_getconst_s { - unsigned long long value; - const struct _cffi_type_context_s *ctx; - int gindex; -}; - -struct _cffi_struct_union_s { - const char *name; - int type_index; // -> _cffi_types, on a OP_STRUCT_UNION - int flags; // _CFFI_F_* flags below - size_t size; - int alignment; - int first_field_index; // -> _cffi_fields array - int num_fields; -}; -#define _CFFI_F_UNION 0x01 // is a union, not a struct -#define _CFFI_F_CHECK_FIELDS 0x02 // complain if fields are not in the - // "standard layout" or if some are missing -#define _CFFI_F_PACKED 0x04 // for CHECK_FIELDS, assume a packed struct -#define _CFFI_F_EXTERNAL 0x08 // in some other ffi.include() -#define _CFFI_F_OPAQUE 0x10 // opaque - -struct _cffi_field_s { - const char *name; - size_t field_offset; - size_t field_size; - _cffi_opcode_t field_type_op; -}; - -struct _cffi_enum_s { - const char *name; - int type_index; // -> _cffi_types, on a OP_ENUM - int type_prim; // _CFFI_PRIM_xxx - const char *enumerators; // comma-delimited string -}; - -struct _cffi_typename_s { - const char *name; - int type_index; /* if opaque, points to a possibly artificial - OP_STRUCT which is itself opaque */ -}; - -struct _cffi_type_context_s { - _cffi_opcode_t *types; - const struct _cffi_global_s *globals; - const struct _cffi_field_s *fields; - const struct _cffi_struct_union_s *struct_unions; - const struct _cffi_enum_s *enums; - const struct _cffi_typename_s *typenames; - int num_globals; - int num_struct_unions; - int num_enums; - int num_typenames; - const char *const *includes; - int num_types; - int flags; /* future extension */ -}; - -struct _cffi_parse_info_s { - const struct _cffi_type_context_s *ctx; - _cffi_opcode_t 
*output; - unsigned int output_size; - size_t error_location; - const char *error_message; -}; - -struct _cffi_externpy_s { - const char *name; - size_t size_of_result; - void *reserved1, *reserved2; -}; - -#ifdef _CFFI_INTERNAL -static int parse_c_type(struct _cffi_parse_info_s *info, const char *input); -static int search_in_globals(const struct _cffi_type_context_s *ctx, - const char *search, size_t search_len); -static int search_in_struct_unions(const struct _cffi_type_context_s *ctx, - const char *search, size_t search_len); -#endif - -/* this block of #ifs should be kept exactly identical between - c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py - and cffi/_cffi_include.h */ -#if defined(_MSC_VER) -# include /* for alloca() */ -# if _MSC_VER < 1600 /* MSVC < 2010 */ - typedef __int8 int8_t; - typedef __int16 int16_t; - typedef __int32 int32_t; - typedef __int64 int64_t; - typedef unsigned __int8 uint8_t; - typedef unsigned __int16 uint16_t; - typedef unsigned __int32 uint32_t; - typedef unsigned __int64 uint64_t; - typedef __int8 int_least8_t; - typedef __int16 int_least16_t; - typedef __int32 int_least32_t; - typedef __int64 int_least64_t; - typedef unsigned __int8 uint_least8_t; - typedef unsigned __int16 uint_least16_t; - typedef unsigned __int32 uint_least32_t; - typedef unsigned __int64 uint_least64_t; - typedef __int8 int_fast8_t; - typedef __int16 int_fast16_t; - typedef __int32 int_fast32_t; - typedef __int64 int_fast64_t; - typedef unsigned __int8 uint_fast8_t; - typedef unsigned __int16 uint_fast16_t; - typedef unsigned __int32 uint_fast32_t; - typedef unsigned __int64 uint_fast64_t; - typedef __int64 intmax_t; - typedef unsigned __int64 uintmax_t; -# else -# include -# endif -# if _MSC_VER < 1800 /* MSVC < 2013 */ -# ifndef __cplusplus - typedef unsigned char _Bool; -# endif -# endif -#else -# include -# if (defined (__SVR4) && defined (__sun)) || defined(_AIX) || defined(__hpux) -# include -# endif -#endif - -#ifdef __GNUC__ -# define 
_CFFI_UNUSED_FN __attribute__((unused)) -#else -# define _CFFI_UNUSED_FN /* nothing */ -#endif - -#ifdef __cplusplus -# ifndef _Bool - typedef bool _Bool; /* semi-hackish: C++ has no _Bool; bool is builtin */ -# endif -#endif - -/********** CPython-specific section **********/ -#ifndef PYPY_VERSION - - -#if PY_MAJOR_VERSION >= 3 -# define PyInt_FromLong PyLong_FromLong -#endif - -#define _cffi_from_c_double PyFloat_FromDouble -#define _cffi_from_c_float PyFloat_FromDouble -#define _cffi_from_c_long PyInt_FromLong -#define _cffi_from_c_ulong PyLong_FromUnsignedLong -#define _cffi_from_c_longlong PyLong_FromLongLong -#define _cffi_from_c_ulonglong PyLong_FromUnsignedLongLong -#define _cffi_from_c__Bool PyBool_FromLong - -#define _cffi_to_c_double PyFloat_AsDouble -#define _cffi_to_c_float PyFloat_AsDouble - -#define _cffi_from_c_int(x, type) \ - (((type)-1) > 0 ? /* unsigned */ \ - (sizeof(type) < sizeof(long) ? \ - PyInt_FromLong((long)x) : \ - sizeof(type) == sizeof(long) ? \ - PyLong_FromUnsignedLong((unsigned long)x) : \ - PyLong_FromUnsignedLongLong((unsigned long long)x)) : \ - (sizeof(type) <= sizeof(long) ? \ - PyInt_FromLong((long)x) : \ - PyLong_FromLongLong((long long)x))) - -#define _cffi_to_c_int(o, type) \ - ((type)( \ - sizeof(type) == 1 ? (((type)-1) > 0 ? (type)_cffi_to_c_u8(o) \ - : (type)_cffi_to_c_i8(o)) : \ - sizeof(type) == 2 ? (((type)-1) > 0 ? (type)_cffi_to_c_u16(o) \ - : (type)_cffi_to_c_i16(o)) : \ - sizeof(type) == 4 ? (((type)-1) > 0 ? (type)_cffi_to_c_u32(o) \ - : (type)_cffi_to_c_i32(o)) : \ - sizeof(type) == 8 ? (((type)-1) > 0 ? 
(type)_cffi_to_c_u64(o) \ - : (type)_cffi_to_c_i64(o)) : \ - (Py_FatalError("unsupported size for type " #type), (type)0))) - -#define _cffi_to_c_i8 \ - ((int(*)(PyObject *))_cffi_exports[1]) -#define _cffi_to_c_u8 \ - ((int(*)(PyObject *))_cffi_exports[2]) -#define _cffi_to_c_i16 \ - ((int(*)(PyObject *))_cffi_exports[3]) -#define _cffi_to_c_u16 \ - ((int(*)(PyObject *))_cffi_exports[4]) -#define _cffi_to_c_i32 \ - ((int(*)(PyObject *))_cffi_exports[5]) -#define _cffi_to_c_u32 \ - ((unsigned int(*)(PyObject *))_cffi_exports[6]) -#define _cffi_to_c_i64 \ - ((long long(*)(PyObject *))_cffi_exports[7]) -#define _cffi_to_c_u64 \ - ((unsigned long long(*)(PyObject *))_cffi_exports[8]) -#define _cffi_to_c_char \ - ((int(*)(PyObject *))_cffi_exports[9]) -#define _cffi_from_c_pointer \ - ((PyObject *(*)(char *, struct _cffi_ctypedescr *))_cffi_exports[10]) -#define _cffi_to_c_pointer \ - ((char *(*)(PyObject *, struct _cffi_ctypedescr *))_cffi_exports[11]) -#define _cffi_get_struct_layout \ - not used any more -#define _cffi_restore_errno \ - ((void(*)(void))_cffi_exports[13]) -#define _cffi_save_errno \ - ((void(*)(void))_cffi_exports[14]) -#define _cffi_from_c_char \ - ((PyObject *(*)(char))_cffi_exports[15]) -#define _cffi_from_c_deref \ - ((PyObject *(*)(char *, struct _cffi_ctypedescr *))_cffi_exports[16]) -#define _cffi_to_c \ - ((int(*)(char *, struct _cffi_ctypedescr *, PyObject *))_cffi_exports[17]) -#define _cffi_from_c_struct \ - ((PyObject *(*)(char *, struct _cffi_ctypedescr *))_cffi_exports[18]) -#define _cffi_to_c_wchar_t \ - ((_cffi_wchar_t(*)(PyObject *))_cffi_exports[19]) -#define _cffi_from_c_wchar_t \ - ((PyObject *(*)(_cffi_wchar_t))_cffi_exports[20]) -#define _cffi_to_c_long_double \ - ((long double(*)(PyObject *))_cffi_exports[21]) -#define _cffi_to_c__Bool \ - ((_Bool(*)(PyObject *))_cffi_exports[22]) -#define _cffi_prepare_pointer_call_argument \ - ((Py_ssize_t(*)(struct _cffi_ctypedescr *, \ - PyObject *, char **))_cffi_exports[23]) -#define 
_cffi_convert_array_from_object \ - ((int(*)(char *, struct _cffi_ctypedescr *, PyObject *))_cffi_exports[24]) -#define _CFFI_CPIDX 25 -#define _cffi_call_python \ - ((void(*)(struct _cffi_externpy_s *, char *))_cffi_exports[_CFFI_CPIDX]) -#define _cffi_to_c_wchar3216_t \ - ((int(*)(PyObject *))_cffi_exports[26]) -#define _cffi_from_c_wchar3216_t \ - ((PyObject *(*)(int))_cffi_exports[27]) -#define _CFFI_NUM_EXPORTS 28 - -struct _cffi_ctypedescr; - -static void *_cffi_exports[_CFFI_NUM_EXPORTS]; - -#define _cffi_type(index) ( \ - assert((((uintptr_t)_cffi_types[index]) & 1) == 0), \ - (struct _cffi_ctypedescr *)_cffi_types[index]) - -static PyObject *_cffi_init(const char *module_name, Py_ssize_t version, - const struct _cffi_type_context_s *ctx) -{ - PyObject *module, *o_arg, *new_module; - void *raw[] = { - (void *)module_name, - (void *)version, - (void *)_cffi_exports, - (void *)ctx, - }; - - module = PyImport_ImportModule("_cffi_backend"); - if (module == NULL) - goto failure; - - o_arg = PyLong_FromVoidPtr((void *)raw); - if (o_arg == NULL) - goto failure; - - new_module = PyObject_CallMethod( - module, (char *)"_init_cffi_1_0_external_module", (char *)"O", o_arg); - - Py_DECREF(o_arg); - Py_DECREF(module); - return new_module; - - failure: - Py_XDECREF(module); - return NULL; -} - - -#ifdef HAVE_WCHAR_H -typedef wchar_t _cffi_wchar_t; -#else -typedef uint16_t _cffi_wchar_t; /* same random pick as _cffi_backend.c */ -#endif - -_CFFI_UNUSED_FN static uint16_t _cffi_to_c_char16_t(PyObject *o) -{ - if (sizeof(_cffi_wchar_t) == 2) - return (uint16_t)_cffi_to_c_wchar_t(o); - else - return (uint16_t)_cffi_to_c_wchar3216_t(o); -} - -_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char16_t(uint16_t x) -{ - if (sizeof(_cffi_wchar_t) == 2) - return _cffi_from_c_wchar_t((_cffi_wchar_t)x); - else - return _cffi_from_c_wchar3216_t((int)x); -} - -_CFFI_UNUSED_FN static int _cffi_to_c_char32_t(PyObject *o) -{ - if (sizeof(_cffi_wchar_t) == 4) - return 
(int)_cffi_to_c_wchar_t(o); - else - return (int)_cffi_to_c_wchar3216_t(o); -} - -_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(unsigned int x) -{ - if (sizeof(_cffi_wchar_t) == 4) - return _cffi_from_c_wchar_t((_cffi_wchar_t)x); - else - return _cffi_from_c_wchar3216_t((int)x); -} - -union _cffi_union_alignment_u { - unsigned char m_char; - unsigned short m_short; - unsigned int m_int; - unsigned long m_long; - unsigned long long m_longlong; - float m_float; - double m_double; - long double m_longdouble; -}; - -struct _cffi_freeme_s { - struct _cffi_freeme_s *next; - union _cffi_union_alignment_u alignment; -}; - -_CFFI_UNUSED_FN static int -_cffi_convert_array_argument(struct _cffi_ctypedescr *ctptr, PyObject *arg, - char **output_data, Py_ssize_t datasize, - struct _cffi_freeme_s **freeme) -{ - char *p; - if (datasize < 0) - return -1; - - p = *output_data; - if (p == NULL) { - struct _cffi_freeme_s *fp = (struct _cffi_freeme_s *)PyObject_Malloc( - offsetof(struct _cffi_freeme_s, alignment) + (size_t)datasize); - if (fp == NULL) - return -1; - fp->next = *freeme; - *freeme = fp; - p = *output_data = (char *)&fp->alignment; - } - memset((void *)p, 0, (size_t)datasize); - return _cffi_convert_array_from_object(p, ctptr, arg); -} - -_CFFI_UNUSED_FN static void -_cffi_free_array_arguments(struct _cffi_freeme_s *freeme) -{ - do { - void *p = (void *)freeme; - freeme = freeme->next; - PyObject_Free(p); - } while (freeme != NULL); -} - -/********** end CPython-specific section **********/ -#else -_CFFI_UNUSED_FN -static void (*_cffi_call_python_org)(struct _cffi_externpy_s *, char *); -# define _cffi_call_python _cffi_call_python_org -#endif - - -#define _cffi_array_len(array) (sizeof(array) / sizeof((array)[0])) - -#define _cffi_prim_int(size, sign) \ - ((size) == 1 ? ((sign) ? _CFFI_PRIM_INT8 : _CFFI_PRIM_UINT8) : \ - (size) == 2 ? ((sign) ? _CFFI_PRIM_INT16 : _CFFI_PRIM_UINT16) : \ - (size) == 4 ? ((sign) ? 
_CFFI_PRIM_INT32 : _CFFI_PRIM_UINT32) : \ - (size) == 8 ? ((sign) ? _CFFI_PRIM_INT64 : _CFFI_PRIM_UINT64) : \ - _CFFI__UNKNOWN_PRIM) - -#define _cffi_prim_float(size) \ - ((size) == sizeof(float) ? _CFFI_PRIM_FLOAT : \ - (size) == sizeof(double) ? _CFFI_PRIM_DOUBLE : \ - (size) == sizeof(long double) ? _CFFI__UNKNOWN_LONG_DOUBLE : \ - _CFFI__UNKNOWN_FLOAT_PRIM) - -#define _cffi_check_int(got, got_nonpos, expected) \ - ((got_nonpos) == (expected <= 0) && \ - (got) == (unsigned long long)expected) - -#ifdef MS_WIN32 -# define _cffi_stdcall __stdcall -#else -# define _cffi_stdcall /* nothing */ -#endif - -#ifdef __cplusplus -} -#endif - -/************************************************************/ - - - #include "21cmFAST.h" - - -/************************************************************/ - -static void *_cffi_types[] = { -/* 0 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double) -/* 1 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), // double -/* 2 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 3 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, UserParams *, CosmoParams *, AstroParams *, FlagOptions *) -/* 4 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 5 */ _CFFI_OP(_CFFI_OP_POINTER, 514), // UserParams * -/* 6 */ _CFFI_OP(_CFFI_OP_POINTER, 504), // CosmoParams * -/* 7 */ _CFFI_OP(_CFFI_OP_POINTER, 502), // AstroParams * -/* 8 */ _CFFI_OP(_CFFI_OP_POINTER, 505), // FlagOptions * -/* 9 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 10 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double) -/* 11 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 12 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 13 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 14 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double) -/* 15 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 16 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 17 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 18 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 19 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double) -/* 20 */ 
_CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 21 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 22 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 23 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 24 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 25 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double) -/* 26 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 27 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 28 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 29 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 30 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 31 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 32 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 33 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 34 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double) -/* 35 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 36 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 37 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 38 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 39 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 40 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 41 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 42 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 43 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 44 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double, double) -/* 45 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 46 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 47 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 48 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 49 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 50 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 51 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 52 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 53 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 54 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 55 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double, double, _Bool) -/* 56 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 57 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 58 */ 
_CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 59 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 60 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 61 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 62 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 63 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 64 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 65 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), // _Bool -/* 66 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 67 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double, double, double) -/* 68 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 69 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 70 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 71 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 72 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 73 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 74 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 75 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 76 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 77 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 78 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 79 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double, double, double, _Bool) -/* 80 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 81 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 82 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 83 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 84 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 85 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 86 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 87 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 88 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 89 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 90 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), -/* 91 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 92 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double, double, double, double) -/* 93 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 94 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 95 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 96 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 
14), -/* 97 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 98 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 99 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 100 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 101 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 102 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 103 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 104 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 105 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double, double, double, double, double, double, double, int) -/* 106 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 107 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 108 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 109 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 110 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 111 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 112 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 113 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 114 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 115 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 116 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 117 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 118 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 119 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 120 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), // int -/* 121 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 122 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, double, double, double, double, double, double, double, int) -/* 123 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 124 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 125 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 126 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 127 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 128 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 129 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 130 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 131 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 132 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 133 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 134 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 135 */ 
_CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 136 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 137 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 138 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, double, double, int) -/* 139 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 140 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 141 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 142 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 143 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 144 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 145 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 146 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 147 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, double, int) -/* 148 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 149 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 150 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 151 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 152 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 153 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 154 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(double, double, double, int) -/* 155 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 156 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 157 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 158 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 159 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 160 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(float) -/* 161 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), // float -/* 162 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 163 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(int, double, double) -/* 164 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 165 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 166 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 167 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 168 */ _CFFI_OP(_CFFI_OP_FUNCTION, 1), // double()(void) -/* 169 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 170 */ _CFFI_OP(_CFFI_OP_FUNCTION, 161), // float()(UserParams *, CosmoParams *, int, float *, float *) -/* 171 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 172 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 173 */ 
_CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 174 */ _CFFI_OP(_CFFI_OP_POINTER, 161), // float * -/* 175 */ _CFFI_OP(_CFFI_OP_NOOP, 174), -/* 176 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 177 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(UserParams *, CosmoParams *) -/* 178 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 179 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 180 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 181 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(UserParams *, CosmoParams *, AstroParams *, FlagOptions *) -/* 182 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 183 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 184 */ _CFFI_OP(_CFFI_OP_NOOP, 7), -/* 185 */ _CFFI_OP(_CFFI_OP_NOOP, 8), -/* 186 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 187 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(UserParams *, CosmoParams *, AstroParams *, FlagOptions *, HaloBox *, double, double, int, XraySourceBox *) -/* 188 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 189 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 190 */ _CFFI_OP(_CFFI_OP_NOOP, 7), -/* 191 */ _CFFI_OP(_CFFI_OP_NOOP, 8), -/* 192 */ _CFFI_OP(_CFFI_OP_POINTER, 507), // HaloBox * -/* 193 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 194 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 195 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 196 */ _CFFI_OP(_CFFI_OP_POINTER, 515), // XraySourceBox * -/* 197 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 198 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(UserParams *, CosmoParams *, AstroParams *, FlagOptions *, float *, double, double, int, double *) -/* 199 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 200 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 201 */ _CFFI_OP(_CFFI_OP_NOOP, 7), -/* 202 */ _CFFI_OP(_CFFI_OP_NOOP, 8), -/* 203 */ _CFFI_OP(_CFFI_OP_NOOP, 174), -/* 204 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 205 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 206 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 207 */ _CFFI_OP(_CFFI_OP_POINTER, 1), // double * -/* 208 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 209 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(UserParams *, CosmoParams *, AstroParams 
*, FlagOptions *, int, int, float *, int *, double, double, int *, int *, double *, double *, double *, float *, int *) -/* 210 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 211 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 212 */ _CFFI_OP(_CFFI_OP_NOOP, 7), -/* 213 */ _CFFI_OP(_CFFI_OP_NOOP, 8), -/* 214 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 215 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 216 */ _CFFI_OP(_CFFI_OP_NOOP, 174), -/* 217 */ _CFFI_OP(_CFFI_OP_POINTER, 120), // int * -/* 218 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 219 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 220 */ _CFFI_OP(_CFFI_OP_NOOP, 217), -/* 221 */ _CFFI_OP(_CFFI_OP_NOOP, 217), -/* 222 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 223 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 224 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 225 */ _CFFI_OP(_CFFI_OP_NOOP, 174), -/* 226 */ _CFFI_OP(_CFFI_OP_NOOP, 217), -/* 227 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 228 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(_Bool) -/* 229 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), -/* 230 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 231 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(_Bool, _Bool, double *) -/* 232 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), -/* 233 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), -/* 234 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 235 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 236 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(double *) -/* 237 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 238 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 239 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(double *, double *, int *, double *, double *, int *, double *, double *, int *) -/* 240 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 241 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 242 */ _CFFI_OP(_CFFI_OP_NOOP, 217), -/* 243 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 244 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 245 */ _CFFI_OP(_CFFI_OP_NOOP, 217), -/* 246 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 247 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 248 */ _CFFI_OP(_CFFI_OP_NOOP, 217), -/* 249 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 250 
*/ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(double *, double *, int) -/* 251 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 252 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 253 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 254 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 255 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(double, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, InitialConditions *, PerturbedField *, PerturbHaloField *, TsBox *, IonizedBox *, HaloBox *) -/* 256 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 257 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 258 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 259 */ _CFFI_OP(_CFFI_OP_NOOP, 7), -/* 260 */ _CFFI_OP(_CFFI_OP_NOOP, 8), -/* 261 */ _CFFI_OP(_CFFI_OP_POINTER, 509), // InitialConditions * -/* 262 */ _CFFI_OP(_CFFI_OP_POINTER, 512), // PerturbedField * -/* 263 */ _CFFI_OP(_CFFI_OP_POINTER, 511), // PerturbHaloField * -/* 264 */ _CFFI_OP(_CFFI_OP_POINTER, 513), // TsBox * -/* 265 */ _CFFI_OP(_CFFI_OP_POINTER, 510), // IonizedBox * -/* 266 */ _CFFI_OP(_CFFI_OP_NOOP, 192), -/* 267 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 268 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(double, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, float *, float *, float *, float *, PerturbHaloField *, float *) -/* 269 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 270 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 271 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 272 */ _CFFI_OP(_CFFI_OP_NOOP, 7), -/* 273 */ _CFFI_OP(_CFFI_OP_NOOP, 8), -/* 274 */ _CFFI_OP(_CFFI_OP_NOOP, 174), -/* 275 */ _CFFI_OP(_CFFI_OP_NOOP, 174), -/* 276 */ _CFFI_OP(_CFFI_OP_NOOP, 174), -/* 277 */ _CFFI_OP(_CFFI_OP_NOOP, 174), -/* 278 */ _CFFI_OP(_CFFI_OP_NOOP, 263), -/* 279 */ _CFFI_OP(_CFFI_OP_NOOP, 174), -/* 280 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 281 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(float, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, InitialConditions *, HaloField *, PerturbHaloField *) -/* 282 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 283 */ _CFFI_OP(_CFFI_OP_NOOP, 5), 
-/* 284 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 285 */ _CFFI_OP(_CFFI_OP_NOOP, 7), -/* 286 */ _CFFI_OP(_CFFI_OP_NOOP, 8), -/* 287 */ _CFFI_OP(_CFFI_OP_NOOP, 261), -/* 288 */ _CFFI_OP(_CFFI_OP_POINTER, 508), // HaloField * -/* 289 */ _CFFI_OP(_CFFI_OP_NOOP, 263), -/* 290 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 291 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(float, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, TsBox *, IonizedBox *, PerturbedField *, BrightnessTemp *) -/* 292 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 293 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 294 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 295 */ _CFFI_OP(_CFFI_OP_NOOP, 7), -/* 296 */ _CFFI_OP(_CFFI_OP_NOOP, 8), -/* 297 */ _CFFI_OP(_CFFI_OP_NOOP, 264), -/* 298 */ _CFFI_OP(_CFFI_OP_NOOP, 265), -/* 299 */ _CFFI_OP(_CFFI_OP_NOOP, 262), -/* 300 */ _CFFI_OP(_CFFI_OP_POINTER, 503), // BrightnessTemp * -/* 301 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 302 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(float, UserParams *, CosmoParams *, InitialConditions *, PerturbedField *) -/* 303 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 304 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 305 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 306 */ _CFFI_OP(_CFFI_OP_NOOP, 261), -/* 307 */ _CFFI_OP(_CFFI_OP_NOOP, 262), -/* 308 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 309 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(float, float, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, InitialConditions *, unsigned long long, HaloField *, HaloField *) -/* 310 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 311 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 312 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 313 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 314 */ _CFFI_OP(_CFFI_OP_NOOP, 7), -/* 315 */ _CFFI_OP(_CFFI_OP_NOOP, 8), -/* 316 */ _CFFI_OP(_CFFI_OP_NOOP, 261), -/* 317 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 12), // unsigned long long -/* 318 */ _CFFI_OP(_CFFI_OP_NOOP, 288), -/* 319 */ _CFFI_OP(_CFFI_OP_NOOP, 288), -/* 320 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 321 */ 
_CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(float, float, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, PerturbedField *, PerturbedField *, IonizedBox *, TsBox *, HaloBox *, InitialConditions *, IonizedBox *) -/* 322 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 323 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 324 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 325 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 326 */ _CFFI_OP(_CFFI_OP_NOOP, 7), -/* 327 */ _CFFI_OP(_CFFI_OP_NOOP, 8), -/* 328 */ _CFFI_OP(_CFFI_OP_NOOP, 262), -/* 329 */ _CFFI_OP(_CFFI_OP_NOOP, 262), -/* 330 */ _CFFI_OP(_CFFI_OP_NOOP, 265), -/* 331 */ _CFFI_OP(_CFFI_OP_NOOP, 264), -/* 332 */ _CFFI_OP(_CFFI_OP_NOOP, 192), -/* 333 */ _CFFI_OP(_CFFI_OP_NOOP, 261), -/* 334 */ _CFFI_OP(_CFFI_OP_NOOP, 265), -/* 335 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 336 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(float, float, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, float, short, PerturbedField *, XraySourceBox *, TsBox *, InitialConditions *, TsBox *) -/* 337 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 338 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 339 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 340 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 341 */ _CFFI_OP(_CFFI_OP_NOOP, 7), -/* 342 */ _CFFI_OP(_CFFI_OP_NOOP, 8), -/* 343 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 344 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 5), // short -/* 345 */ _CFFI_OP(_CFFI_OP_NOOP, 262), -/* 346 */ _CFFI_OP(_CFFI_OP_NOOP, 196), -/* 347 */ _CFFI_OP(_CFFI_OP_NOOP, 264), -/* 348 */ _CFFI_OP(_CFFI_OP_NOOP, 261), -/* 349 */ _CFFI_OP(_CFFI_OP_NOOP, 264), -/* 350 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 351 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(int, UserParams *, CosmoParams *, AstroParams *, FlagOptions *, int, int, float *, float *, double *, double *, double *) -/* 352 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 353 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 354 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 355 */ _CFFI_OP(_CFFI_OP_NOOP, 7), -/* 356 */ _CFFI_OP(_CFFI_OP_NOOP, 8), -/* 357 */ 
_CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 358 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 359 */ _CFFI_OP(_CFFI_OP_NOOP, 174), -/* 360 */ _CFFI_OP(_CFFI_OP_NOOP, 174), -/* 361 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 362 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 363 */ _CFFI_OP(_CFFI_OP_NOOP, 207), -/* 364 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 365 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(unsigned long long, UserParams *, CosmoParams *, InitialConditions *) -/* 366 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 12), -/* 367 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 368 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 369 */ _CFFI_OP(_CFFI_OP_NOOP, 261), -/* 370 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 371 */ _CFFI_OP(_CFFI_OP_FUNCTION, 120), // int()(void) -/* 372 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 373 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(AstroParams *, FlagOptions *, float *, float *, float *) -/* 374 */ _CFFI_OP(_CFFI_OP_NOOP, 7), -/* 375 */ _CFFI_OP(_CFFI_OP_NOOP, 8), -/* 376 */ _CFFI_OP(_CFFI_OP_NOOP, 174), -/* 377 */ _CFFI_OP(_CFFI_OP_NOOP, 174), -/* 378 */ _CFFI_OP(_CFFI_OP_NOOP, 174), -/* 379 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 380 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(UserParams *, CosmoParams *) -/* 381 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 382 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 383 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 384 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(UserParams *, CosmoParams *, AstroParams *, FlagOptions *) -/* 385 */ _CFFI_OP(_CFFI_OP_NOOP, 5), -/* 386 */ _CFFI_OP(_CFFI_OP_NOOP, 6), -/* 387 */ _CFFI_OP(_CFFI_OP_NOOP, 7), -/* 388 */ _CFFI_OP(_CFFI_OP_NOOP, 8), -/* 389 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 390 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(double, double) -/* 391 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 392 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 393 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 394 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(double, double, _Bool) -/* 395 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 396 */ 
_CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 397 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), -/* 398 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 399 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(double, double, double, double, double, _Bool) -/* 400 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 401 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 402 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 403 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 404 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 405 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), -/* 406 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 407 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(double, double, double, double, double, double) -/* 408 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 409 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 410 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 411 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 412 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 413 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 414 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 415 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(double, double, double, double, double, double, _Bool) -/* 416 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 417 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 418 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 419 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 420 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 421 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 422 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), -/* 423 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 424 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(double, double, double, float, double, double, double, float, float, float, float, int, int, _Bool) -/* 425 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 426 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 427 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 428 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 429 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 430 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 431 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 14), -/* 432 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 433 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 
434 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 435 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 436 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 437 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 438 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), -/* 439 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 440 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(float, float) -/* 441 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 442 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 443 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 444 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, int, int, _Bool, _Bool) -/* 445 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 446 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 447 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 448 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 449 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 450 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 451 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 452 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 453 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 454 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 455 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 456 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 457 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 458 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 459 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 460 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 461 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 462 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 463 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 464 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 465 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 466 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 467 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 468 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 469 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), -/* 470 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), -/* 471 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 472 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(int, float, float, float, 
float, float, float, float, _Bool) -/* 473 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 474 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 475 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 476 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 477 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 478 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 479 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 480 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 481 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), -/* 482 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 483 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(int, float, float, float, float, float, float, float, float, float, float, _Bool) -/* 484 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), -/* 485 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 486 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 487 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 488 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 489 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 490 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 491 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 492 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 493 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 494 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), -/* 495 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 1), -/* 496 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 497 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(void *) -/* 498 */ _CFFI_OP(_CFFI_OP_POINTER, 518), // void * -/* 499 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 500 */ _CFFI_OP(_CFFI_OP_FUNCTION, 518), // void()(void) -/* 501 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), -/* 502 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 0), // AstroParams -/* 503 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 1), // BrightnessTemp -/* 504 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 2), // CosmoParams -/* 505 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 3), // FlagOptions -/* 506 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 4), // GlobalParams -/* 507 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 5), // HaloBox -/* 508 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 6), // HaloField -/* 509 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 7), // InitialConditions -/* 510 */ 
_CFFI_OP(_CFFI_OP_STRUCT_UNION, 8), // IonizedBox -/* 511 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 9), // PerturbHaloField -/* 512 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 10), // PerturbedField -/* 513 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 11), // TsBox -/* 514 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 12), // UserParams -/* 515 */ _CFFI_OP(_CFFI_OP_STRUCT_UNION, 13), // XraySourceBox -/* 516 */ _CFFI_OP(_CFFI_OP_POINTER, 517), // char * -/* 517 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 2), // char -/* 518 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 0), // void -}; - -static void _cffi_d_Broadcast_struct_global_all(UserParams * x0, CosmoParams * x1, AstroParams * x2, FlagOptions * x3) -{ - Broadcast_struct_global_all(x0, x1, x2, x3); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_Broadcast_struct_global_all(PyObject *self, PyObject *args) -{ - UserParams * x0; - CosmoParams * x1; - AstroParams * x2; - FlagOptions * x3; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - - if (!PyArg_UnpackTuple(args, "Broadcast_struct_global_all", 4, 4, &arg0, &arg1, &arg2, &arg3)) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg0, (char **)&x0); - if (datasize != 0) { - x0 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? 
(AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { Broadcast_struct_global_all(x0, x1, x2, x3); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_Broadcast_struct_global_all _cffi_d_Broadcast_struct_global_all -#endif - -static void _cffi_d_Broadcast_struct_global_noastro(UserParams * x0, CosmoParams * x1) -{ - Broadcast_struct_global_noastro(x0, x1); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_Broadcast_struct_global_noastro(PyObject *self, PyObject *args) -{ - UserParams * x0; - CosmoParams * x1; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - PyObject *arg0; - PyObject *arg1; - - if (!PyArg_UnpackTuple(args, "Broadcast_struct_global_noastro", 2, 2, &arg0, &arg1)) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg0, (char **)&x0); - if (datasize != 0) { - x0 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? 
(CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { Broadcast_struct_global_noastro(x0, x1); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_Broadcast_struct_global_noastro _cffi_d_Broadcast_struct_global_noastro -#endif - -static int _cffi_d_ComputeBrightnessTemp(float x0, UserParams * x1, CosmoParams * x2, AstroParams * x3, FlagOptions * x4, TsBox * x5, IonizedBox * x6, PerturbedField * x7, BrightnessTemp * x8) -{ - return ComputeBrightnessTemp(x0, x1, x2, x3, x4, x5, x6, x7, x8); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_ComputeBrightnessTemp(PyObject *self, PyObject *args) -{ - float x0; - UserParams * x1; - CosmoParams * x2; - AstroParams * x3; - FlagOptions * x4; - TsBox * x5; - IonizedBox * x6; - PerturbedField * x7; - BrightnessTemp * x8; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - - if (!PyArg_UnpackTuple(args, "ComputeBrightnessTemp", 9, 9, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8)) - return NULL; - - x0 = (float)_cffi_to_c_float(arg0); - if (x0 == (float)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? 
(UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(264), arg5, (char **)&x5); - if (datasize != 0) { - x5 = ((size_t)datasize) <= 640 ? (TsBox *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(264), arg5, (char **)&x5, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(265), arg6, (char **)&x6); - if (datasize != 0) { - x6 = ((size_t)datasize) <= 640 ? (IonizedBox *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(265), arg6, (char **)&x6, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(262), arg7, (char **)&x7); - if (datasize != 0) { - x7 = ((size_t)datasize) <= 640 ? 
(PerturbedField *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(262), arg7, (char **)&x7, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(300), arg8, (char **)&x8); - if (datasize != 0) { - x8 = ((size_t)datasize) <= 640 ? (BrightnessTemp *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(300), arg8, (char **)&x8, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = ComputeBrightnessTemp(x0, x1, x2, x3, x4, x5, x6, x7, x8); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_ComputeBrightnessTemp _cffi_d_ComputeBrightnessTemp -#endif - -static int _cffi_d_ComputeHaloBox(double x0, UserParams * x1, CosmoParams * x2, AstroParams * x3, FlagOptions * x4, InitialConditions * x5, PerturbedField * x6, PerturbHaloField * x7, TsBox * x8, IonizedBox * x9, HaloBox * x10) -{ - return ComputeHaloBox(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_ComputeHaloBox(PyObject *self, PyObject *args) -{ - double x0; - UserParams * x1; - CosmoParams * x2; - AstroParams * x3; - FlagOptions * x4; - InitialConditions * x5; - PerturbedField * x6; - PerturbHaloField * x7; - TsBox * x8; - IonizedBox * x9; - HaloBox * x10; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - PyObject *arg10; - - if (!PyArg_UnpackTuple(args, "ComputeHaloBox", 11, 11, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10)) 
- return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(261), arg5, (char **)&x5); - if (datasize != 0) { - x5 = ((size_t)datasize) <= 640 ? (InitialConditions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(261), arg5, (char **)&x5, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(262), arg6, (char **)&x6); - if (datasize != 0) { - x6 = ((size_t)datasize) <= 640 ? 
(PerturbedField *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(262), arg6, (char **)&x6, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(263), arg7, (char **)&x7); - if (datasize != 0) { - x7 = ((size_t)datasize) <= 640 ? (PerturbHaloField *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(263), arg7, (char **)&x7, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(264), arg8, (char **)&x8); - if (datasize != 0) { - x8 = ((size_t)datasize) <= 640 ? (TsBox *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(264), arg8, (char **)&x8, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(265), arg9, (char **)&x9); - if (datasize != 0) { - x9 = ((size_t)datasize) <= 640 ? (IonizedBox *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(265), arg9, (char **)&x9, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(192), arg10, (char **)&x10); - if (datasize != 0) { - x10 = ((size_t)datasize) <= 640 ? 
(HaloBox *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(192), arg10, (char **)&x10, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = ComputeHaloBox(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_ComputeHaloBox _cffi_d_ComputeHaloBox -#endif - -static int _cffi_d_ComputeHaloField(float x0, float x1, UserParams * x2, CosmoParams * x3, AstroParams * x4, FlagOptions * x5, InitialConditions * x6, unsigned long long x7, HaloField * x8, HaloField * x9) -{ - return ComputeHaloField(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_ComputeHaloField(PyObject *self, PyObject *args) -{ - float x0; - float x1; - UserParams * x2; - CosmoParams * x3; - AstroParams * x4; - FlagOptions * x5; - InitialConditions * x6; - unsigned long long x7; - HaloField * x8; - HaloField * x9; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - - if (!PyArg_UnpackTuple(args, "ComputeHaloField", 10, 10, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9)) - return NULL; - - x0 = (float)_cffi_to_c_float(arg0); - if (x0 == (float)-1 && PyErr_Occurred()) - return NULL; - - x1 = (float)_cffi_to_c_float(arg1); - if (x1 == (float)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? 
(UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg5, (char **)&x5); - if (datasize != 0) { - x5 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg5, (char **)&x5, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(261), arg6, (char **)&x6); - if (datasize != 0) { - x6 = ((size_t)datasize) <= 640 ? (InitialConditions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(261), arg6, (char **)&x6, - datasize, &large_args_free) < 0) - return NULL; - } - - x7 = _cffi_to_c_int(arg7, unsigned long long); - if (x7 == (unsigned long long)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(288), arg8, (char **)&x8); - if (datasize != 0) { - x8 = ((size_t)datasize) <= 640 ? 
(HaloField *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(288), arg8, (char **)&x8, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(288), arg9, (char **)&x9); - if (datasize != 0) { - x9 = ((size_t)datasize) <= 640 ? (HaloField *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(288), arg9, (char **)&x9, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = ComputeHaloField(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_ComputeHaloField _cffi_d_ComputeHaloField -#endif - -static int _cffi_d_ComputeInitialConditions(unsigned long long x0, UserParams * x1, CosmoParams * x2, InitialConditions * x3) -{ - return ComputeInitialConditions(x0, x1, x2, x3); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_ComputeInitialConditions(PyObject *self, PyObject *args) -{ - unsigned long long x0; - UserParams * x1; - CosmoParams * x2; - InitialConditions * x3; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - - if (!PyArg_UnpackTuple(args, "ComputeInitialConditions", 4, 4, &arg0, &arg1, &arg2, &arg3)) - return NULL; - - x0 = _cffi_to_c_int(arg0, unsigned long long); - if (x0 == (unsigned long long)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? 
(UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(261), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (InitialConditions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(261), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = ComputeInitialConditions(x0, x1, x2, x3); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_ComputeInitialConditions _cffi_d_ComputeInitialConditions -#endif - -static int _cffi_d_ComputeIonizedBox(float x0, float x1, UserParams * x2, CosmoParams * x3, AstroParams * x4, FlagOptions * x5, PerturbedField * x6, PerturbedField * x7, IonizedBox * x8, TsBox * x9, HaloBox * x10, InitialConditions * x11, IonizedBox * x12) -{ - return ComputeIonizedBox(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_ComputeIonizedBox(PyObject *self, PyObject *args) -{ - float x0; - float x1; - UserParams * x2; - CosmoParams * x3; - AstroParams * x4; - FlagOptions * x5; - PerturbedField * x6; - PerturbedField * x7; - IonizedBox * x8; - TsBox * x9; - HaloBox * x10; - InitialConditions * x11; - IonizedBox * x12; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free 
= NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - PyObject *arg10; - PyObject *arg11; - PyObject *arg12; - - if (!PyArg_UnpackTuple(args, "ComputeIonizedBox", 13, 13, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11, &arg12)) - return NULL; - - x0 = (float)_cffi_to_c_float(arg0); - if (x0 == (float)-1 && PyErr_Occurred()) - return NULL; - - x1 = (float)_cffi_to_c_float(arg1); - if (x1 == (float)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg5, (char **)&x5); - if (datasize != 0) { - x5 = ((size_t)datasize) <= 640 ? 
(FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg5, (char **)&x5, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(262), arg6, (char **)&x6); - if (datasize != 0) { - x6 = ((size_t)datasize) <= 640 ? (PerturbedField *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(262), arg6, (char **)&x6, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(262), arg7, (char **)&x7); - if (datasize != 0) { - x7 = ((size_t)datasize) <= 640 ? (PerturbedField *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(262), arg7, (char **)&x7, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(265), arg8, (char **)&x8); - if (datasize != 0) { - x8 = ((size_t)datasize) <= 640 ? (IonizedBox *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(265), arg8, (char **)&x8, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(264), arg9, (char **)&x9); - if (datasize != 0) { - x9 = ((size_t)datasize) <= 640 ? (TsBox *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(264), arg9, (char **)&x9, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(192), arg10, (char **)&x10); - if (datasize != 0) { - x10 = ((size_t)datasize) <= 640 ? (HaloBox *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(192), arg10, (char **)&x10, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(261), arg11, (char **)&x11); - if (datasize != 0) { - x11 = ((size_t)datasize) <= 640 ? 
(InitialConditions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(261), arg11, (char **)&x11, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(265), arg12, (char **)&x12); - if (datasize != 0) { - x12 = ((size_t)datasize) <= 640 ? (IonizedBox *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(265), arg12, (char **)&x12, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = ComputeIonizedBox(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_ComputeIonizedBox _cffi_d_ComputeIonizedBox -#endif - -static int _cffi_d_ComputeLF(int x0, UserParams * x1, CosmoParams * x2, AstroParams * x3, FlagOptions * x4, int x5, int x6, float * x7, float * x8, double * x9, double * x10, double * x11) -{ - return ComputeLF(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_ComputeLF(PyObject *self, PyObject *args) -{ - int x0; - UserParams * x1; - CosmoParams * x2; - AstroParams * x3; - FlagOptions * x4; - int x5; - int x6; - float * x7; - float * x8; - double * x9; - double * x10; - double * x11; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - PyObject *arg10; - PyObject *arg11; - - if (!PyArg_UnpackTuple(args, "ComputeLF", 12, 12, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11)) - return NULL; - - x0 = 
_cffi_to_c_int(arg0, int); - if (x0 == (int)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - x5 = _cffi_to_c_int(arg5, int); - if (x5 == (int)-1 && PyErr_Occurred()) - return NULL; - - x6 = _cffi_to_c_int(arg6, int); - if (x6 == (int)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg7, (char **)&x7); - if (datasize != 0) { - x7 = ((size_t)datasize) <= 640 ? 
(float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg7, (char **)&x7, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg8, (char **)&x8); - if (datasize != 0) { - x8 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg8, (char **)&x8, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg9, (char **)&x9); - if (datasize != 0) { - x9 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg9, (char **)&x9, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg10, (char **)&x10); - if (datasize != 0) { - x10 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg10, (char **)&x10, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg11, (char **)&x11); - if (datasize != 0) { - x11 = ((size_t)datasize) <= 640 ? 
(double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg11, (char **)&x11, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = ComputeLF(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_ComputeLF _cffi_d_ComputeLF -#endif - -static int _cffi_d_ComputePerturbField(float x0, UserParams * x1, CosmoParams * x2, InitialConditions * x3, PerturbedField * x4) -{ - return ComputePerturbField(x0, x1, x2, x3, x4); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_ComputePerturbField(PyObject *self, PyObject *args) -{ - float x0; - UserParams * x1; - CosmoParams * x2; - InitialConditions * x3; - PerturbedField * x4; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - - if (!PyArg_UnpackTuple(args, "ComputePerturbField", 5, 5, &arg0, &arg1, &arg2, &arg3, &arg4)) - return NULL; - - x0 = (float)_cffi_to_c_float(arg0); - if (x0 == (float)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? 
(CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(261), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (InitialConditions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(261), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(262), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? (PerturbedField *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(262), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = ComputePerturbField(x0, x1, x2, x3, x4); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_ComputePerturbField _cffi_d_ComputePerturbField -#endif - -static int _cffi_d_ComputePerturbHaloField(float x0, UserParams * x1, CosmoParams * x2, AstroParams * x3, FlagOptions * x4, InitialConditions * x5, HaloField * x6, PerturbHaloField * x7) -{ - return ComputePerturbHaloField(x0, x1, x2, x3, x4, x5, x6, x7); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_ComputePerturbHaloField(PyObject *self, PyObject *args) -{ - float x0; - UserParams * x1; - CosmoParams * x2; - AstroParams * x3; - FlagOptions * x4; - InitialConditions * x5; - HaloField * x6; - PerturbHaloField * x7; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; 
- PyObject *arg7; - - if (!PyArg_UnpackTuple(args, "ComputePerturbHaloField", 8, 8, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7)) - return NULL; - - x0 = (float)_cffi_to_c_float(arg0); - if (x0 == (float)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(261), arg5, (char **)&x5); - if (datasize != 0) { - x5 = ((size_t)datasize) <= 640 ? 
(InitialConditions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(261), arg5, (char **)&x5, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(288), arg6, (char **)&x6); - if (datasize != 0) { - x6 = ((size_t)datasize) <= 640 ? (HaloField *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(288), arg6, (char **)&x6, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(263), arg7, (char **)&x7); - if (datasize != 0) { - x7 = ((size_t)datasize) <= 640 ? (PerturbHaloField *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(263), arg7, (char **)&x7, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = ComputePerturbHaloField(x0, x1, x2, x3, x4, x5, x6, x7); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_ComputePerturbHaloField _cffi_d_ComputePerturbHaloField -#endif - -static float _cffi_d_ComputeTau(UserParams * x0, CosmoParams * x1, int x2, float * x3, float * x4) -{ - return ComputeTau(x0, x1, x2, x3, x4); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_ComputeTau(PyObject *self, PyObject *args) -{ - UserParams * x0; - CosmoParams * x1; - int x2; - float * x3; - float * x4; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - float result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - - if (!PyArg_UnpackTuple(args, "ComputeTau", 5, 5, &arg0, &arg1, &arg2, &arg3, &arg4)) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg0, (char **)&x0); - if (datasize != 0) { 
- x0 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - x2 = _cffi_to_c_int(arg2, int); - if (x2 == (int)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? 
(float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = ComputeTau(x0, x1, x2, x3, x4); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_float(result); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_ComputeTau _cffi_d_ComputeTau -#endif - -static int _cffi_d_ComputeTsBox(float x0, float x1, UserParams * x2, CosmoParams * x3, AstroParams * x4, FlagOptions * x5, float x6, short x7, PerturbedField * x8, XraySourceBox * x9, TsBox * x10, InitialConditions * x11, TsBox * x12) -{ - return ComputeTsBox(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_ComputeTsBox(PyObject *self, PyObject *args) -{ - float x0; - float x1; - UserParams * x2; - CosmoParams * x3; - AstroParams * x4; - FlagOptions * x5; - float x6; - short x7; - PerturbedField * x8; - XraySourceBox * x9; - TsBox * x10; - InitialConditions * x11; - TsBox * x12; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - PyObject *arg10; - PyObject *arg11; - PyObject *arg12; - - if (!PyArg_UnpackTuple(args, "ComputeTsBox", 13, 13, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11, &arg12)) - return NULL; - - x0 = (float)_cffi_to_c_float(arg0); - if (x0 == (float)-1 && PyErr_Occurred()) - return NULL; - - x1 = (float)_cffi_to_c_float(arg1); - if (x1 == (float)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg2, (char **)&x2); - if 
(datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg5, (char **)&x5); - if (datasize != 0) { - x5 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg5, (char **)&x5, - datasize, &large_args_free) < 0) - return NULL; - } - - x6 = (float)_cffi_to_c_float(arg6); - if (x6 == (float)-1 && PyErr_Occurred()) - return NULL; - - x7 = _cffi_to_c_int(arg7, short); - if (x7 == (short)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(262), arg8, (char **)&x8); - if (datasize != 0) { - x8 = ((size_t)datasize) <= 640 ? (PerturbedField *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(262), arg8, (char **)&x8, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(196), arg9, (char **)&x9); - if (datasize != 0) { - x9 = ((size_t)datasize) <= 640 ? 
(XraySourceBox *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(196), arg9, (char **)&x9, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(264), arg10, (char **)&x10); - if (datasize != 0) { - x10 = ((size_t)datasize) <= 640 ? (TsBox *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(264), arg10, (char **)&x10, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(261), arg11, (char **)&x11); - if (datasize != 0) { - x11 = ((size_t)datasize) <= 640 ? (InitialConditions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(261), arg11, (char **)&x11, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(264), arg12, (char **)&x12); - if (datasize != 0) { - x12 = ((size_t)datasize) <= 640 ? (TsBox *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(264), arg12, (char **)&x12, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = ComputeTsBox(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_ComputeTsBox _cffi_d_ComputeTsBox -#endif - -static int _cffi_d_ComputeZstart_PhotonCons(double * x0) -{ - return ComputeZstart_PhotonCons(x0); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_ComputeZstart_PhotonCons(PyObject *self, PyObject *arg0) -{ - double * x0; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg0, (char 
**)&x0); - if (datasize != 0) { - x0 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg0, (char **)&x0, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = ComputeZstart_PhotonCons(x0); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_ComputeZstart_PhotonCons _cffi_d_ComputeZstart_PhotonCons -#endif - -static int _cffi_d_CreateFFTWWisdoms(UserParams * x0, CosmoParams * x1) -{ - return CreateFFTWWisdoms(x0, x1); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_CreateFFTWWisdoms(PyObject *self, PyObject *args) -{ - UserParams * x0; - CosmoParams * x1; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - - if (!PyArg_UnpackTuple(args, "CreateFFTWWisdoms", 2, 2, &arg0, &arg1)) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg0, (char **)&x0); - if (datasize != 0) { - x0 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? 
(CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = CreateFFTWWisdoms(x0, x1); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_CreateFFTWWisdoms _cffi_d_CreateFFTWWisdoms -#endif - -static double _cffi_d_EvaluateFcoll_delta(double x0, double x1, double x2, double x3) -{ - return EvaluateFcoll_delta(x0, x1, x2, x3); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_EvaluateFcoll_delta(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - - if (!PyArg_UnpackTuple(args, "EvaluateFcoll_delta", 4, 4, &arg0, &arg1, &arg2, &arg3)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluateFcoll_delta(x0, x1, x2, x3); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_EvaluateFcoll_delta _cffi_d_EvaluateFcoll_delta -#endif - -static double _cffi_d_EvaluateMcoll(double x0, double x1, double x2, double x3, double x4, double x5, double x6) -{ - return EvaluateMcoll(x0, x1, x2, x3, x4, x5, x6); -} -#ifndef PYPY_VERSION 
-static PyObject * -_cffi_f_EvaluateMcoll(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - double x5; - double x6; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - - if (!PyArg_UnpackTuple(args, "EvaluateMcoll", 7, 7, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = (double)_cffi_to_c_double(arg6); - if (x6 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluateMcoll(x0, x1, x2, x3, x4, x5, x6); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_EvaluateMcoll _cffi_d_EvaluateMcoll -#endif - -static double _cffi_d_EvaluateNhalo(double x0, double x1, double x2, double x3, double x4, double x5, double x6) -{ - return EvaluateNhalo(x0, x1, x2, x3, x4, x5, x6); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_EvaluateNhalo(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - double x5; - double x6; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject 
*arg6; - - if (!PyArg_UnpackTuple(args, "EvaluateNhalo", 7, 7, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = (double)_cffi_to_c_double(arg6); - if (x6 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluateNhalo(x0, x1, x2, x3, x4, x5, x6); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_EvaluateNhalo _cffi_d_EvaluateNhalo -#endif - -static double _cffi_d_EvaluateNhaloInv(double x0, double x1) -{ - return EvaluateNhaloInv(x0, x1); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_EvaluateNhaloInv(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - - if (!PyArg_UnpackTuple(args, "EvaluateNhaloInv", 2, 2, &arg0, &arg1)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluateNhaloInv(x0, x1); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define 
_cffi_f_EvaluateNhaloInv _cffi_d_EvaluateNhaloInv -#endif - -static double _cffi_d_EvaluateNionTs(double x0, double x1, double x2) -{ - return EvaluateNionTs(x0, x1, x2); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_EvaluateNionTs(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - - if (!PyArg_UnpackTuple(args, "EvaluateNionTs", 3, 3, &arg0, &arg1, &arg2)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluateNionTs(x0, x1, x2); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_EvaluateNionTs _cffi_d_EvaluateNionTs -#endif - -static double _cffi_d_EvaluateNionTs_MINI(double x0, double x1, double x2, double x3) -{ - return EvaluateNionTs_MINI(x0, x1, x2, x3); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_EvaluateNionTs_MINI(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - - if (!PyArg_UnpackTuple(args, "EvaluateNionTs_MINI", 4, 4, &arg0, &arg1, &arg2, &arg3)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && 
PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluateNionTs_MINI(x0, x1, x2, x3); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_EvaluateNionTs_MINI _cffi_d_EvaluateNionTs_MINI -#endif - -static double _cffi_d_EvaluateNion_Conditional(double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7, double x8, _Bool x9) -{ - return EvaluateNion_Conditional(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_EvaluateNion_Conditional(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - double x5; - double x6; - double x7; - double x8; - _Bool x9; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - - if (!PyArg_UnpackTuple(args, "EvaluateNion_Conditional", 10, 10, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = (double)_cffi_to_c_double(arg6); - if (x6 == (double)-1 && PyErr_Occurred()) - return NULL; - - x7 = (double)_cffi_to_c_double(arg7); - if (x7 == (double)-1 && 
PyErr_Occurred()) - return NULL; - - x8 = (double)_cffi_to_c_double(arg8); - if (x8 == (double)-1 && PyErr_Occurred()) - return NULL; - - x9 = (_Bool)_cffi_to_c__Bool(arg9); - if (x9 == (_Bool)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluateNion_Conditional(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_EvaluateNion_Conditional _cffi_d_EvaluateNion_Conditional -#endif - -static double _cffi_d_EvaluateNion_Conditional_MINI(double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7, double x8, double x9, _Bool x10) -{ - return EvaluateNion_Conditional_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_EvaluateNion_Conditional_MINI(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - double x5; - double x6; - double x7; - double x8; - double x9; - _Bool x10; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - PyObject *arg10; - - if (!PyArg_UnpackTuple(args, "EvaluateNion_Conditional_MINI", 11, 11, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && 
PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = (double)_cffi_to_c_double(arg6); - if (x6 == (double)-1 && PyErr_Occurred()) - return NULL; - - x7 = (double)_cffi_to_c_double(arg7); - if (x7 == (double)-1 && PyErr_Occurred()) - return NULL; - - x8 = (double)_cffi_to_c_double(arg8); - if (x8 == (double)-1 && PyErr_Occurred()) - return NULL; - - x9 = (double)_cffi_to_c_double(arg9); - if (x9 == (double)-1 && PyErr_Occurred()) - return NULL; - - x10 = (_Bool)_cffi_to_c__Bool(arg10); - if (x10 == (_Bool)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluateNion_Conditional_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_EvaluateNion_Conditional_MINI _cffi_d_EvaluateNion_Conditional_MINI -#endif - -static double _cffi_d_EvaluateSFRD(double x0, double x1) -{ - return EvaluateSFRD(x0, x1); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_EvaluateSFRD(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - - if (!PyArg_UnpackTuple(args, "EvaluateSFRD", 2, 2, &arg0, &arg1)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluateSFRD(x0, x1); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_EvaluateSFRD _cffi_d_EvaluateSFRD -#endif - -static double _cffi_d_EvaluateSFRD_Conditional(double x0, double x1, double x2, double x3, double 
x4, double x5, double x6, double x7) -{ - return EvaluateSFRD_Conditional(x0, x1, x2, x3, x4, x5, x6, x7); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_EvaluateSFRD_Conditional(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - double x5; - double x6; - double x7; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - - if (!PyArg_UnpackTuple(args, "EvaluateSFRD_Conditional", 8, 8, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = (double)_cffi_to_c_double(arg6); - if (x6 == (double)-1 && PyErr_Occurred()) - return NULL; - - x7 = (double)_cffi_to_c_double(arg7); - if (x7 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluateSFRD_Conditional(x0, x1, x2, x3, x4, x5, x6, x7); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_EvaluateSFRD_Conditional _cffi_d_EvaluateSFRD_Conditional -#endif - -static double _cffi_d_EvaluateSFRD_Conditional_MINI(double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7, double x8) -{ - return 
EvaluateSFRD_Conditional_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_EvaluateSFRD_Conditional_MINI(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - double x5; - double x6; - double x7; - double x8; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - - if (!PyArg_UnpackTuple(args, "EvaluateSFRD_Conditional_MINI", 9, 9, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = (double)_cffi_to_c_double(arg6); - if (x6 == (double)-1 && PyErr_Occurred()) - return NULL; - - x7 = (double)_cffi_to_c_double(arg7); - if (x7 == (double)-1 && PyErr_Occurred()) - return NULL; - - x8 = (double)_cffi_to_c_double(arg8); - if (x8 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluateSFRD_Conditional_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_EvaluateSFRD_Conditional_MINI _cffi_d_EvaluateSFRD_Conditional_MINI -#endif - -static double _cffi_d_EvaluateSFRD_MINI(double x0, double 
x1, double x2) -{ - return EvaluateSFRD_MINI(x0, x1, x2); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_EvaluateSFRD_MINI(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - - if (!PyArg_UnpackTuple(args, "EvaluateSFRD_MINI", 3, 3, &arg0, &arg1, &arg2)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluateSFRD_MINI(x0, x1, x2); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_EvaluateSFRD_MINI _cffi_d_EvaluateSFRD_MINI -#endif - -static double _cffi_d_EvaluateSigma(double x0) -{ - return EvaluateSigma(x0); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_EvaluateSigma(PyObject *self, PyObject *arg0) -{ - double x0; - double result; - PyObject *pyresult; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluateSigma(x0); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_EvaluateSigma _cffi_d_EvaluateSigma -#endif - -static double _cffi_d_EvaluatedFcolldz(double x0, double x1, double x2, double x3) -{ - return EvaluatedFcolldz(x0, x1, x2, x3); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_EvaluatedFcolldz(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject 
*arg1; - PyObject *arg2; - PyObject *arg3; - - if (!PyArg_UnpackTuple(args, "EvaluatedFcolldz", 4, 4, &arg0, &arg1, &arg2, &arg3)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluatedFcolldz(x0, x1, x2, x3); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_EvaluatedFcolldz _cffi_d_EvaluatedFcolldz -#endif - -static double _cffi_d_EvaluatedSigmasqdm(double x0) -{ - return EvaluatedSigmasqdm(x0); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_EvaluatedSigmasqdm(PyObject *self, PyObject *arg0) -{ - double x0; - double result; - PyObject *pyresult; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = EvaluatedSigmasqdm(x0); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_EvaluatedSigmasqdm _cffi_d_EvaluatedSigmasqdm -#endif - -static double _cffi_d_Fcoll_General(double x0, double x1, double x2) -{ - return Fcoll_General(x0, x1, x2); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_Fcoll_General(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - - if (!PyArg_UnpackTuple(args, "Fcoll_General", 3, 3, &arg0, &arg1, &arg2)) - return NULL; - - x0 = 
(double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = Fcoll_General(x0, x1, x2); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_Fcoll_General _cffi_d_Fcoll_General -#endif - -static void _cffi_d_FreePhotonConsMemory(void) -{ - FreePhotonConsMemory(); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_FreePhotonConsMemory(PyObject *self, PyObject *noarg) -{ - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { FreePhotonConsMemory(); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - (void)noarg; /* unused */ - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_FreePhotonConsMemory _cffi_d_FreePhotonConsMemory -#endif - -static int _cffi_d_FunctionThatCatches(_Bool x0, _Bool x1, double * x2) -{ - return FunctionThatCatches(x0, x1, x2); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_FunctionThatCatches(PyObject *self, PyObject *args) -{ - _Bool x0; - _Bool x1; - double * x2; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - - if (!PyArg_UnpackTuple(args, "FunctionThatCatches", 3, 3, &arg0, &arg1, &arg2)) - return NULL; - - x0 = (_Bool)_cffi_to_c__Bool(arg0); - if (x0 == (_Bool)-1 && PyErr_Occurred()) - return NULL; - - x1 = (_Bool)_cffi_to_c__Bool(arg1); - if (x1 == (_Bool)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? 
(double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = FunctionThatCatches(x0, x1, x2); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_FunctionThatCatches _cffi_d_FunctionThatCatches -#endif - -static void _cffi_d_FunctionThatThrows(void) -{ - FunctionThatThrows(); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_FunctionThatThrows(PyObject *self, PyObject *noarg) -{ - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { FunctionThatThrows(); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - (void)noarg; /* unused */ - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_FunctionThatThrows _cffi_d_FunctionThatThrows -#endif - -static int _cffi_d_InitialisePhotonCons(UserParams * x0, CosmoParams * x1, AstroParams * x2, FlagOptions * x3) -{ - return InitialisePhotonCons(x0, x1, x2, x3); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_InitialisePhotonCons(PyObject *self, PyObject *args) -{ - UserParams * x0; - CosmoParams * x1; - AstroParams * x2; - FlagOptions * x3; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - - if (!PyArg_UnpackTuple(args, "InitialisePhotonCons", 4, 4, &arg0, &arg1, &arg2, &arg3)) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg0, (char **)&x0); - if (datasize != 0) { - x0 = ((size_t)datasize) <= 640 ? 
(UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = InitialisePhotonCons(x0, x1, x2, x3); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_InitialisePhotonCons _cffi_d_InitialisePhotonCons -#endif - -static double _cffi_d_Mcoll_Conditional(double x0, double x1, double x2, double x3, double x4, double x5, int x6) -{ - return Mcoll_Conditional(x0, x1, x2, x3, x4, x5, x6); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_Mcoll_Conditional(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - double x5; - int x6; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - 
PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - - if (!PyArg_UnpackTuple(args, "Mcoll_Conditional", 7, 7, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = _cffi_to_c_int(arg6, int); - if (x6 == (int)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = Mcoll_Conditional(x0, x1, x2, x3, x4, x5, x6); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_Mcoll_Conditional _cffi_d_Mcoll_Conditional -#endif - -static double _cffi_d_Nhalo_Conditional(double x0, double x1, double x2, double x3, double x4, double x5, int x6) -{ - return Nhalo_Conditional(x0, x1, x2, x3, x4, x5, x6); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_Nhalo_Conditional(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - double x5; - int x6; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - - if (!PyArg_UnpackTuple(args, "Nhalo_Conditional", 7, 7, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = 
(double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = _cffi_to_c_int(arg6, int); - if (x6 == (int)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = Nhalo_Conditional(x0, x1, x2, x3, x4, x5, x6); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_Nhalo_Conditional _cffi_d_Nhalo_Conditional -#endif - -static double _cffi_d_Nion_ConditionalM(double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7, double x8, double x9, double x10, double x11, double x12, int x13) -{ - return Nion_ConditionalM(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_Nion_ConditionalM(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - double x5; - double x6; - double x7; - double x8; - double x9; - double x10; - double x11; - double x12; - int x13; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - PyObject *arg10; - PyObject *arg11; - PyObject *arg12; - PyObject *arg13; - - if (!PyArg_UnpackTuple(args, "Nion_ConditionalM", 14, 14, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11, &arg12, &arg13)) - return NULL; - - x0 = 
(double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = (double)_cffi_to_c_double(arg6); - if (x6 == (double)-1 && PyErr_Occurred()) - return NULL; - - x7 = (double)_cffi_to_c_double(arg7); - if (x7 == (double)-1 && PyErr_Occurred()) - return NULL; - - x8 = (double)_cffi_to_c_double(arg8); - if (x8 == (double)-1 && PyErr_Occurred()) - return NULL; - - x9 = (double)_cffi_to_c_double(arg9); - if (x9 == (double)-1 && PyErr_Occurred()) - return NULL; - - x10 = (double)_cffi_to_c_double(arg10); - if (x10 == (double)-1 && PyErr_Occurred()) - return NULL; - - x11 = (double)_cffi_to_c_double(arg11); - if (x11 == (double)-1 && PyErr_Occurred()) - return NULL; - - x12 = (double)_cffi_to_c_double(arg12); - if (x12 == (double)-1 && PyErr_Occurred()) - return NULL; - - x13 = _cffi_to_c_int(arg13, int); - if (x13 == (int)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = Nion_ConditionalM(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_Nion_ConditionalM _cffi_d_Nion_ConditionalM -#endif - -static double _cffi_d_Nion_ConditionalM_MINI(double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7, double x8, double x9, double x10, double x11, double x12, double x13, int x14) -{ - return 
Nion_ConditionalM_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_Nion_ConditionalM_MINI(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - double x5; - double x6; - double x7; - double x8; - double x9; - double x10; - double x11; - double x12; - double x13; - int x14; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - PyObject *arg10; - PyObject *arg11; - PyObject *arg12; - PyObject *arg13; - PyObject *arg14; - - if (!PyArg_UnpackTuple(args, "Nion_ConditionalM_MINI", 15, 15, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11, &arg12, &arg13, &arg14)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = (double)_cffi_to_c_double(arg6); - if (x6 == (double)-1 && PyErr_Occurred()) - return NULL; - - x7 = (double)_cffi_to_c_double(arg7); - if (x7 == (double)-1 && PyErr_Occurred()) - return NULL; - - x8 = (double)_cffi_to_c_double(arg8); - if (x8 == (double)-1 && PyErr_Occurred()) - return NULL; - - x9 = (double)_cffi_to_c_double(arg9); - if (x9 == (double)-1 && PyErr_Occurred()) - return NULL; - - x10 = (double)_cffi_to_c_double(arg10); - if (x10 == (double)-1 && 
PyErr_Occurred()) - return NULL; - - x11 = (double)_cffi_to_c_double(arg11); - if (x11 == (double)-1 && PyErr_Occurred()) - return NULL; - - x12 = (double)_cffi_to_c_double(arg12); - if (x12 == (double)-1 && PyErr_Occurred()) - return NULL; - - x13 = (double)_cffi_to_c_double(arg13); - if (x13 == (double)-1 && PyErr_Occurred()) - return NULL; - - x14 = _cffi_to_c_int(arg14, int); - if (x14 == (int)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = Nion_ConditionalM_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_Nion_ConditionalM_MINI _cffi_d_Nion_ConditionalM_MINI -#endif - -static double _cffi_d_Nion_General(double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7, double x8, double x9) -{ - return Nion_General(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_Nion_General(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - double x5; - double x6; - double x7; - double x8; - double x9; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - - if (!PyArg_UnpackTuple(args, "Nion_General", 10, 10, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && 
PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = (double)_cffi_to_c_double(arg6); - if (x6 == (double)-1 && PyErr_Occurred()) - return NULL; - - x7 = (double)_cffi_to_c_double(arg7); - if (x7 == (double)-1 && PyErr_Occurred()) - return NULL; - - x8 = (double)_cffi_to_c_double(arg8); - if (x8 == (double)-1 && PyErr_Occurred()) - return NULL; - - x9 = (double)_cffi_to_c_double(arg9); - if (x9 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = Nion_General(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_Nion_General _cffi_d_Nion_General -#endif - -static double _cffi_d_Nion_General_MINI(double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7, double x8, double x9, double x10) -{ - return Nion_General_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_Nion_General_MINI(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - double x5; - double x6; - double x7; - double x8; - double x9; - double x10; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - PyObject *arg10; - - if (!PyArg_UnpackTuple(args, "Nion_General_MINI", 11, 11, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if 
(x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = (double)_cffi_to_c_double(arg6); - if (x6 == (double)-1 && PyErr_Occurred()) - return NULL; - - x7 = (double)_cffi_to_c_double(arg7); - if (x7 == (double)-1 && PyErr_Occurred()) - return NULL; - - x8 = (double)_cffi_to_c_double(arg8); - if (x8 == (double)-1 && PyErr_Occurred()) - return NULL; - - x9 = (double)_cffi_to_c_double(arg9); - if (x9 == (double)-1 && PyErr_Occurred()) - return NULL; - - x10 = (double)_cffi_to_c_double(arg10); - if (x10 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = Nion_General_MINI(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_Nion_General_MINI _cffi_d_Nion_General_MINI -#endif - -static int _cffi_d_ObtainPhotonConsData(double * x0, double * x1, int * x2, double * x3, double * x4, int * x5, double * x6, double * x7, int * x8) -{ - return ObtainPhotonConsData(x0, x1, x2, x3, x4, x5, x6, x7, x8); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_ObtainPhotonConsData(PyObject *self, PyObject *args) -{ - double * x0; - double * x1; - int * x2; - double * x3; - double * x4; - int * x5; - double * x6; - double * x7; - int * x8; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject 
*arg6; - PyObject *arg7; - PyObject *arg8; - - if (!PyArg_UnpackTuple(args, "ObtainPhotonConsData", 9, 9, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8)) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg0, (char **)&x0); - if (datasize != 0) { - x0 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg0, (char **)&x0, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(217), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (int *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(217), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(217), arg5, (char **)&x5); - if (datasize != 0) { - x5 = ((size_t)datasize) <= 640 ? 
(int *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(217), arg5, (char **)&x5, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg6, (char **)&x6); - if (datasize != 0) { - x6 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg6, (char **)&x6, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg7, (char **)&x7); - if (datasize != 0) { - x7 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg7, (char **)&x7, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(217), arg8, (char **)&x8); - if (datasize != 0) { - x8 = ((size_t)datasize) <= 640 ? (int *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(217), arg8, (char **)&x8, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = ObtainPhotonConsData(x0, x1, x2, x3, x4, x5, x6, x7, x8); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_ObtainPhotonConsData _cffi_d_ObtainPhotonConsData -#endif - -static int _cffi_d_PhotonCons_Calibration(double * x0, double * x1, int x2) -{ - return PhotonCons_Calibration(x0, x1, x2); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_PhotonCons_Calibration(PyObject *self, PyObject *args) -{ - double * x0; - double * x1; - int x2; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - - if 
(!PyArg_UnpackTuple(args, "PhotonCons_Calibration", 3, 3, &arg0, &arg1, &arg2)) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg0, (char **)&x0); - if (datasize != 0) { - x0 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg0, (char **)&x0, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - x2 = _cffi_to_c_int(arg2, int); - if (x2 == (int)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = PhotonCons_Calibration(x0, x1, x2); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_PhotonCons_Calibration _cffi_d_PhotonCons_Calibration -#endif - -static int _cffi_d_SomethingThatCatches(_Bool x0) -{ - return SomethingThatCatches(x0); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_SomethingThatCatches(PyObject *self, PyObject *arg0) -{ - _Bool x0; - int result; - PyObject *pyresult; - - x0 = (_Bool)_cffi_to_c__Bool(arg0); - if (x0 == (_Bool)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = SomethingThatCatches(x0); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - return pyresult; -} -#else -# define _cffi_f_SomethingThatCatches _cffi_d_SomethingThatCatches -#endif - -static int _cffi_d_UpdateXraySourceBox(UserParams * x0, CosmoParams * x1, AstroParams * x2, FlagOptions 
* x3, HaloBox * x4, double x5, double x6, int x7, XraySourceBox * x8) -{ - return UpdateXraySourceBox(x0, x1, x2, x3, x4, x5, x6, x7, x8); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_UpdateXraySourceBox(PyObject *self, PyObject *args) -{ - UserParams * x0; - CosmoParams * x1; - AstroParams * x2; - FlagOptions * x3; - HaloBox * x4; - double x5; - double x6; - int x7; - XraySourceBox * x8; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - - if (!PyArg_UnpackTuple(args, "UpdateXraySourceBox", 9, 9, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8)) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg0, (char **)&x0); - if (datasize != 0) { - x0 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? 
(FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(192), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? (HaloBox *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(192), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = (double)_cffi_to_c_double(arg6); - if (x6 == (double)-1 && PyErr_Occurred()) - return NULL; - - x7 = _cffi_to_c_int(arg7, int); - if (x7 == (int)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(196), arg8, (char **)&x8); - if (datasize != 0) { - x8 = ((size_t)datasize) <= 640 ? (XraySourceBox *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(196), arg8, (char **)&x8, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = UpdateXraySourceBox(x0, x1, x2, x3, x4, x5, x6, x7, x8); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_UpdateXraySourceBox _cffi_d_UpdateXraySourceBox -#endif - -static void _cffi_d_adjust_redshifts_for_photoncons(AstroParams * x0, FlagOptions * x1, float * x2, float * x3, float * x4) -{ - adjust_redshifts_for_photoncons(x0, x1, x2, x3, x4); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_adjust_redshifts_for_photoncons(PyObject *self, PyObject *args) -{ - AstroParams * x0; - FlagOptions * x1; - float * x2; - float * x3; - float * x4; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - 
PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - - if (!PyArg_UnpackTuple(args, "adjust_redshifts_for_photoncons", 5, 5, &arg0, &arg1, &arg2, &arg3, &arg4)) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg0, (char **)&x0); - if (datasize != 0) { - x0 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg0, (char **)&x0, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? 
(float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { adjust_redshifts_for_photoncons(x0, x1, x2, x3, x4); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_adjust_redshifts_for_photoncons _cffi_d_adjust_redshifts_for_photoncons -#endif - -static double _cffi_d_atomic_cooling_threshold(float x0) -{ - return atomic_cooling_threshold(x0); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_atomic_cooling_threshold(PyObject *self, PyObject *arg0) -{ - float x0; - double result; - PyObject *pyresult; - - x0 = (float)_cffi_to_c_float(arg0); - if (x0 == (float)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = atomic_cooling_threshold(x0); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_atomic_cooling_threshold _cffi_d_atomic_cooling_threshold -#endif - -static double _cffi_d_conditional_mf(double x0, double x1, double x2, double x3, int x4) -{ - return conditional_mf(x0, x1, x2, x3, x4); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_conditional_mf(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - int x4; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - - if (!PyArg_UnpackTuple(args, "conditional_mf", 5, 5, &arg0, &arg1, &arg2, &arg3, &arg4)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - 
return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = _cffi_to_c_int(arg4, int); - if (x4 == (int)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = conditional_mf(x0, x1, x2, x3, x4); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_conditional_mf _cffi_d_conditional_mf -#endif - -static void _cffi_d_determine_deltaz_for_photoncons(void) -{ - determine_deltaz_for_photoncons(); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_determine_deltaz_for_photoncons(PyObject *self, PyObject *noarg) -{ - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { determine_deltaz_for_photoncons(); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - (void)noarg; /* unused */ - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_determine_deltaz_for_photoncons _cffi_d_determine_deltaz_for_photoncons -#endif - -static double _cffi_d_dicke(double x0) -{ - return dicke(x0); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_dicke(PyObject *self, PyObject *arg0) -{ - double x0; - double result; - PyObject *pyresult; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = dicke(x0); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_dicke _cffi_d_dicke -#endif - -static double _cffi_d_dsigmasqdm_z0(double x0) -{ - return dsigmasqdm_z0(x0); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_dsigmasqdm_z0(PyObject *self, PyObject *arg0) -{ - double x0; - double result; - PyObject *pyresult; 
- - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = dsigmasqdm_z0(x0); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_dsigmasqdm_z0 _cffi_d_dsigmasqdm_z0 -#endif - -static double _cffi_d_expected_nhalo(double x0, UserParams * x1, CosmoParams * x2, AstroParams * x3, FlagOptions * x4) -{ - return expected_nhalo(x0, x1, x2, x3, x4); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_expected_nhalo(PyObject *self, PyObject *args) -{ - double x0; - UserParams * x1; - CosmoParams * x2; - AstroParams * x3; - FlagOptions * x4; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - - if (!PyArg_UnpackTuple(args, "expected_nhalo", 5, 5, &arg0, &arg1, &arg2, &arg3, &arg4)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? 
(AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = expected_nhalo(x0, x1, x2, x3, x4); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_expected_nhalo _cffi_d_expected_nhalo -#endif - -static void _cffi_d_free(void * x0) -{ - free(x0); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_free(PyObject *self, PyObject *arg0) -{ - void * x0; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(498), arg0, (char **)&x0); - if (datasize != 0) { - x0 = ((size_t)datasize) <= 640 ? 
(void *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(498), arg0, (char **)&x0, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { free(x0); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_free _cffi_d_free -#endif - -static double _cffi_d_get_delta_crit(int x0, double x1, double x2) -{ - return get_delta_crit(x0, x1, x2); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_get_delta_crit(PyObject *self, PyObject *args) -{ - int x0; - double x1; - double x2; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - - if (!PyArg_UnpackTuple(args, "get_delta_crit", 3, 3, &arg0, &arg1, &arg2)) - return NULL; - - x0 = _cffi_to_c_int(arg0, int); - if (x0 == (int)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = get_delta_crit(x0, x1, x2); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_get_delta_crit _cffi_d_get_delta_crit -#endif - -static void _cffi_d_init_FcollTable(double x0, double x1, _Bool x2) -{ - init_FcollTable(x0, x1, x2); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_init_FcollTable(PyObject *self, PyObject *args) -{ - double x0; - double x1; - _Bool x2; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - - if (!PyArg_UnpackTuple(args, "init_FcollTable", 3, 3, &arg0, &arg1, &arg2)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - 
return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (_Bool)_cffi_to_c__Bool(arg2); - if (x2 == (_Bool)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { init_FcollTable(x0, x1, x2); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_init_FcollTable _cffi_d_init_FcollTable -#endif - -static int _cffi_d_init_heat(void) -{ - return init_heat(); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_init_heat(PyObject *self, PyObject *noarg) -{ - int result; - PyObject *pyresult; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = init_heat(); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - (void)noarg; /* unused */ - pyresult = _cffi_from_c_int(result, int); - return pyresult; -} -#else -# define _cffi_f_init_heat _cffi_d_init_heat -#endif - -static double _cffi_d_init_ps(void) -{ - return init_ps(); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_init_ps(PyObject *self, PyObject *noarg) -{ - double result; - PyObject *pyresult; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = init_ps(); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - (void)noarg; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_init_ps _cffi_d_init_ps -#endif - -static void _cffi_d_initialiseSigmaMInterpTable(float x0, float x1) -{ - initialiseSigmaMInterpTable(x0, x1); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_initialiseSigmaMInterpTable(PyObject *self, PyObject *args) -{ - float x0; - float x1; - PyObject *arg0; - PyObject *arg1; - - if (!PyArg_UnpackTuple(args, "initialiseSigmaMInterpTable", 2, 2, &arg0, &arg1)) - return NULL; - - x0 = (float)_cffi_to_c_float(arg0); - if (x0 == (float)-1 && PyErr_Occurred()) - return NULL; - - x1 = 
(float)_cffi_to_c_float(arg1); - if (x1 == (float)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { initialiseSigmaMInterpTable(x0, x1); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_initialiseSigmaMInterpTable _cffi_d_initialiseSigmaMInterpTable -#endif - -static void _cffi_d_initialise_FgtrM_delta_table(double x0, double x1, double x2, double x3, double x4, double x5) -{ - initialise_FgtrM_delta_table(x0, x1, x2, x3, x4, x5); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_initialise_FgtrM_delta_table(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - double x5; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - - if (!PyArg_UnpackTuple(args, "initialise_FgtrM_delta_table", 6, 6, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { initialise_FgtrM_delta_table(x0, x1, x2, x3, x4, x5); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_initialise_FgtrM_delta_table _cffi_d_initialise_FgtrM_delta_table -#endif - -static void _cffi_d_initialise_GL(float x0, float x1) 
-{ - initialise_GL(x0, x1); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_initialise_GL(PyObject *self, PyObject *args) -{ - float x0; - float x1; - PyObject *arg0; - PyObject *arg1; - - if (!PyArg_UnpackTuple(args, "initialise_GL", 2, 2, &arg0, &arg1)) - return NULL; - - x0 = (float)_cffi_to_c_float(arg0); - if (x0 == (float)-1 && PyErr_Occurred()) - return NULL; - - x1 = (float)_cffi_to_c_float(arg1); - if (x1 == (float)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { initialise_GL(x0, x1); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_initialise_GL _cffi_d_initialise_GL -#endif - -static void _cffi_d_initialise_Nion_Conditional_spline(float x0, float x1, float x2, float x3, float x4, float x5, float x6, float x7, float x8, float x9, float x10, float x11, float x12, float x13, float x14, float x15, float x16, float x17, float x18, float x19, float x20, float x21, int x22, int x23, _Bool x24, _Bool x25) -{ - initialise_Nion_Conditional_spline(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_initialise_Nion_Conditional_spline(PyObject *self, PyObject *args) -{ - float x0; - float x1; - float x2; - float x3; - float x4; - float x5; - float x6; - float x7; - float x8; - float x9; - float x10; - float x11; - float x12; - float x13; - float x14; - float x15; - float x16; - float x17; - float x18; - float x19; - float x20; - float x21; - int x22; - int x23; - _Bool x24; - _Bool x25; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - PyObject *arg10; - PyObject *arg11; - PyObject *arg12; - PyObject *arg13; - PyObject *arg14; - PyObject *arg15; - PyObject *arg16; - 
PyObject *arg17; - PyObject *arg18; - PyObject *arg19; - PyObject *arg20; - PyObject *arg21; - PyObject *arg22; - PyObject *arg23; - PyObject *arg24; - PyObject *arg25; - - if (!PyArg_UnpackTuple(args, "initialise_Nion_Conditional_spline", 26, 26, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11, &arg12, &arg13, &arg14, &arg15, &arg16, &arg17, &arg18, &arg19, &arg20, &arg21, &arg22, &arg23, &arg24, &arg25)) - return NULL; - - x0 = (float)_cffi_to_c_float(arg0); - if (x0 == (float)-1 && PyErr_Occurred()) - return NULL; - - x1 = (float)_cffi_to_c_float(arg1); - if (x1 == (float)-1 && PyErr_Occurred()) - return NULL; - - x2 = (float)_cffi_to_c_float(arg2); - if (x2 == (float)-1 && PyErr_Occurred()) - return NULL; - - x3 = (float)_cffi_to_c_float(arg3); - if (x3 == (float)-1 && PyErr_Occurred()) - return NULL; - - x4 = (float)_cffi_to_c_float(arg4); - if (x4 == (float)-1 && PyErr_Occurred()) - return NULL; - - x5 = (float)_cffi_to_c_float(arg5); - if (x5 == (float)-1 && PyErr_Occurred()) - return NULL; - - x6 = (float)_cffi_to_c_float(arg6); - if (x6 == (float)-1 && PyErr_Occurred()) - return NULL; - - x7 = (float)_cffi_to_c_float(arg7); - if (x7 == (float)-1 && PyErr_Occurred()) - return NULL; - - x8 = (float)_cffi_to_c_float(arg8); - if (x8 == (float)-1 && PyErr_Occurred()) - return NULL; - - x9 = (float)_cffi_to_c_float(arg9); - if (x9 == (float)-1 && PyErr_Occurred()) - return NULL; - - x10 = (float)_cffi_to_c_float(arg10); - if (x10 == (float)-1 && PyErr_Occurred()) - return NULL; - - x11 = (float)_cffi_to_c_float(arg11); - if (x11 == (float)-1 && PyErr_Occurred()) - return NULL; - - x12 = (float)_cffi_to_c_float(arg12); - if (x12 == (float)-1 && PyErr_Occurred()) - return NULL; - - x13 = (float)_cffi_to_c_float(arg13); - if (x13 == (float)-1 && PyErr_Occurred()) - return NULL; - - x14 = (float)_cffi_to_c_float(arg14); - if (x14 == (float)-1 && PyErr_Occurred()) - return NULL; - - x15 = (float)_cffi_to_c_float(arg15); - if (x15 
== (float)-1 && PyErr_Occurred()) - return NULL; - - x16 = (float)_cffi_to_c_float(arg16); - if (x16 == (float)-1 && PyErr_Occurred()) - return NULL; - - x17 = (float)_cffi_to_c_float(arg17); - if (x17 == (float)-1 && PyErr_Occurred()) - return NULL; - - x18 = (float)_cffi_to_c_float(arg18); - if (x18 == (float)-1 && PyErr_Occurred()) - return NULL; - - x19 = (float)_cffi_to_c_float(arg19); - if (x19 == (float)-1 && PyErr_Occurred()) - return NULL; - - x20 = (float)_cffi_to_c_float(arg20); - if (x20 == (float)-1 && PyErr_Occurred()) - return NULL; - - x21 = (float)_cffi_to_c_float(arg21); - if (x21 == (float)-1 && PyErr_Occurred()) - return NULL; - - x22 = _cffi_to_c_int(arg22, int); - if (x22 == (int)-1 && PyErr_Occurred()) - return NULL; - - x23 = _cffi_to_c_int(arg23, int); - if (x23 == (int)-1 && PyErr_Occurred()) - return NULL; - - x24 = (_Bool)_cffi_to_c__Bool(arg24); - if (x24 == (_Bool)-1 && PyErr_Occurred()) - return NULL; - - x25 = (_Bool)_cffi_to_c__Bool(arg25); - if (x25 == (_Bool)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { initialise_Nion_Conditional_spline(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_initialise_Nion_Conditional_spline _cffi_d_initialise_Nion_Conditional_spline -#endif - -static void _cffi_d_initialise_Nion_Ts_spline(int x0, float x1, float x2, float x3, float x4, float x5, float x6, float x7, float x8, float x9, float x10, _Bool x11) -{ - initialise_Nion_Ts_spline(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_initialise_Nion_Ts_spline(PyObject *self, PyObject *args) -{ - int x0; - float x1; - float x2; - float x3; - float x4; - float x5; - float x6; - float x7; - float x8; - float x9; - float x10; - _Bool x11; - 
PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - PyObject *arg10; - PyObject *arg11; - - if (!PyArg_UnpackTuple(args, "initialise_Nion_Ts_spline", 12, 12, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11)) - return NULL; - - x0 = _cffi_to_c_int(arg0, int); - if (x0 == (int)-1 && PyErr_Occurred()) - return NULL; - - x1 = (float)_cffi_to_c_float(arg1); - if (x1 == (float)-1 && PyErr_Occurred()) - return NULL; - - x2 = (float)_cffi_to_c_float(arg2); - if (x2 == (float)-1 && PyErr_Occurred()) - return NULL; - - x3 = (float)_cffi_to_c_float(arg3); - if (x3 == (float)-1 && PyErr_Occurred()) - return NULL; - - x4 = (float)_cffi_to_c_float(arg4); - if (x4 == (float)-1 && PyErr_Occurred()) - return NULL; - - x5 = (float)_cffi_to_c_float(arg5); - if (x5 == (float)-1 && PyErr_Occurred()) - return NULL; - - x6 = (float)_cffi_to_c_float(arg6); - if (x6 == (float)-1 && PyErr_Occurred()) - return NULL; - - x7 = (float)_cffi_to_c_float(arg7); - if (x7 == (float)-1 && PyErr_Occurred()) - return NULL; - - x8 = (float)_cffi_to_c_float(arg8); - if (x8 == (float)-1 && PyErr_Occurred()) - return NULL; - - x9 = (float)_cffi_to_c_float(arg9); - if (x9 == (float)-1 && PyErr_Occurred()) - return NULL; - - x10 = (float)_cffi_to_c_float(arg10); - if (x10 == (float)-1 && PyErr_Occurred()) - return NULL; - - x11 = (_Bool)_cffi_to_c__Bool(arg11); - if (x11 == (_Bool)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { initialise_Nion_Ts_spline(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_initialise_Nion_Ts_spline _cffi_d_initialise_Nion_Ts_spline -#endif - -static void _cffi_d_initialise_SFRD_Conditional_table(double x0, double x1, double x2, 
float x3, double x4, double x5, double x6, float x7, float x8, float x9, float x10, int x11, int x12, _Bool x13) -{ - initialise_SFRD_Conditional_table(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_initialise_SFRD_Conditional_table(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - float x3; - double x4; - double x5; - double x6; - float x7; - float x8; - float x9; - float x10; - int x11; - int x12; - _Bool x13; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - PyObject *arg10; - PyObject *arg11; - PyObject *arg12; - PyObject *arg13; - - if (!PyArg_UnpackTuple(args, "initialise_SFRD_Conditional_table", 14, 14, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11, &arg12, &arg13)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (float)_cffi_to_c_float(arg3); - if (x3 == (float)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = (double)_cffi_to_c_double(arg6); - if (x6 == (double)-1 && PyErr_Occurred()) - return NULL; - - x7 = (float)_cffi_to_c_float(arg7); - if (x7 == (float)-1 && PyErr_Occurred()) - return NULL; - - x8 = (float)_cffi_to_c_float(arg8); - if (x8 == (float)-1 && PyErr_Occurred()) - return NULL; - - x9 = (float)_cffi_to_c_float(arg9); - if (x9 == (float)-1 && PyErr_Occurred()) - return NULL; - - x10 = 
(float)_cffi_to_c_float(arg10); - if (x10 == (float)-1 && PyErr_Occurred()) - return NULL; - - x11 = _cffi_to_c_int(arg11, int); - if (x11 == (int)-1 && PyErr_Occurred()) - return NULL; - - x12 = _cffi_to_c_int(arg12, int); - if (x12 == (int)-1 && PyErr_Occurred()) - return NULL; - - x13 = (_Bool)_cffi_to_c__Bool(arg13); - if (x13 == (_Bool)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { initialise_SFRD_Conditional_table(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_initialise_SFRD_Conditional_table _cffi_d_initialise_SFRD_Conditional_table -#endif - -static void _cffi_d_initialise_SFRD_spline(int x0, float x1, float x2, float x3, float x4, float x5, float x6, float x7, _Bool x8) -{ - initialise_SFRD_spline(x0, x1, x2, x3, x4, x5, x6, x7, x8); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_initialise_SFRD_spline(PyObject *self, PyObject *args) -{ - int x0; - float x1; - float x2; - float x3; - float x4; - float x5; - float x6; - float x7; - _Bool x8; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - - if (!PyArg_UnpackTuple(args, "initialise_SFRD_spline", 9, 9, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8)) - return NULL; - - x0 = _cffi_to_c_int(arg0, int); - if (x0 == (int)-1 && PyErr_Occurred()) - return NULL; - - x1 = (float)_cffi_to_c_float(arg1); - if (x1 == (float)-1 && PyErr_Occurred()) - return NULL; - - x2 = (float)_cffi_to_c_float(arg2); - if (x2 == (float)-1 && PyErr_Occurred()) - return NULL; - - x3 = (float)_cffi_to_c_float(arg3); - if (x3 == (float)-1 && PyErr_Occurred()) - return NULL; - - x4 = (float)_cffi_to_c_float(arg4); - if (x4 == (float)-1 && PyErr_Occurred()) - return NULL; - - x5 = 
(float)_cffi_to_c_float(arg5); - if (x5 == (float)-1 && PyErr_Occurred()) - return NULL; - - x6 = (float)_cffi_to_c_float(arg6); - if (x6 == (float)-1 && PyErr_Occurred()) - return NULL; - - x7 = (float)_cffi_to_c_float(arg7); - if (x7 == (float)-1 && PyErr_Occurred()) - return NULL; - - x8 = (_Bool)_cffi_to_c__Bool(arg8); - if (x8 == (_Bool)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { initialise_SFRD_spline(x0, x1, x2, x3, x4, x5, x6, x7, x8); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_initialise_SFRD_spline _cffi_d_initialise_SFRD_spline -#endif - -static void _cffi_d_initialise_dNdM_inverse_table(double x0, double x1, double x2, double x3, double x4, _Bool x5) -{ - initialise_dNdM_inverse_table(x0, x1, x2, x3, x4, x5); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_initialise_dNdM_inverse_table(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - _Bool x5; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - - if (!PyArg_UnpackTuple(args, "initialise_dNdM_inverse_table", 6, 6, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (_Bool)_cffi_to_c__Bool(arg5); - if (x5 == (_Bool)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { 
initialise_dNdM_inverse_table(x0, x1, x2, x3, x4, x5); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_initialise_dNdM_inverse_table _cffi_d_initialise_dNdM_inverse_table -#endif - -static void _cffi_d_initialise_dNdM_tables(double x0, double x1, double x2, double x3, double x4, double x5, _Bool x6) -{ - initialise_dNdM_tables(x0, x1, x2, x3, x4, x5, x6); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_initialise_dNdM_tables(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - double x3; - double x4; - double x5; - _Bool x6; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - - if (!PyArg_UnpackTuple(args, "initialise_dNdM_tables", 7, 7, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = (double)_cffi_to_c_double(arg3); - if (x3 == (double)-1 && PyErr_Occurred()) - return NULL; - - x4 = (double)_cffi_to_c_double(arg4); - if (x4 == (double)-1 && PyErr_Occurred()) - return NULL; - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = (_Bool)_cffi_to_c__Bool(arg6); - if (x6 == (_Bool)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { initialise_dNdM_tables(x0, x1, x2, x3, x4, x5, x6); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_initialise_dNdM_tables _cffi_d_initialise_dNdM_tables -#endif - -static void _cffi_d_set_alphacons_params(double x0, double x1) -{ - 
set_alphacons_params(x0, x1); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_set_alphacons_params(PyObject *self, PyObject *args) -{ - double x0; - double x1; - PyObject *arg0; - PyObject *arg1; - - if (!PyArg_UnpackTuple(args, "set_alphacons_params", 2, 2, &arg0, &arg1)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { set_alphacons_params(x0, x1); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - Py_INCREF(Py_None); - return Py_None; -} -#else -# define _cffi_f_set_alphacons_params _cffi_d_set_alphacons_params -#endif - -static double _cffi_d_sigma_z0(double x0) -{ - return sigma_z0(x0); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_sigma_z0(PyObject *self, PyObject *arg0) -{ - double x0; - double result; - PyObject *pyresult; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = sigma_z0(x0); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_sigma_z0 _cffi_d_sigma_z0 -#endif - -static int _cffi_d_single_test_sample(UserParams * x0, CosmoParams * x1, AstroParams * x2, FlagOptions * x3, int x4, int x5, float * x6, int * x7, double x8, double x9, int * x10, int * x11, double * x12, double * x13, double * x14, float * x15, int * x16) -{ - return single_test_sample(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_single_test_sample(PyObject *self, PyObject *args) -{ - UserParams * x0; - CosmoParams * x1; - AstroParams * x2; - FlagOptions * x3; - int x4; - int x5; - float * x6; - int * x7; - double x8; - 
double x9; - int * x10; - int * x11; - double * x12; - double * x13; - double * x14; - float * x15; - int * x16; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - PyObject *arg10; - PyObject *arg11; - PyObject *arg12; - PyObject *arg13; - PyObject *arg14; - PyObject *arg15; - PyObject *arg16; - - if (!PyArg_UnpackTuple(args, "single_test_sample", 17, 17, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10, &arg11, &arg12, &arg13, &arg14, &arg15, &arg16)) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg0, (char **)&x0); - if (datasize != 0) { - x0 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? 
(FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - x4 = _cffi_to_c_int(arg4, int); - if (x4 == (int)-1 && PyErr_Occurred()) - return NULL; - - x5 = _cffi_to_c_int(arg5, int); - if (x5 == (int)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg6, (char **)&x6); - if (datasize != 0) { - x6 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg6, (char **)&x6, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(217), arg7, (char **)&x7); - if (datasize != 0) { - x7 = ((size_t)datasize) <= 640 ? (int *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(217), arg7, (char **)&x7, - datasize, &large_args_free) < 0) - return NULL; - } - - x8 = (double)_cffi_to_c_double(arg8); - if (x8 == (double)-1 && PyErr_Occurred()) - return NULL; - - x9 = (double)_cffi_to_c_double(arg9); - if (x9 == (double)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(217), arg10, (char **)&x10); - if (datasize != 0) { - x10 = ((size_t)datasize) <= 640 ? (int *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(217), arg10, (char **)&x10, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(217), arg11, (char **)&x11); - if (datasize != 0) { - x11 = ((size_t)datasize) <= 640 ? (int *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(217), arg11, (char **)&x11, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg12, (char **)&x12); - if (datasize != 0) { - x12 = ((size_t)datasize) <= 640 ? 
(double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg12, (char **)&x12, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg13, (char **)&x13); - if (datasize != 0) { - x13 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg13, (char **)&x13, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg14, (char **)&x14); - if (datasize != 0) { - x14 = ((size_t)datasize) <= 640 ? (double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg14, (char **)&x14, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg15, (char **)&x15); - if (datasize != 0) { - x15 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg15, (char **)&x15, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(217), arg16, (char **)&x16); - if (datasize != 0) { - x16 = ((size_t)datasize) <= 640 ? 
(int *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(217), arg16, (char **)&x16, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = single_test_sample(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_single_test_sample _cffi_d_single_test_sample -#endif - -static int _cffi_d_test_filter(UserParams * x0, CosmoParams * x1, AstroParams * x2, FlagOptions * x3, float * x4, double x5, double x6, int x7, double * x8) -{ - return test_filter(x0, x1, x2, x3, x4, x5, x6, x7, x8); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_test_filter(PyObject *self, PyObject *args) -{ - UserParams * x0; - CosmoParams * x1; - AstroParams * x2; - FlagOptions * x3; - float * x4; - double x5; - double x6; - int x7; - double * x8; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - - if (!PyArg_UnpackTuple(args, "test_filter", 9, 9, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8)) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg0, (char **)&x0); - if (datasize != 0) { - x0 = ((size_t)datasize) <= 640 ? (UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg0, (char **)&x0, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? 
(CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - x5 = (double)_cffi_to_c_double(arg5); - if (x5 == (double)-1 && PyErr_Occurred()) - return NULL; - - x6 = (double)_cffi_to_c_double(arg6); - if (x6 == (double)-1 && PyErr_Occurred()) - return NULL; - - x7 = _cffi_to_c_int(arg7, int); - if (x7 == (int)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(207), arg8, (char **)&x8); - if (datasize != 0) { - x8 = ((size_t)datasize) <= 640 ? 
(double *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(207), arg8, (char **)&x8, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = test_filter(x0, x1, x2, x3, x4, x5, x6, x7, x8); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_test_filter _cffi_d_test_filter -#endif - -static int _cffi_d_test_halo_props(double x0, UserParams * x1, CosmoParams * x2, AstroParams * x3, FlagOptions * x4, float * x5, float * x6, float * x7, float * x8, PerturbHaloField * x9, float * x10) -{ - return test_halo_props(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_test_halo_props(PyObject *self, PyObject *args) -{ - double x0; - UserParams * x1; - CosmoParams * x2; - AstroParams * x3; - FlagOptions * x4; - float * x5; - float * x6; - float * x7; - float * x8; - PerturbHaloField * x9; - float * x10; - Py_ssize_t datasize; - struct _cffi_freeme_s *large_args_free = NULL; - int result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - PyObject *arg4; - PyObject *arg5; - PyObject *arg6; - PyObject *arg7; - PyObject *arg8; - PyObject *arg9; - PyObject *arg10; - - if (!PyArg_UnpackTuple(args, "test_halo_props", 11, 11, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7, &arg8, &arg9, &arg10)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(5), arg1, (char **)&x1); - if (datasize != 0) { - x1 = ((size_t)datasize) <= 640 ? 
(UserParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(5), arg1, (char **)&x1, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(6), arg2, (char **)&x2); - if (datasize != 0) { - x2 = ((size_t)datasize) <= 640 ? (CosmoParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(6), arg2, (char **)&x2, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(7), arg3, (char **)&x3); - if (datasize != 0) { - x3 = ((size_t)datasize) <= 640 ? (AstroParams *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(7), arg3, (char **)&x3, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(8), arg4, (char **)&x4); - if (datasize != 0) { - x4 = ((size_t)datasize) <= 640 ? (FlagOptions *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(8), arg4, (char **)&x4, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg5, (char **)&x5); - if (datasize != 0) { - x5 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg5, (char **)&x5, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg6, (char **)&x6); - if (datasize != 0) { - x6 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg6, (char **)&x6, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg7, (char **)&x7); - if (datasize != 0) { - x7 = ((size_t)datasize) <= 640 ? 
(float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg7, (char **)&x7, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg8, (char **)&x8); - if (datasize != 0) { - x8 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg8, (char **)&x8, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(263), arg9, (char **)&x9); - if (datasize != 0) { - x9 = ((size_t)datasize) <= 640 ? (PerturbHaloField *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(263), arg9, (char **)&x9, - datasize, &large_args_free) < 0) - return NULL; - } - - datasize = _cffi_prepare_pointer_call_argument( - _cffi_type(174), arg10, (char **)&x10); - if (datasize != 0) { - x10 = ((size_t)datasize) <= 640 ? (float *)alloca((size_t)datasize) : NULL; - if (_cffi_convert_array_argument(_cffi_type(174), arg10, (char **)&x10, - datasize, &large_args_free) < 0) - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = test_halo_props(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_int(result, int); - if (large_args_free != NULL) _cffi_free_array_arguments(large_args_free); - return pyresult; -} -#else -# define _cffi_f_test_halo_props _cffi_d_test_halo_props -#endif - -static double _cffi_d_unconditional_mf(double x0, double x1, double x2, int x3) -{ - return unconditional_mf(x0, x1, x2, x3); -} -#ifndef PYPY_VERSION -static PyObject * -_cffi_f_unconditional_mf(PyObject *self, PyObject *args) -{ - double x0; - double x1; - double x2; - int x3; - double result; - PyObject *pyresult; - PyObject *arg0; - PyObject *arg1; - PyObject *arg2; - PyObject *arg3; - - if (!PyArg_UnpackTuple(args, 
"unconditional_mf", 4, 4, &arg0, &arg1, &arg2, &arg3)) - return NULL; - - x0 = (double)_cffi_to_c_double(arg0); - if (x0 == (double)-1 && PyErr_Occurred()) - return NULL; - - x1 = (double)_cffi_to_c_double(arg1); - if (x1 == (double)-1 && PyErr_Occurred()) - return NULL; - - x2 = (double)_cffi_to_c_double(arg2); - if (x2 == (double)-1 && PyErr_Occurred()) - return NULL; - - x3 = _cffi_to_c_int(arg3, int); - if (x3 == (int)-1 && PyErr_Occurred()) - return NULL; - - Py_BEGIN_ALLOW_THREADS - _cffi_restore_errno(); - { result = unconditional_mf(x0, x1, x2, x3); } - _cffi_save_errno(); - Py_END_ALLOW_THREADS - - (void)self; /* unused */ - pyresult = _cffi_from_c_double(result); - return pyresult; -} -#else -# define _cffi_f_unconditional_mf _cffi_d_unconditional_mf -#endif - -_CFFI_UNUSED_FN -static void _cffi_checkfld__AstroParams(AstroParams *p) -{ - /* only to generate compile-time warnings or errors */ - (void)p; - { float *tmp = &p->HII_EFF_FACTOR; (void)tmp; } - { float *tmp = &p->F_STAR10; (void)tmp; } - { float *tmp = &p->ALPHA_STAR; (void)tmp; } - { float *tmp = &p->ALPHA_STAR_MINI; (void)tmp; } - { float *tmp = &p->SIGMA_STAR; (void)tmp; } - { float *tmp = &p->CORR_STAR; (void)tmp; } - { double *tmp = &p->UPPER_STELLAR_TURNOVER_MASS; (void)tmp; } - { double *tmp = &p->UPPER_STELLAR_TURNOVER_INDEX; (void)tmp; } - { float *tmp = &p->F_STAR7_MINI; (void)tmp; } - { float *tmp = &p->t_STAR; (void)tmp; } - { float *tmp = &p->CORR_SFR; (void)tmp; } - { double *tmp = &p->SIGMA_SFR_INDEX; (void)tmp; } - { double *tmp = &p->SIGMA_SFR_LIM; (void)tmp; } - { double *tmp = &p->L_X; (void)tmp; } - { double *tmp = &p->L_X_MINI; (void)tmp; } - { double *tmp = &p->SIGMA_LX; (void)tmp; } - { double *tmp = &p->CORR_LX; (void)tmp; } - { float *tmp = &p->F_ESC10; (void)tmp; } - { float *tmp = &p->ALPHA_ESC; (void)tmp; } - { float *tmp = &p->F_ESC7_MINI; (void)tmp; } - { float *tmp = &p->M_TURN; (void)tmp; } - { float *tmp = &p->R_BUBBLE_MAX; (void)tmp; } - { float *tmp = 
&p->ION_Tvir_MIN; (void)tmp; } - { double *tmp = &p->F_H2_SHIELD; (void)tmp; } - { float *tmp = &p->NU_X_THRESH; (void)tmp; } - { float *tmp = &p->X_RAY_SPEC_INDEX; (void)tmp; } - { float *tmp = &p->X_RAY_Tvir_MIN; (void)tmp; } - { double *tmp = &p->A_LW; (void)tmp; } - { double *tmp = &p->BETA_LW; (void)tmp; } - { double *tmp = &p->A_VCB; (void)tmp; } - { double *tmp = &p->BETA_VCB; (void)tmp; } - (void)((p->N_RSD_STEPS) | 0); /* check that 'AstroParams.N_RSD_STEPS' is an integer */ -} -struct _cffi_align__AstroParams { char x; AstroParams y; }; - -_CFFI_UNUSED_FN -static void _cffi_checkfld__BrightnessTemp(BrightnessTemp *p) -{ - /* only to generate compile-time warnings or errors */ - (void)p; - { float * *tmp = &p->brightness_temp; (void)tmp; } -} -struct _cffi_align__BrightnessTemp { char x; BrightnessTemp y; }; - -_CFFI_UNUSED_FN -static void _cffi_checkfld__CosmoParams(CosmoParams *p) -{ - /* only to generate compile-time warnings or errors */ - (void)p; - { float *tmp = &p->SIGMA_8; (void)tmp; } - { float *tmp = &p->hlittle; (void)tmp; } - { float *tmp = &p->OMm; (void)tmp; } - { float *tmp = &p->OMl; (void)tmp; } - { float *tmp = &p->OMb; (void)tmp; } - { float *tmp = &p->POWER_INDEX; (void)tmp; } -} -struct _cffi_align__CosmoParams { char x; CosmoParams y; }; - -_CFFI_UNUSED_FN -static void _cffi_checkfld__FlagOptions(FlagOptions *p) -{ - /* only to generate compile-time warnings or errors */ - (void)p; - (void)((p->USE_HALO_FIELD) | 0); /* check that 'FlagOptions.USE_HALO_FIELD' is an integer */ - (void)((p->USE_MINI_HALOS) | 0); /* check that 'FlagOptions.USE_MINI_HALOS' is an integer */ - (void)((p->USE_CMB_HEATING) | 0); /* check that 'FlagOptions.USE_CMB_HEATING' is an integer */ - (void)((p->USE_LYA_HEATING) | 0); /* check that 'FlagOptions.USE_LYA_HEATING' is an integer */ - (void)((p->USE_MASS_DEPENDENT_ZETA) | 0); /* check that 'FlagOptions.USE_MASS_DEPENDENT_ZETA' is an integer */ - (void)((p->SUBCELL_RSD) | 0); /* check that 
'FlagOptions.SUBCELL_RSD' is an integer */ - (void)((p->APPLY_RSDS) | 0); /* check that 'FlagOptions.APPLY_RSDS' is an integer */ - (void)((p->INHOMO_RECO) | 0); /* check that 'FlagOptions.INHOMO_RECO' is an integer */ - (void)((p->USE_TS_FLUCT) | 0); /* check that 'FlagOptions.USE_TS_FLUCT' is an integer */ - (void)((p->M_MIN_in_Mass) | 0); /* check that 'FlagOptions.M_MIN_in_Mass' is an integer */ - (void)((p->FIX_VCB_AVG) | 0); /* check that 'FlagOptions.FIX_VCB_AVG' is an integer */ - (void)((p->HALO_STOCHASTICITY) | 0); /* check that 'FlagOptions.HALO_STOCHASTICITY' is an integer */ - (void)((p->USE_EXP_FILTER) | 0); /* check that 'FlagOptions.USE_EXP_FILTER' is an integer */ - (void)((p->FIXED_HALO_GRIDS) | 0); /* check that 'FlagOptions.FIXED_HALO_GRIDS' is an integer */ - (void)((p->CELL_RECOMB) | 0); /* check that 'FlagOptions.CELL_RECOMB' is an integer */ - (void)((p->PHOTON_CONS_TYPE) | 0); /* check that 'FlagOptions.PHOTON_CONS_TYPE' is an integer */ - (void)((p->USE_UPPER_STELLAR_TURNOVER) | 0); /* check that 'FlagOptions.USE_UPPER_STELLAR_TURNOVER' is an integer */ -} -struct _cffi_align__FlagOptions { char x; FlagOptions y; }; - -_CFFI_UNUSED_FN -static void _cffi_checkfld__GlobalParams(GlobalParams *p) -{ - /* only to generate compile-time warnings or errors */ - (void)p; - { float *tmp = &p->ALPHA_UVB; (void)tmp; } - (void)((p->EVOLVE_DENSITY_LINEARLY) | 0); /* check that 'GlobalParams.EVOLVE_DENSITY_LINEARLY' is an integer */ - (void)((p->SMOOTH_EVOLVED_DENSITY_FIELD) | 0); /* check that 'GlobalParams.SMOOTH_EVOLVED_DENSITY_FIELD' is an integer */ - { float *tmp = &p->R_smooth_density; (void)tmp; } - { float *tmp = &p->HII_ROUND_ERR; (void)tmp; } - (void)((p->FIND_BUBBLE_ALGORITHM) | 0); /* check that 'GlobalParams.FIND_BUBBLE_ALGORITHM' is an integer */ - (void)((p->N_POISSON) | 0); /* check that 'GlobalParams.N_POISSON' is an integer */ - (void)((p->T_USE_VELOCITIES) | 0); /* check that 'GlobalParams.T_USE_VELOCITIES' is an integer */ - { float 
*tmp = &p->MAX_DVDR; (void)tmp; } - { float *tmp = &p->DELTA_R_HII_FACTOR; (void)tmp; } - { float *tmp = &p->DELTA_R_FACTOR; (void)tmp; } - (void)((p->HII_FILTER) | 0); /* check that 'GlobalParams.HII_FILTER' is an integer */ - { float *tmp = &p->INITIAL_REDSHIFT; (void)tmp; } - { float *tmp = &p->R_OVERLAP_FACTOR; (void)tmp; } - (void)((p->DELTA_CRIT_MODE) | 0); /* check that 'GlobalParams.DELTA_CRIT_MODE' is an integer */ - (void)((p->HALO_FILTER) | 0); /* check that 'GlobalParams.HALO_FILTER' is an integer */ - (void)((p->OPTIMIZE) | 0); /* check that 'GlobalParams.OPTIMIZE' is an integer */ - { float *tmp = &p->OPTIMIZE_MIN_MASS; (void)tmp; } - { float *tmp = &p->CRIT_DENS_TRANSITION; (void)tmp; } - { float *tmp = &p->MIN_DENSITY_LOW_LIMIT; (void)tmp; } - (void)((p->RecombPhotonCons) | 0); /* check that 'GlobalParams.RecombPhotonCons' is an integer */ - { float *tmp = &p->PhotonConsStart; (void)tmp; } - { float *tmp = &p->PhotonConsEnd; (void)tmp; } - { float *tmp = &p->PhotonConsAsymptoteTo; (void)tmp; } - { float *tmp = &p->PhotonConsEndCalibz; (void)tmp; } - (void)((p->PhotonConsSmoothing) | 0); /* check that 'GlobalParams.PhotonConsSmoothing' is an integer */ - (void)((p->HEAT_FILTER) | 0); /* check that 'GlobalParams.HEAT_FILTER' is an integer */ - { double *tmp = &p->CLUMPING_FACTOR; (void)tmp; } - { float *tmp = &p->Z_HEAT_MAX; (void)tmp; } - { float *tmp = &p->R_XLy_MAX; (void)tmp; } - (void)((p->NUM_FILTER_STEPS_FOR_Ts) | 0); /* check that 'GlobalParams.NUM_FILTER_STEPS_FOR_Ts' is an integer */ - { float *tmp = &p->ZPRIME_STEP_FACTOR; (void)tmp; } - { double *tmp = &p->TK_at_Z_HEAT_MAX; (void)tmp; } - { double *tmp = &p->XION_at_Z_HEAT_MAX; (void)tmp; } - (void)((p->Pop) | 0); /* check that 'GlobalParams.Pop' is an integer */ - { float *tmp = &p->Pop2_ion; (void)tmp; } - { float *tmp = &p->Pop3_ion; (void)tmp; } - { float *tmp = &p->NU_X_BAND_MAX; (void)tmp; } - { float *tmp = &p->NU_X_MAX; (void)tmp; } - (void)((p->NBINS_LF) | 0); /* check that 
'GlobalParams.NBINS_LF' is an integer */ - (void)((p->P_CUTOFF) | 0); /* check that 'GlobalParams.P_CUTOFF' is an integer */ - { float *tmp = &p->M_WDM; (void)tmp; } - { float *tmp = &p->g_x; (void)tmp; } - { float *tmp = &p->OMn; (void)tmp; } - { float *tmp = &p->OMk; (void)tmp; } - { float *tmp = &p->OMr; (void)tmp; } - { float *tmp = &p->OMtot; (void)tmp; } - { float *tmp = &p->Y_He; (void)tmp; } - { float *tmp = &p->wl; (void)tmp; } - { float *tmp = &p->SHETH_b; (void)tmp; } - { float *tmp = &p->SHETH_c; (void)tmp; } - { double *tmp = &p->Zreion_HeII; (void)tmp; } - (void)((p->FILTER) | 0); /* check that 'GlobalParams.FILTER' is an integer */ - { char * *tmp = &p->external_table_path; (void)tmp; } - { char * *tmp = &p->wisdoms_path; (void)tmp; } - { float *tmp = &p->R_BUBBLE_MIN; (void)tmp; } - { float *tmp = &p->M_MIN_INTEGRAL; (void)tmp; } - { float *tmp = &p->M_MAX_INTEGRAL; (void)tmp; } - { float *tmp = &p->T_RE; (void)tmp; } - { float *tmp = &p->VAVG; (void)tmp; } - (void)((p->USE_ADIABATIC_FLUCTUATIONS) | 0); /* check that 'GlobalParams.USE_ADIABATIC_FLUCTUATIONS' is an integer */ -} -struct _cffi_align__GlobalParams { char x; GlobalParams y; }; - -_CFFI_UNUSED_FN -static void _cffi_checkfld__HaloBox(HaloBox *p) -{ - /* only to generate compile-time warnings or errors */ - (void)p; - { float * *tmp = &p->halo_mass; (void)tmp; } - { float * *tmp = &p->halo_stars; (void)tmp; } - { float * *tmp = &p->halo_stars_mini; (void)tmp; } - { int * *tmp = &p->count; (void)tmp; } - { float * *tmp = &p->n_ion; (void)tmp; } - { float * *tmp = &p->halo_sfr; (void)tmp; } - { float * *tmp = &p->halo_xray; (void)tmp; } - { float * *tmp = &p->halo_sfr_mini; (void)tmp; } - { float * *tmp = &p->whalo_sfr; (void)tmp; } - { double *tmp = &p->log10_Mcrit_ACG_ave; (void)tmp; } - { double *tmp = &p->log10_Mcrit_MCG_ave; (void)tmp; } -} -struct _cffi_align__HaloBox { char x; HaloBox y; }; - -_CFFI_UNUSED_FN -static void _cffi_checkfld__HaloField(HaloField *p) -{ - /* only to 
generate compile-time warnings or errors */ - (void)p; - (void)((p->n_halos) | 0); /* check that 'HaloField.n_halos' is an integer */ - (void)((p->buffer_size) | 0); /* check that 'HaloField.buffer_size' is an integer */ - { float * *tmp = &p->halo_masses; (void)tmp; } - { int * *tmp = &p->halo_coords; (void)tmp; } - { float * *tmp = &p->star_rng; (void)tmp; } - { float * *tmp = &p->sfr_rng; (void)tmp; } - { float * *tmp = &p->xray_rng; (void)tmp; } -} -struct _cffi_align__HaloField { char x; HaloField y; }; - -_CFFI_UNUSED_FN -static void _cffi_checkfld__InitialConditions(InitialConditions *p) -{ - /* only to generate compile-time warnings or errors */ - (void)p; - { float * *tmp = &p->lowres_density; (void)tmp; } - { float * *tmp = &p->lowres_vx; (void)tmp; } - { float * *tmp = &p->lowres_vy; (void)tmp; } - { float * *tmp = &p->lowres_vz; (void)tmp; } - { float * *tmp = &p->lowres_vx_2LPT; (void)tmp; } - { float * *tmp = &p->lowres_vy_2LPT; (void)tmp; } - { float * *tmp = &p->lowres_vz_2LPT; (void)tmp; } - { float * *tmp = &p->hires_density; (void)tmp; } - { float * *tmp = &p->hires_vx; (void)tmp; } - { float * *tmp = &p->hires_vy; (void)tmp; } - { float * *tmp = &p->hires_vz; (void)tmp; } - { float * *tmp = &p->hires_vx_2LPT; (void)tmp; } - { float * *tmp = &p->hires_vy_2LPT; (void)tmp; } - { float * *tmp = &p->hires_vz_2LPT; (void)tmp; } - { float * *tmp = &p->lowres_vcb; (void)tmp; } -} -struct _cffi_align__InitialConditions { char x; InitialConditions y; }; - -_CFFI_UNUSED_FN -static void _cffi_checkfld__IonizedBox(IonizedBox *p) -{ - /* only to generate compile-time warnings or errors */ - (void)p; - { double *tmp = &p->mean_f_coll; (void)tmp; } - { double *tmp = &p->mean_f_coll_MINI; (void)tmp; } - { double *tmp = &p->log10_Mturnover_ave; (void)tmp; } - { double *tmp = &p->log10_Mturnover_MINI_ave; (void)tmp; } - { float * *tmp = &p->xH_box; (void)tmp; } - { float * *tmp = &p->Gamma12_box; (void)tmp; } - { float * *tmp = &p->MFP_box; (void)tmp; } - { float 
* *tmp = &p->z_re_box; (void)tmp; } - { float * *tmp = &p->dNrec_box; (void)tmp; } - { float * *tmp = &p->temp_kinetic_all_gas; (void)tmp; } - { float * *tmp = &p->Fcoll; (void)tmp; } - { float * *tmp = &p->Fcoll_MINI; (void)tmp; } -} -struct _cffi_align__IonizedBox { char x; IonizedBox y; }; - -_CFFI_UNUSED_FN -static void _cffi_checkfld__PerturbHaloField(PerturbHaloField *p) -{ - /* only to generate compile-time warnings or errors */ - (void)p; - (void)((p->n_halos) | 0); /* check that 'PerturbHaloField.n_halos' is an integer */ - (void)((p->buffer_size) | 0); /* check that 'PerturbHaloField.buffer_size' is an integer */ - { float * *tmp = &p->halo_masses; (void)tmp; } - { int * *tmp = &p->halo_coords; (void)tmp; } - { float * *tmp = &p->star_rng; (void)tmp; } - { float * *tmp = &p->sfr_rng; (void)tmp; } - { float * *tmp = &p->xray_rng; (void)tmp; } -} -struct _cffi_align__PerturbHaloField { char x; PerturbHaloField y; }; - -_CFFI_UNUSED_FN -static void _cffi_checkfld__PerturbedField(PerturbedField *p) -{ - /* only to generate compile-time warnings or errors */ - (void)p; - { float * *tmp = &p->density; (void)tmp; } - { float * *tmp = &p->velocity_x; (void)tmp; } - { float * *tmp = &p->velocity_y; (void)tmp; } - { float * *tmp = &p->velocity_z; (void)tmp; } -} -struct _cffi_align__PerturbedField { char x; PerturbedField y; }; - -_CFFI_UNUSED_FN -static void _cffi_checkfld__TsBox(TsBox *p) -{ - /* only to generate compile-time warnings or errors */ - (void)p; - { float * *tmp = &p->Ts_box; (void)tmp; } - { float * *tmp = &p->x_e_box; (void)tmp; } - { float * *tmp = &p->Tk_box; (void)tmp; } - { float * *tmp = &p->J_21_LW_box; (void)tmp; } -} -struct _cffi_align__TsBox { char x; TsBox y; }; - -_CFFI_UNUSED_FN -static void _cffi_checkfld__UserParams(UserParams *p) -{ - /* only to generate compile-time warnings or errors */ - (void)p; - (void)((p->HII_DIM) | 0); /* check that 'UserParams.HII_DIM' is an integer */ - (void)((p->DIM) | 0); /* check that 'UserParams.DIM' 
is an integer */ - { float *tmp = &p->BOX_LEN; (void)tmp; } - { float *tmp = &p->NON_CUBIC_FACTOR; (void)tmp; } - (void)((p->USE_FFTW_WISDOM) | 0); /* check that 'UserParams.USE_FFTW_WISDOM' is an integer */ - (void)((p->HMF) | 0); /* check that 'UserParams.HMF' is an integer */ - (void)((p->USE_RELATIVE_VELOCITIES) | 0); /* check that 'UserParams.USE_RELATIVE_VELOCITIES' is an integer */ - (void)((p->POWER_SPECTRUM) | 0); /* check that 'UserParams.POWER_SPECTRUM' is an integer */ - (void)((p->N_THREADS) | 0); /* check that 'UserParams.N_THREADS' is an integer */ - (void)((p->PERTURB_ON_HIGH_RES) | 0); /* check that 'UserParams.PERTURB_ON_HIGH_RES' is an integer */ - (void)((p->NO_RNG) | 0); /* check that 'UserParams.NO_RNG' is an integer */ - (void)((p->USE_INTERPOLATION_TABLES) | 0); /* check that 'UserParams.USE_INTERPOLATION_TABLES' is an integer */ - (void)((p->INTEGRATION_METHOD_ATOMIC) | 0); /* check that 'UserParams.INTEGRATION_METHOD_ATOMIC' is an integer */ - (void)((p->INTEGRATION_METHOD_MINI) | 0); /* check that 'UserParams.INTEGRATION_METHOD_MINI' is an integer */ - (void)((p->USE_2LPT) | 0); /* check that 'UserParams.USE_2LPT' is an integer */ - (void)((p->MINIMIZE_MEMORY) | 0); /* check that 'UserParams.MINIMIZE_MEMORY' is an integer */ - (void)((p->KEEP_3D_VELOCITIES) | 0); /* check that 'UserParams.KEEP_3D_VELOCITIES' is an integer */ - { float *tmp = &p->SAMPLER_MIN_MASS; (void)tmp; } - { double *tmp = &p->SAMPLER_BUFFER_FACTOR; (void)tmp; } - { float *tmp = &p->MAXHALO_FACTOR; (void)tmp; } - (void)((p->N_COND_INTERP) | 0); /* check that 'UserParams.N_COND_INTERP' is an integer */ - (void)((p->N_PROB_INTERP) | 0); /* check that 'UserParams.N_PROB_INTERP' is an integer */ - { double *tmp = &p->MIN_LOGPROB; (void)tmp; } - (void)((p->SAMPLE_METHOD) | 0); /* check that 'UserParams.SAMPLE_METHOD' is an integer */ - (void)((p->AVG_BELOW_SAMPLER) | 0); /* check that 'UserParams.AVG_BELOW_SAMPLER' is an integer */ - { double *tmp = 
&p->HALOMASS_CORRECTION; (void)tmp; } - { double *tmp = &p->PARKINSON_G0; (void)tmp; } - { double *tmp = &p->PARKINSON_y1; (void)tmp; } - { double *tmp = &p->PARKINSON_y2; (void)tmp; } -} -struct _cffi_align__UserParams { char x; UserParams y; }; - -_CFFI_UNUSED_FN -static void _cffi_checkfld__XraySourceBox(XraySourceBox *p) -{ - /* only to generate compile-time warnings or errors */ - (void)p; - { float * *tmp = &p->filtered_sfr; (void)tmp; } - { float * *tmp = &p->filtered_xray; (void)tmp; } - { float * *tmp = &p->filtered_sfr_mini; (void)tmp; } - { double * *tmp = &p->mean_log10_Mcrit_LW; (void)tmp; } - { double * *tmp = &p->mean_sfr; (void)tmp; } - { double * *tmp = &p->mean_sfr_mini; (void)tmp; } -} -struct _cffi_align__XraySourceBox { char x; XraySourceBox y; }; - -static AstroParams * *_cffi_var_astro_params_global(void) -{ - return &(astro_params_global); -} - -static CosmoParams * *_cffi_var_cosmo_params_global(void) -{ - return &(cosmo_params_global); -} - -static FlagOptions * *_cffi_var_flag_options_global(void) -{ - return &(flag_options_global); -} - -static GlobalParams *_cffi_var_global_params(void) -{ - return &(global_params); -} - -static _Bool *_cffi_var_photon_cons_allocated(void) -{ - return &(photon_cons_allocated); -} - -static UserParams * *_cffi_var_user_params_global(void) -{ - return &(user_params_global); -} - -static const struct _cffi_global_s _cffi_globals[] = { - { "Broadcast_struct_global_all", (void *)_cffi_f_Broadcast_struct_global_all, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 384), (void *)_cffi_d_Broadcast_struct_global_all }, - { "Broadcast_struct_global_noastro", (void *)_cffi_f_Broadcast_struct_global_noastro, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 380), (void *)_cffi_d_Broadcast_struct_global_noastro }, - { "ComputeBrightnessTemp", (void *)_cffi_f_ComputeBrightnessTemp, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 291), (void *)_cffi_d_ComputeBrightnessTemp }, - { "ComputeHaloBox", (void *)_cffi_f_ComputeHaloBox, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 
255), (void *)_cffi_d_ComputeHaloBox }, - { "ComputeHaloField", (void *)_cffi_f_ComputeHaloField, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 309), (void *)_cffi_d_ComputeHaloField }, - { "ComputeInitialConditions", (void *)_cffi_f_ComputeInitialConditions, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 365), (void *)_cffi_d_ComputeInitialConditions }, - { "ComputeIonizedBox", (void *)_cffi_f_ComputeIonizedBox, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 321), (void *)_cffi_d_ComputeIonizedBox }, - { "ComputeLF", (void *)_cffi_f_ComputeLF, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 351), (void *)_cffi_d_ComputeLF }, - { "ComputePerturbField", (void *)_cffi_f_ComputePerturbField, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 302), (void *)_cffi_d_ComputePerturbField }, - { "ComputePerturbHaloField", (void *)_cffi_f_ComputePerturbHaloField, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 281), (void *)_cffi_d_ComputePerturbHaloField }, - { "ComputeTau", (void *)_cffi_f_ComputeTau, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 170), (void *)_cffi_d_ComputeTau }, - { "ComputeTsBox", (void *)_cffi_f_ComputeTsBox, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 336), (void *)_cffi_d_ComputeTsBox }, - { "ComputeZstart_PhotonCons", (void *)_cffi_f_ComputeZstart_PhotonCons, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 236), (void *)_cffi_d_ComputeZstart_PhotonCons }, - { "CreateFFTWWisdoms", (void *)_cffi_f_CreateFFTWWisdoms, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 177), (void *)_cffi_d_CreateFFTWWisdoms }, - { "EvaluateFcoll_delta", (void *)_cffi_f_EvaluateFcoll_delta, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 19), (void *)_cffi_d_EvaluateFcoll_delta }, - { "EvaluateMcoll", (void *)_cffi_f_EvaluateMcoll, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 25), (void *)_cffi_d_EvaluateMcoll }, - { "EvaluateNhalo", (void *)_cffi_f_EvaluateNhalo, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 25), (void *)_cffi_d_EvaluateNhalo }, - { "EvaluateNhaloInv", (void *)_cffi_f_EvaluateNhaloInv, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 10), (void *)_cffi_d_EvaluateNhaloInv }, - { "EvaluateNionTs", (void *)_cffi_f_EvaluateNionTs, 
_CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 14), (void *)_cffi_d_EvaluateNionTs }, - { "EvaluateNionTs_MINI", (void *)_cffi_f_EvaluateNionTs_MINI, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 19), (void *)_cffi_d_EvaluateNionTs_MINI }, - { "EvaluateNion_Conditional", (void *)_cffi_f_EvaluateNion_Conditional, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 55), (void *)_cffi_d_EvaluateNion_Conditional }, - { "EvaluateNion_Conditional_MINI", (void *)_cffi_f_EvaluateNion_Conditional_MINI, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 79), (void *)_cffi_d_EvaluateNion_Conditional_MINI }, - { "EvaluateSFRD", (void *)_cffi_f_EvaluateSFRD, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 10), (void *)_cffi_d_EvaluateSFRD }, - { "EvaluateSFRD_Conditional", (void *)_cffi_f_EvaluateSFRD_Conditional, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 34), (void *)_cffi_d_EvaluateSFRD_Conditional }, - { "EvaluateSFRD_Conditional_MINI", (void *)_cffi_f_EvaluateSFRD_Conditional_MINI, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 44), (void *)_cffi_d_EvaluateSFRD_Conditional_MINI }, - { "EvaluateSFRD_MINI", (void *)_cffi_f_EvaluateSFRD_MINI, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 14), (void *)_cffi_d_EvaluateSFRD_MINI }, - { "EvaluateSigma", (void *)_cffi_f_EvaluateSigma, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 0), (void *)_cffi_d_EvaluateSigma }, - { "EvaluatedFcolldz", (void *)_cffi_f_EvaluatedFcolldz, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 19), (void *)_cffi_d_EvaluatedFcolldz }, - { "EvaluatedSigmasqdm", (void *)_cffi_f_EvaluatedSigmasqdm, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 0), (void *)_cffi_d_EvaluatedSigmasqdm }, - { "Fcoll_General", (void *)_cffi_f_Fcoll_General, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 14), (void *)_cffi_d_Fcoll_General }, - { "FreePhotonConsMemory", (void *)_cffi_f_FreePhotonConsMemory, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_N, 500), (void *)_cffi_d_FreePhotonConsMemory }, - { "FunctionThatCatches", (void *)_cffi_f_FunctionThatCatches, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 231), (void *)_cffi_d_FunctionThatCatches }, - { "FunctionThatThrows", (void *)_cffi_f_FunctionThatThrows, 
_CFFI_OP(_CFFI_OP_CPYTHON_BLTN_N, 500), (void *)_cffi_d_FunctionThatThrows }, - { "InitialisePhotonCons", (void *)_cffi_f_InitialisePhotonCons, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 181), (void *)_cffi_d_InitialisePhotonCons }, - { "Mcoll_Conditional", (void *)_cffi_f_Mcoll_Conditional, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 138), (void *)_cffi_d_Mcoll_Conditional }, - { "Nhalo_Conditional", (void *)_cffi_f_Nhalo_Conditional, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 138), (void *)_cffi_d_Nhalo_Conditional }, - { "Nion_ConditionalM", (void *)_cffi_f_Nion_ConditionalM, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 122), (void *)_cffi_d_Nion_ConditionalM }, - { "Nion_ConditionalM_MINI", (void *)_cffi_f_Nion_ConditionalM_MINI, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 105), (void *)_cffi_d_Nion_ConditionalM_MINI }, - { "Nion_General", (void *)_cffi_f_Nion_General, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 67), (void *)_cffi_d_Nion_General }, - { "Nion_General_MINI", (void *)_cffi_f_Nion_General_MINI, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 92), (void *)_cffi_d_Nion_General_MINI }, - { "ObtainPhotonConsData", (void *)_cffi_f_ObtainPhotonConsData, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 239), (void *)_cffi_d_ObtainPhotonConsData }, - { "PhotonCons_Calibration", (void *)_cffi_f_PhotonCons_Calibration, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 250), (void *)_cffi_d_PhotonCons_Calibration }, - { "SomethingThatCatches", (void *)_cffi_f_SomethingThatCatches, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 228), (void *)_cffi_d_SomethingThatCatches }, - { "UpdateXraySourceBox", (void *)_cffi_f_UpdateXraySourceBox, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 187), (void *)_cffi_d_UpdateXraySourceBox }, - { "adjust_redshifts_for_photoncons", (void *)_cffi_f_adjust_redshifts_for_photoncons, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 373), (void *)_cffi_d_adjust_redshifts_for_photoncons }, - { "astro_params_global", (void *)_cffi_var_astro_params_global, _CFFI_OP(_CFFI_OP_GLOBAL_VAR_F, 7), (void *)0 }, - { "atomic_cooling_threshold", (void *)_cffi_f_atomic_cooling_threshold, 
_CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 160), (void *)_cffi_d_atomic_cooling_threshold }, - { "conditional_mf", (void *)_cffi_f_conditional_mf, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 147), (void *)_cffi_d_conditional_mf }, - { "cosmo_params_global", (void *)_cffi_var_cosmo_params_global, _CFFI_OP(_CFFI_OP_GLOBAL_VAR_F, 6), (void *)0 }, - { "determine_deltaz_for_photoncons", (void *)_cffi_f_determine_deltaz_for_photoncons, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_N, 500), (void *)_cffi_d_determine_deltaz_for_photoncons }, - { "dicke", (void *)_cffi_f_dicke, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 0), (void *)_cffi_d_dicke }, - { "dsigmasqdm_z0", (void *)_cffi_f_dsigmasqdm_z0, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 0), (void *)_cffi_d_dsigmasqdm_z0 }, - { "expected_nhalo", (void *)_cffi_f_expected_nhalo, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 3), (void *)_cffi_d_expected_nhalo }, - { "flag_options_global", (void *)_cffi_var_flag_options_global, _CFFI_OP(_CFFI_OP_GLOBAL_VAR_F, 8), (void *)0 }, - { "free", (void *)_cffi_f_free, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 497), (void *)_cffi_d_free }, - { "get_delta_crit", (void *)_cffi_f_get_delta_crit, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 163), (void *)_cffi_d_get_delta_crit }, - { "global_params", (void *)_cffi_var_global_params, _CFFI_OP(_CFFI_OP_GLOBAL_VAR_F, 506), (void *)0 }, - { "init_FcollTable", (void *)_cffi_f_init_FcollTable, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 394), (void *)_cffi_d_init_FcollTable }, - { "init_heat", (void *)_cffi_f_init_heat, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_N, 371), (void *)_cffi_d_init_heat }, - { "init_ps", (void *)_cffi_f_init_ps, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_N, 168), (void *)_cffi_d_init_ps }, - { "initialiseSigmaMInterpTable", (void *)_cffi_f_initialiseSigmaMInterpTable, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 440), (void *)_cffi_d_initialiseSigmaMInterpTable }, - { "initialise_FgtrM_delta_table", (void *)_cffi_f_initialise_FgtrM_delta_table, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 407), (void *)_cffi_d_initialise_FgtrM_delta_table }, - { 
"initialise_GL", (void *)_cffi_f_initialise_GL, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 440), (void *)_cffi_d_initialise_GL }, - { "initialise_Nion_Conditional_spline", (void *)_cffi_f_initialise_Nion_Conditional_spline, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 444), (void *)_cffi_d_initialise_Nion_Conditional_spline }, - { "initialise_Nion_Ts_spline", (void *)_cffi_f_initialise_Nion_Ts_spline, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 483), (void *)_cffi_d_initialise_Nion_Ts_spline }, - { "initialise_SFRD_Conditional_table", (void *)_cffi_f_initialise_SFRD_Conditional_table, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 424), (void *)_cffi_d_initialise_SFRD_Conditional_table }, - { "initialise_SFRD_spline", (void *)_cffi_f_initialise_SFRD_spline, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 472), (void *)_cffi_d_initialise_SFRD_spline }, - { "initialise_dNdM_inverse_table", (void *)_cffi_f_initialise_dNdM_inverse_table, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 399), (void *)_cffi_d_initialise_dNdM_inverse_table }, - { "initialise_dNdM_tables", (void *)_cffi_f_initialise_dNdM_tables, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 415), (void *)_cffi_d_initialise_dNdM_tables }, - { "photon_cons_allocated", (void *)_cffi_var_photon_cons_allocated, _CFFI_OP(_CFFI_OP_GLOBAL_VAR_F, 65), (void *)0 }, - { "set_alphacons_params", (void *)_cffi_f_set_alphacons_params, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 390), (void *)_cffi_d_set_alphacons_params }, - { "sigma_z0", (void *)_cffi_f_sigma_z0, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_O, 0), (void *)_cffi_d_sigma_z0 }, - { "single_test_sample", (void *)_cffi_f_single_test_sample, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 209), (void *)_cffi_d_single_test_sample }, - { "test_filter", (void *)_cffi_f_test_filter, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 198), (void *)_cffi_d_test_filter }, - { "test_halo_props", (void *)_cffi_f_test_halo_props, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 268), (void *)_cffi_d_test_halo_props }, - { "unconditional_mf", (void *)_cffi_f_unconditional_mf, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 154), (void 
*)_cffi_d_unconditional_mf }, - { "user_params_global", (void *)_cffi_var_user_params_global, _CFFI_OP(_CFFI_OP_GLOBAL_VAR_F, 5), (void *)0 }, -}; - -static const struct _cffi_field_s _cffi_fields[] = { - { "HII_EFF_FACTOR", offsetof(AstroParams, HII_EFF_FACTOR), - sizeof(((AstroParams *)0)->HII_EFF_FACTOR), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "F_STAR10", offsetof(AstroParams, F_STAR10), - sizeof(((AstroParams *)0)->F_STAR10), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "ALPHA_STAR", offsetof(AstroParams, ALPHA_STAR), - sizeof(((AstroParams *)0)->ALPHA_STAR), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "ALPHA_STAR_MINI", offsetof(AstroParams, ALPHA_STAR_MINI), - sizeof(((AstroParams *)0)->ALPHA_STAR_MINI), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "SIGMA_STAR", offsetof(AstroParams, SIGMA_STAR), - sizeof(((AstroParams *)0)->SIGMA_STAR), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "CORR_STAR", offsetof(AstroParams, CORR_STAR), - sizeof(((AstroParams *)0)->CORR_STAR), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "UPPER_STELLAR_TURNOVER_MASS", offsetof(AstroParams, UPPER_STELLAR_TURNOVER_MASS), - sizeof(((AstroParams *)0)->UPPER_STELLAR_TURNOVER_MASS), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "UPPER_STELLAR_TURNOVER_INDEX", offsetof(AstroParams, UPPER_STELLAR_TURNOVER_INDEX), - sizeof(((AstroParams *)0)->UPPER_STELLAR_TURNOVER_INDEX), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "F_STAR7_MINI", offsetof(AstroParams, F_STAR7_MINI), - sizeof(((AstroParams *)0)->F_STAR7_MINI), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "t_STAR", offsetof(AstroParams, t_STAR), - sizeof(((AstroParams *)0)->t_STAR), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "CORR_SFR", offsetof(AstroParams, CORR_SFR), - sizeof(((AstroParams *)0)->CORR_SFR), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "SIGMA_SFR_INDEX", offsetof(AstroParams, SIGMA_SFR_INDEX), - sizeof(((AstroParams *)0)->SIGMA_SFR_INDEX), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "SIGMA_SFR_LIM", offsetof(AstroParams, SIGMA_SFR_LIM), - sizeof(((AstroParams *)0)->SIGMA_SFR_LIM), - 
_CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "L_X", offsetof(AstroParams, L_X), - sizeof(((AstroParams *)0)->L_X), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "L_X_MINI", offsetof(AstroParams, L_X_MINI), - sizeof(((AstroParams *)0)->L_X_MINI), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "SIGMA_LX", offsetof(AstroParams, SIGMA_LX), - sizeof(((AstroParams *)0)->SIGMA_LX), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "CORR_LX", offsetof(AstroParams, CORR_LX), - sizeof(((AstroParams *)0)->CORR_LX), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "F_ESC10", offsetof(AstroParams, F_ESC10), - sizeof(((AstroParams *)0)->F_ESC10), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "ALPHA_ESC", offsetof(AstroParams, ALPHA_ESC), - sizeof(((AstroParams *)0)->ALPHA_ESC), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "F_ESC7_MINI", offsetof(AstroParams, F_ESC7_MINI), - sizeof(((AstroParams *)0)->F_ESC7_MINI), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "M_TURN", offsetof(AstroParams, M_TURN), - sizeof(((AstroParams *)0)->M_TURN), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "R_BUBBLE_MAX", offsetof(AstroParams, R_BUBBLE_MAX), - sizeof(((AstroParams *)0)->R_BUBBLE_MAX), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "ION_Tvir_MIN", offsetof(AstroParams, ION_Tvir_MIN), - sizeof(((AstroParams *)0)->ION_Tvir_MIN), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "F_H2_SHIELD", offsetof(AstroParams, F_H2_SHIELD), - sizeof(((AstroParams *)0)->F_H2_SHIELD), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "NU_X_THRESH", offsetof(AstroParams, NU_X_THRESH), - sizeof(((AstroParams *)0)->NU_X_THRESH), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "X_RAY_SPEC_INDEX", offsetof(AstroParams, X_RAY_SPEC_INDEX), - sizeof(((AstroParams *)0)->X_RAY_SPEC_INDEX), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "X_RAY_Tvir_MIN", offsetof(AstroParams, X_RAY_Tvir_MIN), - sizeof(((AstroParams *)0)->X_RAY_Tvir_MIN), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "A_LW", offsetof(AstroParams, A_LW), - sizeof(((AstroParams *)0)->A_LW), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "BETA_LW", offsetof(AstroParams, BETA_LW), - sizeof(((AstroParams 
*)0)->BETA_LW), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "A_VCB", offsetof(AstroParams, A_VCB), - sizeof(((AstroParams *)0)->A_VCB), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "BETA_VCB", offsetof(AstroParams, BETA_VCB), - sizeof(((AstroParams *)0)->BETA_VCB), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "N_RSD_STEPS", offsetof(AstroParams, N_RSD_STEPS), - sizeof(((AstroParams *)0)->N_RSD_STEPS), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "brightness_temp", offsetof(BrightnessTemp, brightness_temp), - sizeof(((BrightnessTemp *)0)->brightness_temp), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "SIGMA_8", offsetof(CosmoParams, SIGMA_8), - sizeof(((CosmoParams *)0)->SIGMA_8), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "hlittle", offsetof(CosmoParams, hlittle), - sizeof(((CosmoParams *)0)->hlittle), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "OMm", offsetof(CosmoParams, OMm), - sizeof(((CosmoParams *)0)->OMm), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "OMl", offsetof(CosmoParams, OMl), - sizeof(((CosmoParams *)0)->OMl), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "OMb", offsetof(CosmoParams, OMb), - sizeof(((CosmoParams *)0)->OMb), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "POWER_INDEX", offsetof(CosmoParams, POWER_INDEX), - sizeof(((CosmoParams *)0)->POWER_INDEX), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "USE_HALO_FIELD", offsetof(FlagOptions, USE_HALO_FIELD), - sizeof(((FlagOptions *)0)->USE_HALO_FIELD), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "USE_MINI_HALOS", offsetof(FlagOptions, USE_MINI_HALOS), - sizeof(((FlagOptions *)0)->USE_MINI_HALOS), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "USE_CMB_HEATING", offsetof(FlagOptions, USE_CMB_HEATING), - sizeof(((FlagOptions *)0)->USE_CMB_HEATING), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "USE_LYA_HEATING", offsetof(FlagOptions, USE_LYA_HEATING), - sizeof(((FlagOptions *)0)->USE_LYA_HEATING), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "USE_MASS_DEPENDENT_ZETA", offsetof(FlagOptions, USE_MASS_DEPENDENT_ZETA), - sizeof(((FlagOptions *)0)->USE_MASS_DEPENDENT_ZETA), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { 
"SUBCELL_RSD", offsetof(FlagOptions, SUBCELL_RSD), - sizeof(((FlagOptions *)0)->SUBCELL_RSD), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "APPLY_RSDS", offsetof(FlagOptions, APPLY_RSDS), - sizeof(((FlagOptions *)0)->APPLY_RSDS), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "INHOMO_RECO", offsetof(FlagOptions, INHOMO_RECO), - sizeof(((FlagOptions *)0)->INHOMO_RECO), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "USE_TS_FLUCT", offsetof(FlagOptions, USE_TS_FLUCT), - sizeof(((FlagOptions *)0)->USE_TS_FLUCT), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "M_MIN_in_Mass", offsetof(FlagOptions, M_MIN_in_Mass), - sizeof(((FlagOptions *)0)->M_MIN_in_Mass), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "FIX_VCB_AVG", offsetof(FlagOptions, FIX_VCB_AVG), - sizeof(((FlagOptions *)0)->FIX_VCB_AVG), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "HALO_STOCHASTICITY", offsetof(FlagOptions, HALO_STOCHASTICITY), - sizeof(((FlagOptions *)0)->HALO_STOCHASTICITY), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "USE_EXP_FILTER", offsetof(FlagOptions, USE_EXP_FILTER), - sizeof(((FlagOptions *)0)->USE_EXP_FILTER), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "FIXED_HALO_GRIDS", offsetof(FlagOptions, FIXED_HALO_GRIDS), - sizeof(((FlagOptions *)0)->FIXED_HALO_GRIDS), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "CELL_RECOMB", offsetof(FlagOptions, CELL_RECOMB), - sizeof(((FlagOptions *)0)->CELL_RECOMB), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "PHOTON_CONS_TYPE", offsetof(FlagOptions, PHOTON_CONS_TYPE), - sizeof(((FlagOptions *)0)->PHOTON_CONS_TYPE), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "USE_UPPER_STELLAR_TURNOVER", offsetof(FlagOptions, USE_UPPER_STELLAR_TURNOVER), - sizeof(((FlagOptions *)0)->USE_UPPER_STELLAR_TURNOVER), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "ALPHA_UVB", offsetof(GlobalParams, ALPHA_UVB), - sizeof(((GlobalParams *)0)->ALPHA_UVB), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "EVOLVE_DENSITY_LINEARLY", offsetof(GlobalParams, EVOLVE_DENSITY_LINEARLY), - sizeof(((GlobalParams *)0)->EVOLVE_DENSITY_LINEARLY), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { 
"SMOOTH_EVOLVED_DENSITY_FIELD", offsetof(GlobalParams, SMOOTH_EVOLVED_DENSITY_FIELD), - sizeof(((GlobalParams *)0)->SMOOTH_EVOLVED_DENSITY_FIELD), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "R_smooth_density", offsetof(GlobalParams, R_smooth_density), - sizeof(((GlobalParams *)0)->R_smooth_density), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "HII_ROUND_ERR", offsetof(GlobalParams, HII_ROUND_ERR), - sizeof(((GlobalParams *)0)->HII_ROUND_ERR), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "FIND_BUBBLE_ALGORITHM", offsetof(GlobalParams, FIND_BUBBLE_ALGORITHM), - sizeof(((GlobalParams *)0)->FIND_BUBBLE_ALGORITHM), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "N_POISSON", offsetof(GlobalParams, N_POISSON), - sizeof(((GlobalParams *)0)->N_POISSON), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "T_USE_VELOCITIES", offsetof(GlobalParams, T_USE_VELOCITIES), - sizeof(((GlobalParams *)0)->T_USE_VELOCITIES), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "MAX_DVDR", offsetof(GlobalParams, MAX_DVDR), - sizeof(((GlobalParams *)0)->MAX_DVDR), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "DELTA_R_HII_FACTOR", offsetof(GlobalParams, DELTA_R_HII_FACTOR), - sizeof(((GlobalParams *)0)->DELTA_R_HII_FACTOR), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "DELTA_R_FACTOR", offsetof(GlobalParams, DELTA_R_FACTOR), - sizeof(((GlobalParams *)0)->DELTA_R_FACTOR), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "HII_FILTER", offsetof(GlobalParams, HII_FILTER), - sizeof(((GlobalParams *)0)->HII_FILTER), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "INITIAL_REDSHIFT", offsetof(GlobalParams, INITIAL_REDSHIFT), - sizeof(((GlobalParams *)0)->INITIAL_REDSHIFT), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "R_OVERLAP_FACTOR", offsetof(GlobalParams, R_OVERLAP_FACTOR), - sizeof(((GlobalParams *)0)->R_OVERLAP_FACTOR), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "DELTA_CRIT_MODE", offsetof(GlobalParams, DELTA_CRIT_MODE), - sizeof(((GlobalParams *)0)->DELTA_CRIT_MODE), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "HALO_FILTER", offsetof(GlobalParams, HALO_FILTER), - sizeof(((GlobalParams 
*)0)->HALO_FILTER), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "OPTIMIZE", offsetof(GlobalParams, OPTIMIZE), - sizeof(((GlobalParams *)0)->OPTIMIZE), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "OPTIMIZE_MIN_MASS", offsetof(GlobalParams, OPTIMIZE_MIN_MASS), - sizeof(((GlobalParams *)0)->OPTIMIZE_MIN_MASS), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "CRIT_DENS_TRANSITION", offsetof(GlobalParams, CRIT_DENS_TRANSITION), - sizeof(((GlobalParams *)0)->CRIT_DENS_TRANSITION), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "MIN_DENSITY_LOW_LIMIT", offsetof(GlobalParams, MIN_DENSITY_LOW_LIMIT), - sizeof(((GlobalParams *)0)->MIN_DENSITY_LOW_LIMIT), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "RecombPhotonCons", offsetof(GlobalParams, RecombPhotonCons), - sizeof(((GlobalParams *)0)->RecombPhotonCons), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "PhotonConsStart", offsetof(GlobalParams, PhotonConsStart), - sizeof(((GlobalParams *)0)->PhotonConsStart), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "PhotonConsEnd", offsetof(GlobalParams, PhotonConsEnd), - sizeof(((GlobalParams *)0)->PhotonConsEnd), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "PhotonConsAsymptoteTo", offsetof(GlobalParams, PhotonConsAsymptoteTo), - sizeof(((GlobalParams *)0)->PhotonConsAsymptoteTo), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "PhotonConsEndCalibz", offsetof(GlobalParams, PhotonConsEndCalibz), - sizeof(((GlobalParams *)0)->PhotonConsEndCalibz), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "PhotonConsSmoothing", offsetof(GlobalParams, PhotonConsSmoothing), - sizeof(((GlobalParams *)0)->PhotonConsSmoothing), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "HEAT_FILTER", offsetof(GlobalParams, HEAT_FILTER), - sizeof(((GlobalParams *)0)->HEAT_FILTER), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "CLUMPING_FACTOR", offsetof(GlobalParams, CLUMPING_FACTOR), - sizeof(((GlobalParams *)0)->CLUMPING_FACTOR), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "Z_HEAT_MAX", offsetof(GlobalParams, Z_HEAT_MAX), - sizeof(((GlobalParams *)0)->Z_HEAT_MAX), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "R_XLy_MAX", 
offsetof(GlobalParams, R_XLy_MAX), - sizeof(((GlobalParams *)0)->R_XLy_MAX), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "NUM_FILTER_STEPS_FOR_Ts", offsetof(GlobalParams, NUM_FILTER_STEPS_FOR_Ts), - sizeof(((GlobalParams *)0)->NUM_FILTER_STEPS_FOR_Ts), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "ZPRIME_STEP_FACTOR", offsetof(GlobalParams, ZPRIME_STEP_FACTOR), - sizeof(((GlobalParams *)0)->ZPRIME_STEP_FACTOR), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "TK_at_Z_HEAT_MAX", offsetof(GlobalParams, TK_at_Z_HEAT_MAX), - sizeof(((GlobalParams *)0)->TK_at_Z_HEAT_MAX), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "XION_at_Z_HEAT_MAX", offsetof(GlobalParams, XION_at_Z_HEAT_MAX), - sizeof(((GlobalParams *)0)->XION_at_Z_HEAT_MAX), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "Pop", offsetof(GlobalParams, Pop), - sizeof(((GlobalParams *)0)->Pop), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "Pop2_ion", offsetof(GlobalParams, Pop2_ion), - sizeof(((GlobalParams *)0)->Pop2_ion), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "Pop3_ion", offsetof(GlobalParams, Pop3_ion), - sizeof(((GlobalParams *)0)->Pop3_ion), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "NU_X_BAND_MAX", offsetof(GlobalParams, NU_X_BAND_MAX), - sizeof(((GlobalParams *)0)->NU_X_BAND_MAX), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "NU_X_MAX", offsetof(GlobalParams, NU_X_MAX), - sizeof(((GlobalParams *)0)->NU_X_MAX), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "NBINS_LF", offsetof(GlobalParams, NBINS_LF), - sizeof(((GlobalParams *)0)->NBINS_LF), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "P_CUTOFF", offsetof(GlobalParams, P_CUTOFF), - sizeof(((GlobalParams *)0)->P_CUTOFF), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "M_WDM", offsetof(GlobalParams, M_WDM), - sizeof(((GlobalParams *)0)->M_WDM), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "g_x", offsetof(GlobalParams, g_x), - sizeof(((GlobalParams *)0)->g_x), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "OMn", offsetof(GlobalParams, OMn), - sizeof(((GlobalParams *)0)->OMn), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "OMk", offsetof(GlobalParams, OMk), - 
sizeof(((GlobalParams *)0)->OMk), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "OMr", offsetof(GlobalParams, OMr), - sizeof(((GlobalParams *)0)->OMr), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "OMtot", offsetof(GlobalParams, OMtot), - sizeof(((GlobalParams *)0)->OMtot), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "Y_He", offsetof(GlobalParams, Y_He), - sizeof(((GlobalParams *)0)->Y_He), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "wl", offsetof(GlobalParams, wl), - sizeof(((GlobalParams *)0)->wl), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "SHETH_b", offsetof(GlobalParams, SHETH_b), - sizeof(((GlobalParams *)0)->SHETH_b), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "SHETH_c", offsetof(GlobalParams, SHETH_c), - sizeof(((GlobalParams *)0)->SHETH_c), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "Zreion_HeII", offsetof(GlobalParams, Zreion_HeII), - sizeof(((GlobalParams *)0)->Zreion_HeII), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "FILTER", offsetof(GlobalParams, FILTER), - sizeof(((GlobalParams *)0)->FILTER), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "external_table_path", offsetof(GlobalParams, external_table_path), - sizeof(((GlobalParams *)0)->external_table_path), - _CFFI_OP(_CFFI_OP_NOOP, 516) }, - { "wisdoms_path", offsetof(GlobalParams, wisdoms_path), - sizeof(((GlobalParams *)0)->wisdoms_path), - _CFFI_OP(_CFFI_OP_NOOP, 516) }, - { "R_BUBBLE_MIN", offsetof(GlobalParams, R_BUBBLE_MIN), - sizeof(((GlobalParams *)0)->R_BUBBLE_MIN), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "M_MIN_INTEGRAL", offsetof(GlobalParams, M_MIN_INTEGRAL), - sizeof(((GlobalParams *)0)->M_MIN_INTEGRAL), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "M_MAX_INTEGRAL", offsetof(GlobalParams, M_MAX_INTEGRAL), - sizeof(((GlobalParams *)0)->M_MAX_INTEGRAL), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "T_RE", offsetof(GlobalParams, T_RE), - sizeof(((GlobalParams *)0)->T_RE), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "VAVG", offsetof(GlobalParams, VAVG), - sizeof(((GlobalParams *)0)->VAVG), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "USE_ADIABATIC_FLUCTUATIONS", 
offsetof(GlobalParams, USE_ADIABATIC_FLUCTUATIONS), - sizeof(((GlobalParams *)0)->USE_ADIABATIC_FLUCTUATIONS), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "halo_mass", offsetof(HaloBox, halo_mass), - sizeof(((HaloBox *)0)->halo_mass), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "halo_stars", offsetof(HaloBox, halo_stars), - sizeof(((HaloBox *)0)->halo_stars), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "halo_stars_mini", offsetof(HaloBox, halo_stars_mini), - sizeof(((HaloBox *)0)->halo_stars_mini), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "count", offsetof(HaloBox, count), - sizeof(((HaloBox *)0)->count), - _CFFI_OP(_CFFI_OP_NOOP, 217) }, - { "n_ion", offsetof(HaloBox, n_ion), - sizeof(((HaloBox *)0)->n_ion), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "halo_sfr", offsetof(HaloBox, halo_sfr), - sizeof(((HaloBox *)0)->halo_sfr), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "halo_xray", offsetof(HaloBox, halo_xray), - sizeof(((HaloBox *)0)->halo_xray), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "halo_sfr_mini", offsetof(HaloBox, halo_sfr_mini), - sizeof(((HaloBox *)0)->halo_sfr_mini), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "whalo_sfr", offsetof(HaloBox, whalo_sfr), - sizeof(((HaloBox *)0)->whalo_sfr), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "log10_Mcrit_ACG_ave", offsetof(HaloBox, log10_Mcrit_ACG_ave), - sizeof(((HaloBox *)0)->log10_Mcrit_ACG_ave), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "log10_Mcrit_MCG_ave", offsetof(HaloBox, log10_Mcrit_MCG_ave), - sizeof(((HaloBox *)0)->log10_Mcrit_MCG_ave), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "n_halos", offsetof(HaloField, n_halos), - sizeof(((HaloField *)0)->n_halos), - _CFFI_OP(_CFFI_OP_NOOP, 317) }, - { "buffer_size", offsetof(HaloField, buffer_size), - sizeof(((HaloField *)0)->buffer_size), - _CFFI_OP(_CFFI_OP_NOOP, 317) }, - { "halo_masses", offsetof(HaloField, halo_masses), - sizeof(((HaloField *)0)->halo_masses), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "halo_coords", offsetof(HaloField, halo_coords), - sizeof(((HaloField *)0)->halo_coords), - _CFFI_OP(_CFFI_OP_NOOP, 
217) }, - { "star_rng", offsetof(HaloField, star_rng), - sizeof(((HaloField *)0)->star_rng), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "sfr_rng", offsetof(HaloField, sfr_rng), - sizeof(((HaloField *)0)->sfr_rng), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "xray_rng", offsetof(HaloField, xray_rng), - sizeof(((HaloField *)0)->xray_rng), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "lowres_density", offsetof(InitialConditions, lowres_density), - sizeof(((InitialConditions *)0)->lowres_density), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "lowres_vx", offsetof(InitialConditions, lowres_vx), - sizeof(((InitialConditions *)0)->lowres_vx), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "lowres_vy", offsetof(InitialConditions, lowres_vy), - sizeof(((InitialConditions *)0)->lowres_vy), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "lowres_vz", offsetof(InitialConditions, lowres_vz), - sizeof(((InitialConditions *)0)->lowres_vz), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "lowres_vx_2LPT", offsetof(InitialConditions, lowres_vx_2LPT), - sizeof(((InitialConditions *)0)->lowres_vx_2LPT), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "lowres_vy_2LPT", offsetof(InitialConditions, lowres_vy_2LPT), - sizeof(((InitialConditions *)0)->lowres_vy_2LPT), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "lowres_vz_2LPT", offsetof(InitialConditions, lowres_vz_2LPT), - sizeof(((InitialConditions *)0)->lowres_vz_2LPT), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "hires_density", offsetof(InitialConditions, hires_density), - sizeof(((InitialConditions *)0)->hires_density), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "hires_vx", offsetof(InitialConditions, hires_vx), - sizeof(((InitialConditions *)0)->hires_vx), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "hires_vy", offsetof(InitialConditions, hires_vy), - sizeof(((InitialConditions *)0)->hires_vy), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "hires_vz", offsetof(InitialConditions, hires_vz), - sizeof(((InitialConditions *)0)->hires_vz), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "hires_vx_2LPT", offsetof(InitialConditions, 
hires_vx_2LPT), - sizeof(((InitialConditions *)0)->hires_vx_2LPT), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "hires_vy_2LPT", offsetof(InitialConditions, hires_vy_2LPT), - sizeof(((InitialConditions *)0)->hires_vy_2LPT), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "hires_vz_2LPT", offsetof(InitialConditions, hires_vz_2LPT), - sizeof(((InitialConditions *)0)->hires_vz_2LPT), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "lowres_vcb", offsetof(InitialConditions, lowres_vcb), - sizeof(((InitialConditions *)0)->lowres_vcb), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "mean_f_coll", offsetof(IonizedBox, mean_f_coll), - sizeof(((IonizedBox *)0)->mean_f_coll), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "mean_f_coll_MINI", offsetof(IonizedBox, mean_f_coll_MINI), - sizeof(((IonizedBox *)0)->mean_f_coll_MINI), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "log10_Mturnover_ave", offsetof(IonizedBox, log10_Mturnover_ave), - sizeof(((IonizedBox *)0)->log10_Mturnover_ave), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "log10_Mturnover_MINI_ave", offsetof(IonizedBox, log10_Mturnover_MINI_ave), - sizeof(((IonizedBox *)0)->log10_Mturnover_MINI_ave), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "xH_box", offsetof(IonizedBox, xH_box), - sizeof(((IonizedBox *)0)->xH_box), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "Gamma12_box", offsetof(IonizedBox, Gamma12_box), - sizeof(((IonizedBox *)0)->Gamma12_box), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "MFP_box", offsetof(IonizedBox, MFP_box), - sizeof(((IonizedBox *)0)->MFP_box), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "z_re_box", offsetof(IonizedBox, z_re_box), - sizeof(((IonizedBox *)0)->z_re_box), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "dNrec_box", offsetof(IonizedBox, dNrec_box), - sizeof(((IonizedBox *)0)->dNrec_box), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "temp_kinetic_all_gas", offsetof(IonizedBox, temp_kinetic_all_gas), - sizeof(((IonizedBox *)0)->temp_kinetic_all_gas), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "Fcoll", offsetof(IonizedBox, Fcoll), - sizeof(((IonizedBox *)0)->Fcoll), - 
_CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "Fcoll_MINI", offsetof(IonizedBox, Fcoll_MINI), - sizeof(((IonizedBox *)0)->Fcoll_MINI), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "n_halos", offsetof(PerturbHaloField, n_halos), - sizeof(((PerturbHaloField *)0)->n_halos), - _CFFI_OP(_CFFI_OP_NOOP, 317) }, - { "buffer_size", offsetof(PerturbHaloField, buffer_size), - sizeof(((PerturbHaloField *)0)->buffer_size), - _CFFI_OP(_CFFI_OP_NOOP, 317) }, - { "halo_masses", offsetof(PerturbHaloField, halo_masses), - sizeof(((PerturbHaloField *)0)->halo_masses), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "halo_coords", offsetof(PerturbHaloField, halo_coords), - sizeof(((PerturbHaloField *)0)->halo_coords), - _CFFI_OP(_CFFI_OP_NOOP, 217) }, - { "star_rng", offsetof(PerturbHaloField, star_rng), - sizeof(((PerturbHaloField *)0)->star_rng), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "sfr_rng", offsetof(PerturbHaloField, sfr_rng), - sizeof(((PerturbHaloField *)0)->sfr_rng), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "xray_rng", offsetof(PerturbHaloField, xray_rng), - sizeof(((PerturbHaloField *)0)->xray_rng), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "density", offsetof(PerturbedField, density), - sizeof(((PerturbedField *)0)->density), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "velocity_x", offsetof(PerturbedField, velocity_x), - sizeof(((PerturbedField *)0)->velocity_x), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "velocity_y", offsetof(PerturbedField, velocity_y), - sizeof(((PerturbedField *)0)->velocity_y), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "velocity_z", offsetof(PerturbedField, velocity_z), - sizeof(((PerturbedField *)0)->velocity_z), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "Ts_box", offsetof(TsBox, Ts_box), - sizeof(((TsBox *)0)->Ts_box), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "x_e_box", offsetof(TsBox, x_e_box), - sizeof(((TsBox *)0)->x_e_box), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "Tk_box", offsetof(TsBox, Tk_box), - sizeof(((TsBox *)0)->Tk_box), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "J_21_LW_box", offsetof(TsBox, 
J_21_LW_box), - sizeof(((TsBox *)0)->J_21_LW_box), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "HII_DIM", offsetof(UserParams, HII_DIM), - sizeof(((UserParams *)0)->HII_DIM), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "DIM", offsetof(UserParams, DIM), - sizeof(((UserParams *)0)->DIM), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "BOX_LEN", offsetof(UserParams, BOX_LEN), - sizeof(((UserParams *)0)->BOX_LEN), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "NON_CUBIC_FACTOR", offsetof(UserParams, NON_CUBIC_FACTOR), - sizeof(((UserParams *)0)->NON_CUBIC_FACTOR), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "USE_FFTW_WISDOM", offsetof(UserParams, USE_FFTW_WISDOM), - sizeof(((UserParams *)0)->USE_FFTW_WISDOM), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "HMF", offsetof(UserParams, HMF), - sizeof(((UserParams *)0)->HMF), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "USE_RELATIVE_VELOCITIES", offsetof(UserParams, USE_RELATIVE_VELOCITIES), - sizeof(((UserParams *)0)->USE_RELATIVE_VELOCITIES), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "POWER_SPECTRUM", offsetof(UserParams, POWER_SPECTRUM), - sizeof(((UserParams *)0)->POWER_SPECTRUM), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "N_THREADS", offsetof(UserParams, N_THREADS), - sizeof(((UserParams *)0)->N_THREADS), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "PERTURB_ON_HIGH_RES", offsetof(UserParams, PERTURB_ON_HIGH_RES), - sizeof(((UserParams *)0)->PERTURB_ON_HIGH_RES), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "NO_RNG", offsetof(UserParams, NO_RNG), - sizeof(((UserParams *)0)->NO_RNG), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "USE_INTERPOLATION_TABLES", offsetof(UserParams, USE_INTERPOLATION_TABLES), - sizeof(((UserParams *)0)->USE_INTERPOLATION_TABLES), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "INTEGRATION_METHOD_ATOMIC", offsetof(UserParams, INTEGRATION_METHOD_ATOMIC), - sizeof(((UserParams *)0)->INTEGRATION_METHOD_ATOMIC), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "INTEGRATION_METHOD_MINI", offsetof(UserParams, INTEGRATION_METHOD_MINI), - sizeof(((UserParams *)0)->INTEGRATION_METHOD_MINI), - 
_CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "USE_2LPT", offsetof(UserParams, USE_2LPT), - sizeof(((UserParams *)0)->USE_2LPT), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "MINIMIZE_MEMORY", offsetof(UserParams, MINIMIZE_MEMORY), - sizeof(((UserParams *)0)->MINIMIZE_MEMORY), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "KEEP_3D_VELOCITIES", offsetof(UserParams, KEEP_3D_VELOCITIES), - sizeof(((UserParams *)0)->KEEP_3D_VELOCITIES), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "SAMPLER_MIN_MASS", offsetof(UserParams, SAMPLER_MIN_MASS), - sizeof(((UserParams *)0)->SAMPLER_MIN_MASS), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "SAMPLER_BUFFER_FACTOR", offsetof(UserParams, SAMPLER_BUFFER_FACTOR), - sizeof(((UserParams *)0)->SAMPLER_BUFFER_FACTOR), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "MAXHALO_FACTOR", offsetof(UserParams, MAXHALO_FACTOR), - sizeof(((UserParams *)0)->MAXHALO_FACTOR), - _CFFI_OP(_CFFI_OP_NOOP, 161) }, - { "N_COND_INTERP", offsetof(UserParams, N_COND_INTERP), - sizeof(((UserParams *)0)->N_COND_INTERP), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "N_PROB_INTERP", offsetof(UserParams, N_PROB_INTERP), - sizeof(((UserParams *)0)->N_PROB_INTERP), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "MIN_LOGPROB", offsetof(UserParams, MIN_LOGPROB), - sizeof(((UserParams *)0)->MIN_LOGPROB), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "SAMPLE_METHOD", offsetof(UserParams, SAMPLE_METHOD), - sizeof(((UserParams *)0)->SAMPLE_METHOD), - _CFFI_OP(_CFFI_OP_NOOP, 120) }, - { "AVG_BELOW_SAMPLER", offsetof(UserParams, AVG_BELOW_SAMPLER), - sizeof(((UserParams *)0)->AVG_BELOW_SAMPLER), - _CFFI_OP(_CFFI_OP_NOOP, 65) }, - { "HALOMASS_CORRECTION", offsetof(UserParams, HALOMASS_CORRECTION), - sizeof(((UserParams *)0)->HALOMASS_CORRECTION), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "PARKINSON_G0", offsetof(UserParams, PARKINSON_G0), - sizeof(((UserParams *)0)->PARKINSON_G0), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "PARKINSON_y1", offsetof(UserParams, PARKINSON_y1), - sizeof(((UserParams *)0)->PARKINSON_y1), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { 
"PARKINSON_y2", offsetof(UserParams, PARKINSON_y2), - sizeof(((UserParams *)0)->PARKINSON_y2), - _CFFI_OP(_CFFI_OP_NOOP, 1) }, - { "filtered_sfr", offsetof(XraySourceBox, filtered_sfr), - sizeof(((XraySourceBox *)0)->filtered_sfr), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "filtered_xray", offsetof(XraySourceBox, filtered_xray), - sizeof(((XraySourceBox *)0)->filtered_xray), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "filtered_sfr_mini", offsetof(XraySourceBox, filtered_sfr_mini), - sizeof(((XraySourceBox *)0)->filtered_sfr_mini), - _CFFI_OP(_CFFI_OP_NOOP, 174) }, - { "mean_log10_Mcrit_LW", offsetof(XraySourceBox, mean_log10_Mcrit_LW), - sizeof(((XraySourceBox *)0)->mean_log10_Mcrit_LW), - _CFFI_OP(_CFFI_OP_NOOP, 207) }, - { "mean_sfr", offsetof(XraySourceBox, mean_sfr), - sizeof(((XraySourceBox *)0)->mean_sfr), - _CFFI_OP(_CFFI_OP_NOOP, 207) }, - { "mean_sfr_mini", offsetof(XraySourceBox, mean_sfr_mini), - sizeof(((XraySourceBox *)0)->mean_sfr_mini), - _CFFI_OP(_CFFI_OP_NOOP, 207) }, -}; - -static const struct _cffi_struct_union_s _cffi_struct_unions[] = { - { "AstroParams", 502, _CFFI_F_CHECK_FIELDS, - sizeof(AstroParams), offsetof(struct _cffi_align__AstroParams, y), 0, 32 }, - { "BrightnessTemp", 503, _CFFI_F_CHECK_FIELDS, - sizeof(BrightnessTemp), offsetof(struct _cffi_align__BrightnessTemp, y), 32, 1 }, - { "CosmoParams", 504, _CFFI_F_CHECK_FIELDS, - sizeof(CosmoParams), offsetof(struct _cffi_align__CosmoParams, y), 33, 6 }, - { "FlagOptions", 505, _CFFI_F_CHECK_FIELDS, - sizeof(FlagOptions), offsetof(struct _cffi_align__FlagOptions, y), 39, 17 }, - { "GlobalParams", 506, _CFFI_F_CHECK_FIELDS, - sizeof(GlobalParams), offsetof(struct _cffi_align__GlobalParams, y), 56, 61 }, - { "HaloBox", 507, _CFFI_F_CHECK_FIELDS, - sizeof(HaloBox), offsetof(struct _cffi_align__HaloBox, y), 117, 11 }, - { "HaloField", 508, _CFFI_F_CHECK_FIELDS, - sizeof(HaloField), offsetof(struct _cffi_align__HaloField, y), 128, 7 }, - { "InitialConditions", 509, _CFFI_F_CHECK_FIELDS, - 
sizeof(InitialConditions), offsetof(struct _cffi_align__InitialConditions, y), 135, 15 }, - { "IonizedBox", 510, _CFFI_F_CHECK_FIELDS, - sizeof(IonizedBox), offsetof(struct _cffi_align__IonizedBox, y), 150, 12 }, - { "PerturbHaloField", 511, _CFFI_F_CHECK_FIELDS, - sizeof(PerturbHaloField), offsetof(struct _cffi_align__PerturbHaloField, y), 162, 7 }, - { "PerturbedField", 512, _CFFI_F_CHECK_FIELDS, - sizeof(PerturbedField), offsetof(struct _cffi_align__PerturbedField, y), 169, 4 }, - { "TsBox", 513, _CFFI_F_CHECK_FIELDS, - sizeof(TsBox), offsetof(struct _cffi_align__TsBox, y), 173, 4 }, - { "UserParams", 514, _CFFI_F_CHECK_FIELDS, - sizeof(UserParams), offsetof(struct _cffi_align__UserParams, y), 177, 29 }, - { "XraySourceBox", 515, _CFFI_F_CHECK_FIELDS, - sizeof(XraySourceBox), offsetof(struct _cffi_align__XraySourceBox, y), 206, 6 }, -}; - -static const struct _cffi_typename_s _cffi_typenames[] = { - { "AstroParams", 502 }, - { "BrightnessTemp", 503 }, - { "CosmoParams", 504 }, - { "FlagOptions", 505 }, - { "GlobalParams", 506 }, - { "HaloBox", 507 }, - { "HaloField", 508 }, - { "InitialConditions", 509 }, - { "IonizedBox", 510 }, - { "PerturbHaloField", 511 }, - { "PerturbedField", 512 }, - { "TsBox", 513 }, - { "UserParams", 514 }, - { "XraySourceBox", 515 }, -}; - -static const struct _cffi_type_context_s _cffi_type_context = { - _cffi_types, - _cffi_globals, - _cffi_fields, - _cffi_struct_unions, - NULL, /* no enums */ - _cffi_typenames, - 77, /* num_globals */ - 14, /* num_struct_unions */ - 0, /* num_enums */ - 14, /* num_typenames */ - NULL, /* no includes */ - 519, /* num_types */ - 0, /* flags */ -}; - -#ifdef __GNUC__ -# pragma GCC visibility push(default) /* for -fvisibility= */ -#endif - -#ifdef PYPY_VERSION -PyMODINIT_FUNC -_cffi_pypyinit_c_21cmfast(const void *p[]) -{ - p[0] = (const void *)0x2601; - p[1] = &_cffi_type_context; -#if PY_MAJOR_VERSION >= 3 - return NULL; -#endif -} -# ifdef _MSC_VER - PyMODINIT_FUNC -# if PY_MAJOR_VERSION >= 3 - 
PyInit_c_21cmfast(void) { return NULL; } -# else - initc_21cmfast(void) { } -# endif -# endif -#elif PY_MAJOR_VERSION >= 3 -PyMODINIT_FUNC -PyInit_c_21cmfast(void) -{ - return _cffi_init("py21cmfast.c_21cmfast", 0x2601, &_cffi_type_context); -} -#else -PyMODINIT_FUNC -initc_21cmfast(void) -{ - _cffi_init("py21cmfast.c_21cmfast", 0x2601, &_cffi_type_context); -} -#endif - -#ifdef __GNUC__ -# pragma GCC visibility pop -#endif diff --git a/py21cmfast/c_21cmfast.o b/py21cmfast/c_21cmfast.o deleted file mode 100644 index fa989aa195457ec2dc4fe4f2c8253ee9d4898ef2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 430416 zcmeFa2Ut``_c*+Ev8~u3Q6nlwVg+Ntm?&kzML`Hfv93rFlonVF7*Ui3cf~?XRANlT zZt|+ZB$g12sHj+COEYRLNh~WGj6Ih4oipX`y?2|wZ{G6zf8lx9yLZl+nKNfjn=^Co z&hQKLuUVspg8$V}3{;rzc&S2BXs9dPc#&^jigt<@6zYOHc(+H{PDe`r>U$%$8W-1t zx9>edZMgSEZ73|o5AWgXd3_KiB21lo%~L&3?3e!OyydiqNokktgI=N z(dxMso};qUmTt8gQT~ za6phxIlUl2hI=06U(|$22`j}_)_)2Sw#tCI6G2j9~hso4TovWw;BXk?>J#J5&vK`DX-obcK(Bf-pc`pf&?^8|h;_3SLvuGn+bc$^hVdaqo)R?Rb!}HMS|vmClAy&Y-L5Eg zUjYm%BvNHo3lazgx^lx_Ey^u?CpAHx5D(-wOH?YBTL1%+t1HaOW1QHZ@Z($jIQn$+ z90{5{#+xg94iG2O_dS0SeUBvi&an!C6qC9z0+l*9LEkOQ2ip2l{4kdfHOhx4kq_x7 zl?+dm3_39&iY_P_BiM9Fbsk16wYcsKO{O+fGU_m(x!6b~;~J2RQ*JIm63N)h&}0In zbUUxqeF-pNS`3|XPhrZT0H%BonR58fryTKM%BRpOq*N_3=m1u3eTcJbbF!&bmnyYd zG}$KqM|(iFUZ?Y|wl&|BI;F0wc)stN+XDv}bSZwA&vz=$_joej_CBcuj7JHGq;%$1 zcp<5G_|#Hv8uoxERO*h)=Cu0BP35Tj%AT(Q9f~+_<@7(rYPy+W^Qo;q0N9nvo{c&7 zQ89Kd!{#$weFCtLDSHxcjBS6X)V;xQxP7QT4LB#2J=3pb=TEeEMo=1mjo(fQz#2`Ft`y_H*U8_rQ|_ymf8 zG0ETI$5FV40pV8~gwP&&_#qY4c#)tM(tdE#+f(_Bm&&IJJ`>&$E0p;SFj=ei!QAhrMYFDUeLNpFGBSuylVhnQP29=Wgq zLJFKSE;d%e%{+$2#l?gMZZVPpxo}V*wd`Uc@?v;Y`9S8xLJy1%ckL!dcVy_}3Blak zl4hHV@r+EA&|9I<_X8q-{R zi);9>8hTL;GadC5*J#RW6pCt?sp+dqoioEL0z8QYQ!+rY39y%#wz>?kUI2I-_Ug*D zq6Nk~03xgiFjO$@%Dq+}bn-jHZv`q>6TKbs~3qlJmE9O z)g8*mxCX-aaOV8b`_gs;4VOtrBCV}@p0B4U1SNIp<0ntz3O&lzGb>PsL`sc%Xu|7< zdVA8m{wXXkH{tz)_a&zLYj|G_@rDKe(+D~&#Se3R)RloiOJWKkuU#4FT(MOgjw;nLL 
zNLc^T_1*K(K`DNi>ysz)f1b|O4X(sL{y+4E`eGaq`eGDEI_onl%7GWC^huEIL2(j9 z)MgYph+zw|YUWTIDRm7Pk|3e7FmjBmL`kY@Qwjt>Xy&6@h*hwRpiBLrnU9ia%p6t& zg^Uazl*UZW+lc#!WHrQo&`i_8g{v7J`a#~Z8J;9){<9Viw#>CyRKtuGH!&@TS&j9g z8fN2_gfUB5jS^7}GoDIfi+EN8-JI2NNMeh=tVX%0hS@kIu|;iGqf%7EY#gtOM}C5M zhcee=q8etRVQE-<7%67u7K9V~kR_n6-=%AvNp<8pwUX z02l%a&V__{?=T2j8CGSF4whV_SvguEUSprs9SK|w5V@l<;Es+cw|=8CTryOnE835) zXgq$z;72@uB;rRZerWL{6F;Vf6zE)YfQfk3v8$e4=dvrRHhQ%(*LUF?eEHlJi{Q%! zSHyuYt6i}Yd|5_XfiLg6=FpNr*J*^`5IUvOF$J3Yy{cByIPuB@A)Z8^W7Xe%{Lr$$ zb4EW{PWwgEJw_H!E+O%x2_Av>g{FHA-sh0Tqj>+|8S}lXfdl9;nv#YsM0nq4n9{ye z5!0Dtid{=aD_F1unilBr71KrmqEaDK2%HBniAG3Ml;diH)yyyQ9j1ooeNDlKvYlDJ zp2i0j@_RJ<>-Egpy(}W{CI*>Su9E$fP=?02V?u)kZf6E`VXR|7wQ{;7ZP$dM@z~td zMuk}Ec{J!QZ4I(}8P2e}t?f$6VX5TmN|Geo*c8J`%F&eqO^abb!c=gE3jvbFaeY&L zvH~LszzhJ9{4Q@om`;jAy2Cu+nK(5DoKK(h^&*;MzQ1z_}~QLRZ*5!u7DUJ+enaB|ymYvIhCijgTeFt*Eb# zemHt8OJJcfRhZ@5aelslU_nue9}#T#N!ZPF51|WKcU-_y!KeF}KLNJb+?bR>qIX*v znoPA|GfJt;0T^&0kfyp#Ta{2Cq`Kid7k9)1n>CSEA*BKffN=n3sc!ozLwAtrab&-e z&VsZHr7jNEHZTkBM7CjT%Vg3l+Vg*ge_t@hI8zB=%l|E`RK_m(UIpe=k@<8&xU~R7 zQK=nfojuRCt4n!gi9gz9kfj0cFec^zF$y{z=qFzT-g%T)ou&JMa(>yn|6kjm{cqDF z*Z(jk_iQ(sslL>Th>`)L#j=ZjTF!s!k6{U^D8&!cdY$@XvfJiGQeXU&+5<1N2cDD| zw$J8d)x}CI0!HX4yXGPeo_H?<$^=`!1G|BtnYq}LO5I9;fz*znR>ib|1_kT@M6rOu zm}xoy0Z8Z?zZGXc2@=%<&Sy)fAguYx?$nmZ{G?Q`kXY0m73Ng!u=8c zsmL4^%I8xeuds(x&JLK601$i&X%QkkbTeQ~)er#kQo%t{u!Uh;KH|Xl{hNZd+#{z) zelPbw5#WB%*{}m923W&rO#y9xLWE%D!M>9Md!9}O5(dr>t+ze-m(wS|r~1(lB&HNU zO!`63E6lux!i4!OvcmbRzR*MxHi?NOJzOaxSn7B%S?tPB7J%SNceGs?nir`ocnP|r zUB$FjEd|PB-ZKm>M2aR?$Sn*l(hN-!LC|iU3_>CxhqIY;is-0|IJ7^)Q1x=ujOPim8|jKfac(5G{{Rq05JAN- zDnvtb5kJn53ZACGoG=6-`<`fB-SHnVrjbvrs~dbzWY&{X2(s?NEt8<4Y?)EzLj5oy zjM$iZ5VxD+8M5Dk2{o(hj$#xS@%zUxW)lB|CRhFW3~)lQ1w^D&fkOnl!b1g4Yn75;z*j9l*RM|Ab%hi|BM97K#6fDyxYqdl6M;Cjkg^ z$ppFMBGIa2aw2gf!6tqw-LyY>5=gVE&y6JdY^v{sv2nTS-UaVVEc+`P=XWZRU!ZWv zd!q0E<}Xyuu@h8tK{$YQX=`L}(VXyN9h60U2m~fFQx7gLl7of(kTN||?_#-`zLKdR_TV1Ei!JMK zJ?QW+K0nS+W73F&&k8y@iJ}c5vS1~9(T)M#Bm#wPNED5p&VqQ}3{9eF=ZGZ8S9vf~ 
z(!~-A3I=gr($>iN@l61O3W+oqA~`>fxsbhZK2X4*oaakxA|dtR6ZRj_A((`VN*v9x z5@M`bc#tOXfpPgd?TRzQpK9-mJGRnABO5PUT?Aw;JXhoGY|dO8E- ziBBS-U4RBn8?TMX#;YzEtOi&hd_qllUP;bRNj{Z(1^IJSFczf!Kk~eomFEvJhw6?q zZ0GSqx(czoT~4DA(_sSB?jFRG$E*p#G1&%p3%bFLUWf}u3p2rJfr4?82u2H;VBkH= zeP<-X3_=f{bv$pw5hwsYA=5}&Sq`n3 zVF^~nLWbT=oYe$kT%a+nEA#=2six3UZDM}}#{?-`Guy)-?f|Zj&dU82mDl>vQYn7O^O?EV;!$2BiM(3>q>>tml1ir*s3lL? ze#7QU=KtlZqzw$sT%bWx`IEXbF_ueeJZnQH&~i!jW@zRDEm=%G&(Nd-O{QOxwYUjP zLtiD8Xz)-sl)4)X775j~!ihYd3II`P3V8R4=)rd?T3mEV0muv$Jv{OVaJUsUC5Rr8 zR1_<05j@&OB4H%EZt?UM>4|NiO0wtmskG!)h#!%YufyJ;awdIF4v?rklC}i}1cHd{ zk+u6li|egtJn!{id!F4s)5ufpAo%=TBgt2aV)-1HME1PH|JeSp6Z0qLsw43yH>|3T z$e&0R2ErlHV;n=1x&k^?Vu#X~;l&dH=V3XORi)0IK~v44 zk}UzQ&za_UlB{AS%(L|kVmGDk2Zo@BE6>?Uk6h;gg!CdPIi%*?P`PPlh2I?o`Ff1S z`-l;Mr4$$e^fQUX&%`jRLq}@EyI9ySLw~~d%P{9D@jYIQIP@#K(XR;my_9GyZ;d&Y za?A4npW6Sm9sx-Ndp{EZbNL6sUdVvv4nvZp`&T0%2yMg?+ne=jrT`ETi18XRJPR5C zsc@b-v>^8qiQPv8()83C_fZgVfw-PzQ1-Z*{K-&*nXF=>St6_ML^|EEj7TgIQzFhN zw{SWEKH+gSB0NM^ZSL*1g>{ul&c>sh3h&#ozrRbK>$D)R&l7pI93Rz&Axhuh^Y{8$ z`CnN-Lx3tifBbj8>R-%P5q|z(c%Kk;>K0s>5VTs1zbxzXzahU*cO5Ndz$C6_4zt%T z#DMWg1Pmh{HPSGkn-nNt>G(4=cPW}Y)Z)8K=3Y&*(z(y9yUf4gLnOG`Fl9lz(d=9$ z!&d{an^|_Q8)oNjWME>hn=Ctr_Yj?6>Qk9F3t9mjxHUB+#5xoYKA|;S&)|o(l}v3{ zGnp`@b8PJ=y+hYAvi+pL_K(wj4;5D$PY%NJLF4y2TuzX$0i^QO5f)w!2%s14aCJ?;h?ekML8&~P=-a93^EPi-q0 zZ2canN?@kVnn;O`Uf-K@ zJ5=xY!{vqo29XKDT_#N>fh<9R)&~pJ`w_MHI9c_;`u>*e#7+m8{cgPnI3_uXTQq8c|qmg z5rOmkwo}pe3So~d(WeNTlh7x7JLL~~af&hzpOy#amGeS)bCEKSoMW69QKBx$sFbRQ zVK>nb+}R8)$gX5s!d2uD%9b@1De<^VCL+-AK&l^^*=&OLQ6kg@5hd_|{2lnGQzx3! 
zdlXeVr*OJ42YR+NuP06O>aBqXRAlEM@f;&?x#~Mv`BluzTmpTXo%&aTODZs?iHt!TDy(3(4C`sTG6SJ_&PujLtQUi7|IX zGY%QY*u;UcW0Q-v40g^Fz{_@FtaQFLFa)xM4 z3IWz7jwZtU^U4&H$DHj{jn}LSk#75LG7&wBef_uf}m6vGv zatJvfau#fIrH$q?w=kLG@8bd$KxrqXlplhcg4JkA~T4v4}E|) zAzxyXmAY=M&}5g(jd&K1%IQBh|7W2~7|K6A|5Z>QCtf;p`M84z3L(t@RLT4&%I4ny z*p}v>JpW_OLfD^w!7d`dO{$HOprV5Hta9)flLNtzpvB8BC#530W675&byHyap3R?> z17FadDE0j;$pbkLA+jlk>~iB}i9A@e|0JgI18Wcj)&OimB?dO0V(YI>7uj+!3Csyl zo!2JAe1k0~I?pwgtf#Hb)OH)HmDPd!E3m8<=ZP2~Yx1d$GB+Ff&CpBj_v75lTq6;_ zB@*GYDMk8tWIIGS+}~Be*-UMmnGk?6$lk7E5uIH*X!Ue*Kzf16*A^Ntz@!$!h_(yz za1=^RqaraIJU_(>P4;2C5#Oar^#Bd21$i$wJ^&PuOA;>#%PEWT!gdlblu#d{hMj7^ za?Vv4`YWQ zqFfS0OnC*kQY>)g05&2-vgqEnu@bd`Ih*3DcDDUMu~=ATv4>JmpMMnM93XSOl*}2F zOlhn$xR_1HHP5LlmXW}WuM?vc$MkPyiA#HxIo>|rIbhvjGw@tJzRV`AynP6qy2lTTuYKsF^taTJQZ zHR>gDCJSG(TAGC}UJ7Cdd4Me!JnWwbVU;`xBhPc01TXVr(^P<2Stb=YF*9Z4DD_Fi z4vM1$LB&yhwF#!)kMZ8iP^L{Wu~Lt(78g^X)S)GmPytcDX<=+e1^PCa15q$4aZE}b zraH}nktuzF3X|nr$&4g7uS#7ZD-mqO_NvxQ?kw0#Th2_WS3z4$|4gxBvAQ4#Y#&)6 z(G>x;8RD4HvCvH4?8l46Ov#k`!yxS@-ZFo%jTM^wL9sC(b<0-#v)*_UP^^!?#25)< zwB;|JR-ck>=@Up)hFqT}{nPq{`hDnGCTTpmC!&ug9ZCiVhn+?a{a0U z6p+`ySd7O?NIbTl#$$NK72+}Y*v{P+9FcMX>zo0T3o!r>tn}iDMVktWwqEAA-nOM^ zdK)%6%hhx~fa;qpWPouIKL*JnPEMhO9FZR@;hw~Y>RTb34V&?Np$+PkJf0VlTJ(8a zqR$fFUL>IjY>_PPROTO{TFAp3rM|J57Cw~Azs-O}e;CnCDKzs4SO?=bOQTKkl*s99 z)~87eGvrhhSAjLOzW$ZOC1&w&XK0P-ArS^@X5{dV&D2ugRMRJdwIO zc!3Qmy#?>T;q5@86@K!;q&G6XG;U@3vfocH=;LxIGxK+ISfR<^>5ajrb}g@e3XKO@ z`MH$Solk*DAVWP{G(18#YOM9Ok$0ePKyoP5!W)#FyJKzQhy05MRParA%8f z%oMGZr0OvNv=yRlNtpSIEp4@>4_U3u!c340qMxETSE>uR-j@00{FPKcxh`aWWSSYU zKJOC~$=6>J9We8UiKs-@bpYc`dFmlwMSCw6WTUg0CSsCRLWM-g&8WD9DkI5>Nfvn) zqusOA9;_FrSi$tpBsL3JiQo@y#|C)X3aAi8yONk&siXElmU<)hfajr=`qpMtILN6e zv271*hvZ*^7LXA_Zis3+7ywA+`GJ9_>rNN~U35wf0q(9mTb>l`9QOc9y)X1@k}~Fx zd$2;2{ude_KeFSGi;R(ol)!jOLy|FKF6yR3%-?ZQIbpBeYdmTl(Ae*ig zWG7cT5m<)Ef$HVClr9v-su2;b*0ysW`ytgL6NkS>ffknTX!gkObU=w9zqV|PQa=|P zu>#evJ13Y%8O4}iNFin z79|JJ*jLkz|Ad+@k*n!GGX?pa 
zl?b9~TTSWte{o_<>hLWr#PiAfNK83CrQXYoiXL(*Y^&)Llr|fVKh}cdC%+dWUZj;` zBNCIfo|Oo^u%)KL&a!ERNmUeIxh_#rJIhRMKTG-3s`hfdM8PygkpDIPH2zEa`HEfr z%%=KhMSqKozlikH0Va$*&S`PITS(TsdR*_)*E@<-6e8&oyj_nU#rRPq3asH_PVE$! zL%p!>ToftTSkb;3P@B)_ znFulup_Dk*&%ay?=-Ho#xWFl5pqGn9Ui&t&&MeyAfp|73RO&T=0Bk8Tk%1{G2^lFs zMv;t+97+b_GBT9<7TANrftAgB(eY;irh1F>upJr~<8$SxuNp9hZ^RH&h*_<@#ain!ej5l@rA`;gFrL*j#S zA50$E%KMX)isPvqK0W){PJr18jXc-0-!DgCh7p2tDKmaxC7c=AIXK1#gtS0|*$Qnv zpc9zzI~lN~uKy$>M_sT8g$$ylc{mC`!vzeSaO8mnWJ1&h^KjbL1zS)z)CEL|NmI6r zdgLgQr07i-6S(jK-pKP5tpJK~$o{-YXj&NTBlr~y(_qz1t-j`08)_ z4fFoD{r=m2gMIkhe*bO1%~rc4B>UgB-}S+Kk=IjLjK7OW{9Q=nZ#|$|kH6VjV*YlD zb7ZZXT}Ye?{8p0q7m(ohjHVw|>Q>X`uqYHoH!9Cz# zlsILOG;UVvhhkHb2(dpa;ZX>gEWU2DqR-g9&3Nor8Zn82NQ_XV6ysYb%!A0+fMUc2 zAUy^69yTSRcs45$C`Nod;^bw`{vd))dg=4fq6Fh23>hT!=H<{anKF!Kfw2~s4YHkz z{P$>_jv=ftroA{g$(TN4CBm5OhP*Seh9uP}1f^6W@Wm!TPq}Q}Kz_N~6qshdDkenR z1MfHy5Er$EMkI03Fo=JiYW*%>KGi@ulZ1(xyG&ivG+PPjo$oBu@bD5)Agm8yJkq?K zGT)K5i}NM$Hx_P8ZDSrcjFs@A?Ld56B4KcZh|t&uu&m1!4cPfJEoK4xIl7Xn6as?? z=`OUTjh&7#T2Urasb7kXNJK@?O88K+ZRQg7BLSUQAlb43vI1n;5LghDLw;cIuUITp zgLGKR0~x<30HU9k@}c(93QEHx`29TNJkkop`X^0SzHnGhi=M-dm zG&=ycTdihiumu}ZozhE|$=Q05m>G7wiV1W%ElvFpoNe#W%(^QKV#QbvHXn44WTKMlVj$FPFP46Jw<-To|)8t3MwgaLzcaw+zUlp@QM?$DT%3z zU?tqt*-r#@q~wTfuEZ-^-1@plJcwV8AT?)6CV`*eMfY)-m2g7rSpE+XU9y>OZuyVN z{4zOzWZHvL>Lbml@Rw6z+wwb6Do&dGkMvvx`k#|BDhjCoF>Q&SU`ZR|LP%z$oRtW? 
zu)WvvHlSPbKak~}hd~awA)?+HdPrq~4?>B9gca%mB&^U_K*9>`8l?bEciBo>k(5L@ET<*l%e@m*4EDW60G7iYpJi*OvUakh0h z4&HdzDm=T;Xt3|+{D`zz>>v4V{M;%W;yyg-W7Us>pUtIktNzin|Nk%TUnq4$P&xVe zD$9!~`hGFoP38%7HH=TbhQisoEfl^3aEu|3J204jo%ShjwWdgCd$enMP9q#YP(CIu$H=OHE*@olc3 z5WyzBtD!sFE?b#*gW}lmqe|U)Al5>R-x$XrSr=yZF{vp8QuO%>z5_(m2VWsgm)^u` zod$umc^*K%>l8pe6p9k#rQP)k;u?EEJ<)oF#|7*8p_Bf!T}hbo6zdgYGMu5VjdP6l z_nIDo*?@Vn5j-e$Os=*|JOh!dR1+_h`pwvsLq|e zR1xa|2?fg~v?}x4(lsICvbj_0YnaiCixr8|BEC&>c3=(3j=dzhQ0nraJCNT>2|ZK# z!-_6PAS9D@Yf{KK86nv@RWC6AWpRCCC;D)L=A<5%ZW5suvy~p%4|=i%2MjJ3^#xom ziWshPJQJ6_9w%je-Bf_mIi|2BT^9jWIf(3iA#d1RcuKUoH*^+yt+-55}@TJBl z@L}^gI1*kO>2Y}-&KEu3CcIA2Vt!Ch@`E`vKe!(1;A+oo-3MjG2q?mjLg^;GQnv`# z3I*9Ygx|7XRfjXhi(ZBCiFYw$DLy@P4EU$vr!`M1d$vWA0sjL}z=z*wBUsk`;yyHt zc^(w2$fROozlb=fVoBf=J7M$OjLpGnGQW5PO`F6dl(7=QBw*V(Q#Z*_$%{ZPiJxKD z&%qj2J)b}hcs&U@9Vs~|;)F^VnSPKNIo;*tAihm<9@3~ml*mL`;|53|2iBqza=yim zBu5sM`VwqP!X0w{Q0$1g99!j1A|=O)4;9FP9GQfiSV|7+xxgLre5!;TB_mLnEX23T zouAMz!=sR#9~Jb`*S^@DI5jEBrjjjl%#_NlHp!RoCkp_9%D1ow36%+~M4*y85vv<7 z{Row!?R&`*RUk)dw3YefeYCO@4fx(0DLJQDi9in8RAib}a?nN)=@$}m)DQ<5F2Gs{ zC6bxsCtGl6Qo{_-63p|2QhcTWt->=IYVFJN^$^eV6M9ITbO?;()8u!PAYKy5*pxmX z?r6-}%w_I1v5Dq(KgR;o5(;sv+FDIyA=sAa83NFvj|D7SegUnGaUL=IIwTC{&fo3F=$Q{XuGpX2MIk|{$GfqYX zoAlnMenGSZ_JS&w=q2y$0xlO)M^55;#1-iM$jnWV^S7wHe0_!ZHt9tKoAi?BR|>M( zsatOYeI{*O9hg~otdYyuK63g>kEQb#p+x|v!aXs?QPa0A<;4)97llw9?j9vL)6H*L@Df26nC82I=qts0yuJL84$JS%?`E-yUc$1Y>exhjO zD5kV=m=*L-7RIa8sR)3olz7Zo+vcnoT7j1^)&Vr;_k1k8--I23wIzmoh%+NQ88Ep# zT`rwmeEWz=VJhVVR@2Fi2U>&yCz56@h+!B3m=73h+Z?d|IjBiS62RXV_+$&#@-rAA zPC`oq5t1ido{-axD#K-vA~f(%Sn_y_8@WN-@MMb`KD>uh&cKg|h}O2O6Ulqil)4Xr z-RMW;=N|Eu0?fZy+|R^5ZCOA=^?jJf*GMZWVDmhS^N8a#{sas#osYDLhodr9Kx0?Zp}u*1BK-V>rT8Q$hXf~?&@v59 z%0YOZIyfJzThJbx`C$l4MRTvrH&8E(62<$pcb)}tjmc0?lpmID zHYoG6XuHTCVUC_6KaBXsNT_RTJ%j_dIZrG<%!4clyF#FfbPkwYLCQnK4TXD)Yx;1TNbrd5Zinwr!IfmLKL(9gs8L z5;@Y;x-$PR4IL0yU|$-s=Hfl*@vKBJC5Ug69P(_iBo7mBiJWp7cV<&^5LY1Q7qWO2 zlXIMv2;?BXO>&lF4O6fI+)0&@qtq>c9?cWzUNBjRE07azMvjl19K^4J7wzScN|x9C 
zh+~uFYusmway3{9_O3(VLLOv=C@gH&|G~UYxFUSjumZlNBVzfzv}%9LE6nsZ$(e|3y8IJh&^`hZ&Da8?w$VAjf=IzQb(mzYX*% zJ}N-+zXfFLhFAF&P79n@_9VM^a9UuEGgMLPR^WgNWLr5d05Wam;Qq2Qub3dpvnqVe zMR0WZZeKVw0BdH_3ty-2z^c*&>}%qDiMA)nzVdwr;=j<93-PZh(0Lswv$bB|mm^r{Z!VuX=dW( z=-ZIFiUir6gNJ z=p8Sn*9fa{DZM|i5+PHA_%`W91e^5gph=?fqPz%Jg5STYfWX5GzQPHWMX0zu`@!~T zaBhVS8k9TmIc{*lgDuYVn%9OD?{I+&H7ieZ=>Ik(G76>pG)mn$D3q3EoG~7RLfd%? zPw3f*Crqs7RggC1kTr{MICUSI7)yFmf!(s~H_%WULrL01hXN;h|3owB-Po4bH(6Il zEU&}MN&e%Rr;lg*n#b}Ylh>*=gSP#na! zHS797N{-0SqoA0PBlq(c%>olRuWUw6HRff-=P>LEp?(M5+7fLOp{kjYBR$Hc%%4KZ zL0o}5X{>V-cgD-fL42Fs=|Ra69W_M-kdwp1jv^j4)}!Pgu0YN&W;<=iE$y_~BxgC+ zkQ_Djv_MW$u`DPnqCpwr3gnReIEj<+k&}b?HpwBshhaM4A`4^$QHFgN!XeHb@;vB% zvJozjvlqTeeSq2X2U850z?Q=BqVLcs?y_teZ}Nx-6UeH22zY4 zK%F?z^BRAGoqOi^9#v+49@+_L?Wy8>#5FdDdN$*Gq|oNvvG`uJ@e=}7#FEXQt4Mb+FD)Sqo!4aQ7g4FOwW-kp@-v_K{Y?Ib;k0)Y_m4_iUNryLHzSE6pG>>i|1FcYg;p7n0+4^Wx*VvSVxfb_}a6*hTtiy4_jSYy{TC9)LV>|-&NxbCi9&RNDE{6@YCcdlqOc2b^oaO2iRy_p zXtt4(!B2h9#|~*ukpt)f-gyX^w@85;k)#qS8a=J6%?_kTwq&O>6?7O_Ss2C!!8GTc z1Zqs9c}D=?p;3g<1Gre5ohP(#ta~h}wTR4Efv{+h5I2=kg$L1ji1i{}Enqc)vStDS zGRk;91{V!t%CNR|%J!h4Ha+ItvkrEIFR;X+iI-4IDI$NVlnD`EQ=!zI##nhCJ&|lW zZzUfg9y+2@#(ui=8V#|Vibv>N;PHUgP4&eswEQu?vBefkm0I{oX2<0of&L2|a+;y(n}a{ArH^p7D; zS>V5~0Fcs;we8c7Sk~#kd><8wY}x_+Jm$8@e+aha^m~fwUq+g;KtH6y1?w)Be5`Gs ze#Ej)e`nG*Nvn%uaEtUq04}HBOHBWndz?&}{Ol0`QvPFY`}8B0b^0el+r)DO82Il) z!i#hL4CkM)xO*PD!Y5mh3vj9&-=mAh^a8qm3uz z=?Ut;F9rOygQo-C}%x6{w$_dhj`|JZo= zxU5}arGkI|?s?9Y^9s6beUJXW?HB z)=n)B-$cuWdYSZIUe6xfk+ths|LNM3oFZ@Ed`4Mp0Im-%P;)Z!k^55KgV)%G1()|CuYCw^~2}W z>a`y0$BO>cV0vt~pE`_w`~eX7b7bwB@#ksVtIyiO9WrbWf07OUYTcvPr*4pYx-fbf<9S<6`iCwO>B|8iJi=?+qEel?TNqpY6nYStg02YG*tPC34CUzh5cTrbV&kn07nC+p9w zT^U^7j~t)d4Sux;=l0Pq9b8|=+FOZxs`ES9Y4qaXUAi0UjNm0t$*nI52zWz#4MEciq9C13puSS7x6a7~Qq%&<>cJS1__E zGJIwvN2esmBxn;-lA{teNpXtwSgj^1F*QC4%F{KeQE5?0>57D8z=%prNRQRT42ucU zXrm@1#+nx=Bu|M=0~`VtR|@kFRcX>w6BCkS#l^#*S50`j1<2q)F`oZKZCa2fCN@zU zW!83B!o;Mgpnzm;Y+7nKl0_d{dHAGqkY* 
zR5XQwr7L{WQles_qtdmSbZuHjv{o}QF=YZwZd78Tf?`v?VpGmq&C`9I&fLw1$6GXrkld5;PN{q9?~D z$0&jo-oVO~5NZs}h)qx8ApgXuiNUEjJ?V?j7pkuJ8^FQG1gc(N`+kVF81K7|1s` zG?XGmgoOCvSHHkO?@MCh>K;ZP9d9Tujr0t*F5RlcqG-2uhzd)5@SOoMEt`7AJQ!64hq241kVIh9Lj3!zj)brK^j!^jcg_End zX1K2py~8iVG@)TCWKu|&VuB#YAY&lNbZDN6_{0RQ7KAN0C<*=(;eUc6BzOeylCmHo z%ul2C4h+`#6VmV&4Gt*-R>Q4tkdH>~=N%p}Y%sYG9O*5&$2rlc{6hSI|HHyHqkvXL zScH!+OdZmv@D2$H9Esn;6amB3!9l^AP(R;b(o}eu#y>E^H(Wt%yCxx76BL!6uJ8|t z#DQwOhYwZ|+QPzvebwG!;Q_t@;Uh^+zsL}vI56A~WGo=kPesQtI5Yryp*=$%K|XMl zC^#IZ!8dqVm?nH=h##qJS%8>eMhy2+_ze#Z9PS5$8Wsi}Y61g>`FV#1j#PvN1qX+# zH59B8;FNWt8qmfRZ9I6qmI8GlG&o|IO5+!bWWpCF9o~V1gF^$t)j=?YA;AG* zVZp-`;TmN2aKFG{7-WE7m;yvdqZ+OXRjB*|VfsTg*dEg>Uc_z*gmMGsEOG~-pCga zZv{dHE(MPOP6Y)(m$A_)NfS)=R^&&*v{7kV(Y+toI1{3GdR9`ZHbom7o+2s*9O8YZ zips(WtHGu!P&BzH_y$GSAQ;wE^+>tLn0l+|TCLtQ% zi11*4e~|nUsvyNgO(tk&GW<`5|1|gqE+|H7)Uk>;6BS`>8~Yl@<$~0Wcr{ z#EK|EY$iBGj3Y?^2*IN^UhyXN;`GL3nZOuPbqg34?l(BpI}mR{)`Z(|5FW*FR6dkQ zZ&iS|4`{zHRn-Xi3d*D)o-YZtm3&Kwc@=-vBthT`0bfS4R*_6NLeb*VD1Ih5ZdNlc zjUvA(U?iqY^l;Y%Va%xU_8qK=ni8uJmj$u1WDQnHS4_x=i;GRuq=Tg4VLzDVfkE0ZhpfzD*UrWomi_hJ*{ZS<>67 z^fo?0+%Ua^cG(vgKY>q~%D=EiIb7Vx6xJlCDSCn;DK;uu6Q{vYSNxs2FUBs2osv^v zWgupuP)6)L@-`EM3nGNUQAtTr9=*u-Apa0@ovKNTC0}BOrNu^*Z|Gh$lVLrfO^C+G zPBSqIOg15dMwr;Hf*Zjpv56_sU=2aQg|AtruTvEe;bP~R8ro0zWO6$M0wVByF1o{nA3`3(H;e%31ts06dhOU|s0b#1(AcZ=}pNMa$Um!><7;}+jrT!0n zZg^+}+F-rzYP=73PcM+1$k0|u)>55k})O(^qxs3D;t!Pp>RgnNUdN1sNth!h2Z zo(8Kl-r>PP0lt5@fsiSKO;1(6XF_( zd&eLMxP>M^#5;5-7%^bLV0Y0?mWTMJmx6}r5YB_XnQn;Mi$PTNi71OT9AX|)hL!mj zips+5QK-*ajmdQgs~oZ_74BllZTcHANech8SkclWC^ji2ElaWv3Xe}qc{5!RmXZ{! 
zjR)5vUK>G>o(7RV3B%c{MiHe=NdgrB0|Tq;i5hJ@XnTALtW%;>AQF%v$0R^NuZT&< zH8YZ2XIPkl2Bt*gS{9ZV z&^ET40nxdvd&oC{LFSnRvqdfO8Z0v*!jmsedAfjLT3hTTrx0c-qGBdxz+xvYHYPnj zAx@jFiAw>h2<1q)0>VA8?_{;(p>fv;9xN#(CIbRiupr>lz>PrS1Je&v<@Fi4Pkjjj zOqPliu2{Jq%L7?!h#l7uz-(JWxM#;YgnM?ZL%4q+OZ>2HLjlgVQG|Q8T_oJI?IGcw zZ668uY+FgV=lLdHz>c5rTLs&e6z*|+$bxcK!qQv{7k1AtIBvw_aJ;&}KNL&;kROQW z%t|q>)*k*@I*pYo6v!tzUT1*f(*oi@p72i&8DRnYL%t$jB)dnv4ESg1 zIaaDrAioh0+rU4>%YlEE=7Vqa59a~#GTA-i&4z!TPh`akoFBv+&k7I^59hLUAS+cU zaNgwot^}yOUqnJY7giJ7`w0G7dXkj_-!0&6w19{6kL~5Kn%LfE_;1VZ`32Y~98h`b6>d&U73%*V zvk!a}+$=bH^AdXH<@_p#qfNy>YjC#xWEIZ&X01NkuCiL6+iz?2x&N^S=ksa}&iB=< z!Pz#MRr|$$!Y?oBI!I^;vaO{S~}~Uitj-tD~%a z&R=VAK7Ii%Yu}818JyEE)R*<|Y-jv!?7&^*a9RJZcHnMy;N9eK8U5~ZxUB!)cHo|J zxUBtG?ZCb4z*Ta%j6Ss;c%U74h#W4XKU@x%^&e>mKE@7wyd8Lq94@0T-VQvK!MUEu z;~(CfAm6g_J*|Hi>hlI!zdAX6vi9|M=qt1XFOs(}>wlRXF6+P84t%v8_!QkwokjG!NwkdDH&;7~E z1^&w7*QN~78r)e9XXlD}`z~_0tbI2-aCbX!PdjigJ8-oe&Kb$dL+rpASjY=Gz9wye z^NnBS@c(uGnvG93e|%j0DwnTk9-pjz7diiBa5p>f|G50f=rfZa89dSs{p0PxB)?Me70QMAmBG#CR|Yq;4>Gum9s2%XpItAiZc>jX`&g3QZ%FFrH?)rFzw0W0-^7=CRIDduqW$-q3 z>buy1yE3>n`a0Qxzi0=}?T0n`yV;@7-A?-+cHq72z8O6%I?9^Au z;j;Nt+kuBLxV8Qx?bIK`;GAr^{EfHMe!LxcsvS5Vt~L5ub-~Zc^`F<{S2>*Pmo>Pv zoW7&HM|yR!19!6nXJCa>4(Bj0kzSpq*@1Vo11GPHkhUqlbLwNKev%!yw;i~~4!nmQ z_;YsPAB#t0{@p3WcKz1Z%9e>T^SG16@F6X6A+>!7C+1069Rv^1|dLS#1-M0Q!Rv^1|`chUPyLHO2 zJ%63~Mw6@_zinO2zgfOIy<@qYmv-hGUA#bcwI?6;ghy##QzU}&U6Pp-6x5kCN2E>?zQ}59ctHC2lTu>>`M3BFZy*0Z|T^(j$`kf zcSCx&XzvygQZg|7_U|RXZ_v+jbX>KvV*b3@*Z)|4IxgkCpee0Bt$sYP`siifxFbKl zHz57DGq+y5xOKzB#tQ>_MRvNV>33t@ii*c?-QQ9EN5t@suR32?QR9B|?1RgdGd34% z&ae3LPM7dsBVN}hxpzERyUA;F*S3y38kX$w(HiFk-|zJ8@4Bhsm}QNQH19X_v!q{p zedBd>!hQF5T4cuk+~wwHS&5IQr6qr{;_SH8dCkXO{e1n=Iakk~?{~4V-jUWb2OU}8 zdRg~I3tQBh@xcXM&5je+)_XL5QL~+4)gQKe?35WlIqzKaUUOd`=vA)SIIdcKvFwM{ zS=-!>Hu`qqiZ7#Gv&JlpD!Aj4wd_KTANt;TBY)%O^If`j*qXJ@ZA5>^*K5oOC|o$P zb*hJNqm+>o{ofvNZvMGe8(XE99he`#rRdE~r&_~Jhl{W0sl6(`3ah2YEHn?Twur%cgZ$|B33xBUS2~KR3B} zaB*?evXJ$Ci?9E%Wc*v_#+S5L{Dvmw_P4=}7Qa5^ABi>A2RHukc(Z{|4xj%( 
zb>!%CXL1kxak*~r!2VBm7p4|1Ti&lC?(JKXayNGG|K!B^=hu~O__kieAN}f9b-4J( z^%{3JuBhQTxZ=(Hwciy-3^AO~2>JS8tIH!hZmKgf|6+89y3O{*<<>j0aKeQzBKj|X z>BsY51odCuVehW=+dst&a4l2ciFbx4?K8h_s5Hu-nlj)WX|l%{z+%}yPwQbO;+`uxupGf@4oNTp|RtELiJ!(!}}-H2^U-3pILa~VD}3>)xj5s zmt6T!zkO-ws4x1IO|0MHRJq2#)7-XtkGs+)x#P;S!}^^Z5byfZ z>bbMVsQR6pP`YdJv5-&o`3namxQ%R7y=&vW9_sOKBYpZ^T+@5}nV^0b--?fPTa)-j z+1yVr-b_=EpBq`DdYAL&2FRKctKZt_}sRICuizEC@dfKY3o)c?Hu1~KQrP|ROXNGe7))Y&%I97JF0vs zf6~^^+kMz>_>qa~1?9Io9{l6$6RU1K>ZH9@L*2%=!dqQ-{My#{PDJ0h?fLVdCWd=W zf3NM@#qoF77MmM52JhPTv46$62{X4%tKm4?H27lMN z-|FRO^wZO#9~=m{{m%O*f`_eLex}i&=U)3f>$dZN=~u24wYc1U#r8hTieSOgn zOQ*-Tez4-B+rHminZ5S(`b%$n_gfIy`ny-t-1fV?=+wXNeZN~P&vodz^!aag{jur6 z%C<(Q>#Cjc6Ce0}xo_s^8wFExYfnG^TcP^xW~V=1zH;#Gq$XPe?xgj7bK%XmzEM{B z4(hgidZecHPpcpJE$_HyZ1b1Pv;4a^?(bRTT-#yHsqyObC-Ot)M;;C-oj?0{#;eOV z{Zg}Z{@`O7ub%M#=)7C&HM3T=_jrEiouVt+&eJDPKI}R1^MbM~nc?;Bf9i1jyJ2(t zY))&F)5|$>%kUo64FkXG-@tG9*p=7rv>fO1=_H4hpGEa(GvlS#li$49X51+^k1Xf6 zUeaHfQ@N&k$bru)*L>Fe{Z^Zo%u=T)zvw+|NRQ4-rk^hU?uEOdO&feQbnJr;Uwr$0 z$sb=HUQ>9wB&uuE+r5EBAEuaIZm56)6rk?yhpksq8eSqdxS)^5IuzZFjga z%lmxIkdgYyDGk5)=FI&GA35Cr#d*TG2A<{3oNGL2UE}B0H4gsLuviYTM9;a80`LS=L z;l_x_$a>0KIh$Sxxf>eN!E383r}z4k&XI{9%`S^Swk|&1zY9M$U|!tHpy9 z+L|*`qlQPGY&-7vufLtMcf_d+i-ISOIS}Ez;U7(AK7S>;#t+Ny^txA4o&Sk;&*0Vj zei$|4#_W;JK61~jx1l8QMBUXDL+-q_Zr6l|`#)Q#oOQy2lf7(3b zhTp9VelwJ5Z{K}w+wQlfpU60xbmN6DgMV4~+>LcxzZ$>oh;FCSX~w#qr4Hk?n!4Ypc(Nf>wKGf6}tuktgptWnLI?;k8e$lwR(U8N6p|)RraZ_nvKB zFgDX~&c#FH>K+={X84cW^({;4M^89eoqH%jIVkp?%8;);-{ObBpT7*d@Ke~yFJJQh zVb?h)*H4%JSTk+;uy2zGRu0>Ir>^&N0~$Sd>1D&=J2h6RRy=tja?qezT|cW?{n_(r zt-t-m*nHQa^suY-mOgBfIbvd()1}{LovAthOQ*Dg?>fgD>)kF}wsGap2g_q?I33*F z`s|~~><$^{quU?f|K7P6P21sJu6$8gclRr=H9Yrxue#-*el@$1_FnzZzL=?c&iC*O zlZQw$ZUkp}0n02O|b7*as zgWuF${MF2NbI<-X`scHaJb-y1k*}JJM<+8)QI@1e$ z2Hj3@Zr-OLuIFmQnie$%7R8Jl`bM|W2|q{8x$@+5AJw9awyEz)y%{GtP{^8d|kuTh3cYL+-82d61C%QOz?CTizKu z@quI9?c#Qg(ym_|I(&B>@4M$dnYFWahkNBmUDPw$J>KwbP_g^YQAN8#nr=Tn-g!v# 
zJ>&gy5|`fmWZcS6GMe9UxZ1JN=w{vz^Os~cZ=1WN=gbe@Ie+HuD_@R$xbVX2cZ*a{ z>RunZb>UEWKfCX#B`p>XU9EaD`_vLer`t1cML1~pbnji*HSudNr`}~Q?|oJCRrY;VVZbgg$q4$cQ#$=Pzo%acIfmRgvYlhgSV~$R)+sr_JNQqD@~Ye_wg1 z$zrbu(|y|cX#1~R^huKg^K^&ry))tFjW=R?O+UQK;ds-C8-upAzm^iX#VzN#pB8#o z6}8#%&DrnA7d6}Q&5AZ_mma$Nu3Haf^2$j=Yu)e*AJV;c`SGcdmHI2Q8`YWCx_3;M zB;}*$8dtpRyif5_&FWjB(N`}vIbJdJ)fEFBi+}Hb`_z5E+!pPdhQ5|MeChe(K?R1g zYm2joM0<3dSKNC=+w7Sm6KbzF%q0Kke1xd#_T*if>wOEZb;Yapi*^Azz=b zXx!!bWtpc|e%yNI`!}nnm(D!AW#P7}59hysz536!K4E|Uw0u{;i-R5qR;9lb`sj3Z z?FKK59X{?_sO$Yh)m?Nkc|LJ&cPq9uTliLsUSSTM#*S{E71I8*)$4VUq+9W-aW$t6rlC+L3E~)~*@S>SlU`N8-lsYY*sCyYZIs z{o6Y>d2vOS$MZv4{bNN)-<4k9?Jb<&@pc{6X@_iw<4t^e`ZT!gs(oqYSlzhX_e)`otLfT&}H3L!?GifH)*!(`bIVE_oDaw$)oS>?z4CQ!M>$=4?9h} zu-3Iw;}^1h-Kndsep8(;$$S58mz}42r0w3Av~KkFTGw~CN**_>tmBs7@+Qw)kRF=R z@XeG-@w>kpG-SuHngh?iccXLRZ=QF;_PpG>V*luYD~mpU|8|Y%m0s$mYqjczFRaxn zqK4FSUK{ST$Ni#@N9!4x^*w5|dbvgB@fw%D){XS`sBVAwwHE0Oqh45Dw7~JwiV1yV zUjHO>$(T2Er5l&JFL?d%ActF?d$sGL<2vn4^e&3nJ99x+%Lhf@`y5#MZeq}+`K#7y z9`!wSuZb#XXs?iOZx=k7G;q>`z31M@pBJ~L>jv+UfJ`{iC!(5MO?Z)%jH+6Cx@== zuias8`5tvIzHvNu;mH1@Rvp@K z{>t1Q>9eAJv?FIdS8vLi!14E%8N$z>obRdcdZ%BjnqP*_7_jtmXqTz|4L^HrSoGpX zm)Fm{;r#7SRr_Bb)2Y$b;Sb(uF``axX~gt4SrJZ$hOF6iH{CEdF`@C%gKr;v6nL^u z&9W`iTUYL?*?s1@gby@x&wu}LZ~f$4`10GW0>?9tr}y>gdhavGQ~rZmuSmKHU*hn~ z@9?D-fKFeX*zxnE=^j4s&s{L%(C(gp<_*%-Zya;9-QnFo&hg#rzOmQd4Q_*8ZCxwG z1o@0!QXX<@^DYB>0-5&PTxw0qtGhtb<7KE7Fe zrt*vSchWK%F8QGK*3_O4F8El4@r6Z%T_gZ_70l*{;@SV!OX?I_LboR6(-{=O;x}tQ*ZghNN z{p>c!+c}mtJ3i52Y~Gax*@>U17A?5G=zhii2W872L@vMga=WTqAKuzLu=?`A?=L^z z4aJwUHyj;(Jv#erPiX(k7uvO}n$z{S#UazixzCRoI-+>!s+I+{%g%K8^~noeJN#Mg znEK+>uM~&-H2QAAz^sUEm04d!mm0V3zV&5ZQr?Vv2VO26eB{>Gzik@5{lZUsS{=Ev z%l4?gy}{2j=Y>ma z%W}R`c@6)3g2RBhOIL2Waj2bt$mfrD>~3~_@3<)a;;JRfe{0+S_;1Fs|A?R6TXp)> z=&;WhKHoZ~-_9}LulvpK(7ydIW$$j56t&!4RdspE#7jTyh;RG*=CJ9JD_j0LyZx!u zooCji{HhvyrS;ksnVpUT{MX0_&5)x(!wJwD+Jf#x!}@>yKrJqckZ4JEx!QxAZ80eyY3-#mQ^i)lKU)JA2z_KQ2zp%h-0x zW67$J$zE>lBI5e=tc+Oyb>deq21i|=bm-l}y5k5xvLF(ZRnTbHtF{0?Cw_@e!Khmek-SD 
zzY**7@}jmy^?yI;K4$aYuUf45bkY0w500x?cjUzC0VDrEzRoecvL5XB?X*+dQ)_D5 z#?-cL+s4$kZQHhOb85RiyU%;xkLP??|NL?%JJ;3SwUVs+PO{NFrw4hs3Opc)uLuuJ z8y`pUBb57XFRCF6AA2`)p7mt8xcb%=lko0|n4A>iQS%lU?JqvM5XI#p<9mc7{*qy# zZ3q>BAjiTX@f!*95tdjsT=s4wEXVb%{=ieP*g@1#hZWUv@uQ8z^RDSxmb(kkIEAmG zp!&SMNTsz2C!Ds69MEa)tWLZ>Tz^};)9K28KIFWqa4cn<9*b$au8=IHoF0p7yQ#1& zZI{V1fUB7i$k3dsAx&mHA3TKdaA5D}(VU{o9{=^YIP&XpVAuzJr^wxaObT_v^{vj= zy3Cb9;9ff2J_W6lQJP)Tf~MWqeX2NpoF=y(vFIi4p?aWZ?Ov)YiHK?a-igS+`4oGm z$RTH4A*_h00@6lYFf{V3&KprHkk~eV!xx}SBeLT8lXK%h0DuW&rY2mH@) z_tjG8_PHn}-2F|ABxG(YuusXe5-GWHXwA>I_g1Yt1aB-^w-|3K#JpLu7B7$Y)Gm0i zr_G=4ZYrF+5dg5gFMNR;8qGOma8VTn&)?n~@w~YIms*%3E69-x`k!lteTp$nImMRI zgul5rTqb!_D`uva3C$Z=CTa>)zw*b`{x)uP>8l;GBiu-fJ;w)L{Wj)Gtpo_CWITfN zd6Z_$fSul-{$*U@;DmV3|jn>(tEO{m+me-(}`A<>9 zmbPH&$)NRSQTcPLfqK+IJQ^a7!(-yG-2|`{=g9E`MRG8jIufTTE?$;mxN!`dK$eyd_6*6FI;w2*Fc_{U3#Y3x|dO<+0{(m z2UvH$5cmhJAZ7`$YXHQ~jvYb(WaKbE3;`1^IDNv5EA{}Lz`h+qKRXch4v-@#cn%Re z5Y8a*i76c4uq_Zr63C1lCJ+Y}9<9_6kUB1uuYaQ$PkO~JZtD1$+<9Kku^uWn<-&+D zJV(1U4yk8n>p4UloXow7t^zQ-LM%%Wns}rBzozFR1txl@+k~BzwO_jCCZVOnh(*f5 zT#r0)bHzjO5Xr*3iuxdX{5r_@hIZb$wp{?#LC{mwsy;m7J7 zyzi&tEjG_a9CQ?GY&IA$LII@pgA`Itz+QzOO%T=U$6pPFRL}&e*gi#LVH0VER0nj` zQt-4mMu6YVQAuj_f>pQPZfISE?zc%trmKSe5+z#`p2yGOrZ5wuKiPoZbU!f_S1P zVW_TGx1FaA&%w(UgkLRZ0YpBczVA2$w<-h&@LmfyK8Rd8kAhBuujI75KRxfRY=mOG z$I^S>j*0<)%=UXiGyGM0_AT5Y!AF7%_BU#ef0sFYm$?vp8@2}XJhS@LOY=N2UGLuL zCj>Ql54~iOtNG8m<)=EJ)(6U$f0)BePyTWh;S}@>)3Id?-aFc|1kOtL@VL9qAlyk` zkPkdoCgjqg?sKp}%vuk<-cSxab|&O14!h1M0-ip_yv`uFo-33DL|}{{J0!_9c8tW> zf5_NjgK9@5kY3vtKK`JU`G>z?Q??+tQz(`Rs@=Ab+FW${OCH@XncT@pDxq&YuFWi^ z^0G{gLZFkpOijR1W|4}3aZ!RSDqg99<~8e65O~pgnzzv+y((+fOb0{ z8YvVjnB1h_9CoxfltMx62Zsbh;_uvg6a_UX4hiPO!n}G-1vMHrkVH%6&-^=su@`zz zZ%uaIsA+UGH)-Dxc%s z2Z-({+DB@qbCg_^g6@eXM&iS+%1QS!(QF6j%#HTr`dCbF=d(>?=j*K90U7N zbNo07-s8v|Zh(;$f;_XFz61Ph8!ejUc8X%4t!Z#xdZwE?i>Bk#>H%V;eUgaKCMbk| z>mn;>KIJ~!tH9_)a(i+FUdKZG(>dkjcb!B z7JWfn_CRaG(~ZG>w8qE+@pCMk`yq(7d&Pm7u?6{EZ4WVohgKMmLhaWHKu 
zlCA>SWVG`B%%K20qvL9x_+8TNb@^#s?DaBZ-*IVjGO7W8^%E0HVE5L%2*Rfbd_1;a z-*ir3cR7X*8zsu__5uNswxaOmNa%)SV}k{n-VKNQ zfKn`{dmuoX6BY8LhuhLJ~` znFvYr;&K!D#t0MVd=}E;qoWfwka1&?u%H;?J*gpEcyn=w4i>^-^PxxvSMcG484V350UYn`dMBNo$pQt3sm7*YJ z0KUNkXev_OHO<08MV7KBc*J7-AS-iuae2|3yt2;8CPERi=V1}AsZ3+0S2Id;Y4rxA z0O#A-q$CZnMUUrx6mWYI*&;aaX|uK6Mf}yq#2P6=&oON;@8>@_;GSM(5p%DAs|}OC z99hKAIBAd4278Co)^5*pZP~735u*8pugkpJVWe8-l}Wxwn*kZroDXwi(p+!J+Lz`E zjlM0tcc1b<_An{g6Ald=gVJgHeEdkV-B`_Uk2h2z6MlRx3~6GUPPCj_8>U@?{KE)2 zebUcmZ4(aNI6=d=uh&=!)eXn0Hg$xoQhV_QKtl;Am0rVQqzV%SHL!)UwZlGY+2IvZob(4yIae8EgOY7>0Ycl|zS_ZWt_hN9n!FitzVPA`z z_-lGs9G+sdAz>$e?zb^9o&9Wn|6_KrWTzoprcuw5u#}_5TrGH8rlH+MW#1R6TBOmE zo*YEIVb|=rc{>7`+@;xU8;~4Al5qs(iPsuU)vYPHP|{euaSKBEuWL#E`sNk2&BE$5 zY{op&E{8`Gal>!y2b?0BBu8R?fx^#ZN2rP~m~i0v-P4Ect3gS3r9QOp*HccUqco(Y z7fdf&=dQBwpU2QI8fP2)@0G8Vu{UbR*8S~|XB(L>%z4YLA=ef{)ebVd%c0uGS)ww+ zDzIw6SU{lJy^)g}I1#DP6XlmRQ0Wbv8~Tr*aLqBDv~zT)d~5ikEn4H`*kwb*)sZ7})PE>EbmyqP~_iw`o|k4QHOh`%!ub#Efm#W2&5k zx^U;#k+x{uCUT4A*HzD!oR9KTybUcr7o7=|TGsHNgf6~Pyj3mkDn6FzCVq^YE3W2m zjHV@K=E<#gri@7|jJtcnR;_4X!sOO@w0g{Nh5C&z@2ri%{|+-npduM5?9_O$e^?{1 zK3FBRSQLKH18L2TUXpFi1v9rMONzY{V;;h6fg49D-z^ymi!#s97=c}NQ*JxhQTr7a z59n`cnp;i^H1cQNnXu()k8Ot>;EJawod=&cB~>K-HBQ0^E=_0E-&Y!B)3{gUo?aW% zwCGuwTyiy+S?s1OVMn_z_#ZEJ5-%0?BGl@gR#Fnrw)v(|B*iMvV&l?;7)|VtZ_R$j zw}WqI>f6Hu;$@PTv1uM-e$mM4+mJZ(UT1k4>x2)J3M{&f&r6HyvWYfYIBrTWTZ8ve zJuFVWzxS@-X{_Eq%(=Txb*d0B30FX058P3y1Dqo0p0PR!u;00_pl;1Lrc1+*c89?*^dRKFUuuZMt3*;@}i+>g(t>~DguP2vIz zl?AC5UOytA`fh{)BRvhxe-BPV~@o^s1jcp9MN>$47KX7{sM- zwP#b4P8kc8_7~d~tBg;J#+jFKn0;T3YD6nf*0T_g8c0W&VXLWh;}R!pm{1OIC7G2D zh~7OypB~U~w_4mQs#=h@T0~8^{9P6d?FrpT2kpkSRKG`Ds0-A}TD>@8|G7hUabTsw z^gCabr7ODjOCoas4f8=VK8f;@KeGLzR@0>!qh z+?1mhrhcDyB>cNA7?Oxrf=|;nnzO^-IJkMzxB`i8sdvbtOe;R*7Z-w=%0Woa3h0lx#wkZ~_e|*iYv{|A_ zWr2GvHftTe=~SdOe7!IDx|inV3<5&5Yk3~EEy$yp&~jI#{lH)y5Sb`?Zn18+-i5P< z*Jv4~aXSu(MYg^SXbaMK(Ov1=Jc&c+Rs41g<2=V$?^=73S?-E!-WK4*-a%?y%yiz# zXXAXkh2Oj`op9FdTCI`yp|v&;H_*%6jtf^ZS6p&kw8aS}-H!{Y`U+gM|B0sDPf5f> 
zFh*o1v&4J6WF}|%@t&->k&mJpCT?H3>_W0sQF$kB6RY5T>oUfF3iF@#RpF)qd=yF7 zlp@5f%6H#-c?@XX{sM#srt@yjf9Aba)F;$W#H&@O`gp@*cF#wtpVGa(^u_z zBG=7cy^%Q_JxC3PY<+bWKxEi&wUeuYzlIF^xK#Tv^Df^k7=$3CwVl9fxoxgQZEwpM z@3cgBe&~8=J2@MuokEpY8y|~2=^g>?%lLr#V3wV6j)d4gJh_BDS8WXJPxJj?6V>J8 z4UHKF=*nN#3{E(ZSAv6Oaf})G-^8I;^#ytdd;j!n^0#b@+xQt)CBD30{9MLir=68+C-5V`ioC|t(jIG@hwX-juTYyA~Yj-9&-RUB_~Fc zt+2G)kwGl9fdlS5wPvq@9<)od!sVOmLEs$^pi5~O?nkq3RIvUOItYB@@3ws;?L({D zAQ$ER#lE}cF3@bN6GFS;T&#%-yc>6Iw;kUW#QA8r(#L(CknKZZ-MEM)@CNJicH(M((SW_aWk^NGE?w za%49o(4BhDk@OYZiGf4Zr|EYIrJ$1?fd`63UwlN-p(lcNO({jP)8evUcKWklLQdlk zcP2Atx@>r`w_itj(3_2BS)zIssbN;6J-%=yvi=XaL+kt% zLN=jAb+BiULQb;+X>yQ4ghrX(t~P9y+?i~uZ@6<>uh(8+ zbQfEh$@IgaMzFT1kbI^HeSR@(=xkw2v?0u)vOfgaJky7U&D;s7WDz(|nOWRb=lQFg z4sTD?RXuB1s~ddq6bkcK3h!9T@>7Zy7iyt{--zZn7@v3mTy+? z@1oLQEMMJ35hs5jxNXTnsk*k7-9yPu0`c-w!nw%cSi>GR8S0T(Xw=am za|J*%s*Td!!@>+*%Eb@9h|*_S+Y&=!wD-o28JvF*4kd+Sw95^wND&TMqtQEuht|S} zVzes_tXL5aDVypZJHu`TV7k5{gk*EFy>E=}9@Bt_XSd@}d0*PqJhv|Q?j85Tq47Sk ztG;hsA>7=ld%oX7pdEmSLOi$1nz>!!K;rO5mp;2`4o;6D+|&>9`tO@~_-0?szJ@~Z z(tgnHk_NGSp&^M^!3D>SYYUOJuGuk1$Ccx(a*0RV{#5;)ephxi58R# z(ODAu>nR|@#0%3|lKAU6Ai`*b>ZSl}5&$w|%TR&tU@8STo|4xfI$%mGd&U#i*arq& z%fNPu%z&&pfZ00Ct-*3{%34zchsuwP5izUN(kIz_*s*<&?SZm}fn7b|dN^%?wqil9 zrLlcoj(|F#!FRNnf!;|xJ@-&G>0N?Qd{eOL0WX2g5r z<){PR4kYZ+b|B)bAF=Nk0=N5L5`VNqy6HspSLiK?cU)kd>JoxqO3)`f@7-WiquryZ zwmcUc91kZ&xkY`xG2#G#!-n5D@gG`syp$ZPPNW0sCME!otT^-BJu0Lo9(Fmf@|ex{ zPXmc1_pn2#kVW5;Lr2&M3L{XJd(?g!cMfcI0ejzS9A>6dT^I#dKzT_YV@p_>c_-Y8 z**#Z@I_>!SiabtD_-P!7GlAA^idKzw^c~>Yg9JQ#+@pYJulW1fV*)&TpF{Qb1%h0Z zD(=C6XKxy+I*o#x^m?wK{|Bv>yY`OJAKDdRi3gW<)6%GKfakPGjapM1zt78Tl%!n+ zKpg<8R{&53fZ_8Bqu1hZ%Xd*50BpXCioRhrK%>adChkT#SG7n535D~`7$T`uanrye z5>qRoJgHP5*+~7y*mnH>%VlitLw4w(;~kBKy|8iMciZ{2wgg+QE1wbq~8^G0Z~tWIZw(fVZ?VzrJF@4fPF zGtsll1ig6O{(UudZQpu7zi!c`bXAuLy>7`aJO$@hU`t26=(*HmP2j{$zJPqB=WsqCTJe@|)|W!^D7=))1Q9~m((eSMwX-oAJS z)%@&AgtP?r^mW^Vqs7vq)*QW$^KHX$owgk3esW)7@!8LG_VbWoybIJDNAUe<(skn& 
zQ$p4Y%Pow6S)?tA<*s2n*I7Pbs{V`T$h$zJ1rbkcD265bBQd(_B!*>wb?1lyIb~eZ zlm?#A!7UVXPWg%omqH}k!9pVC5TzbmPg!F9lX`x6XB)cZlsn zUlW3DQv|wLP!;X!fz4g5!(=fD9l*!YrQ|+4^na!fBQnro4sL&D>!l_Q|1H<1iXPwX z{O7UXvdJ)CkVplsx~^pB8Vs#%YTg*^rj@U!Jr z*YnENy!s46MXZVS7}4H=3_YQFWt{epKT=elgqH)wDk#vI**}K`%;htNDA*@?pz*O8 zfvbj1uycO8>{@{I{-%P6D`T*Hxnl(Nm)KjIO9SPz71N{{7gtg3Jn?Iw*c&7m=#O_O zt&B}wwPytf?l76&y|S+lZueq5vlh%$_kcTFsLTb*B9QE%)CaOD0^9NOj{3nB0OiX4 zi+#P^_h}bhwJ6JOPrQqJKQPhLV;IsbkcW#>Yb+YM|Gf*d!y8j~Nyq7vfZaF7R3_Rl z5a#?eHZnR@XGfki*xx*x?1}Z$KWcq446LWe(|pE@J8;MI5}m+Q^_g>Efs5yKl6Hhl z+AdSNKtFTee_(n!LzH5PU3x{P$RVR9Tk^Q&l1V~|UN(ezpu;eT{0IwtiCJ4wN;!V+ znmnzZ=jVp4W3z}rF>@~2|&!q{E~ zU_JE{U)y69fk%VDO{$} z%K$5H)zf7$0@r*9ruqEIa*gixA8QM0w+S-!iU&)tf99NN#MQbGMrngt|45z9wUmh! z0aF$8*vDPhEI1LoH!@sA2kJS9d$%y&HFf%<`n?5{u3@s&x>o25uF5%0`pQ|&ILST7 zIij9nH3Sj7g=d@>X2%C+$_)fGQgj`@9Umy<9*yKjR0Ij+A?K9RCEoq!VH@;%*Bg8r zw2!9>7MLQag+O_?s8mTGNx?d$r9fL}-;zds?j-D}7MX6EcF@}H;tA=-1e9v#P=~eW zJQg`ir=0#g7PWc}Jr|_{=6HQwfgKm62Ie$i#DfHkcxffGHKb}K4 z0dYl0m@es3L`;_~?MX8&>y{k3cFn6!EG)6Mosm++8@dX}kbOTA@ZR|tP;yTc+U&n1 zcuk$t&8A$~s-pgsDT)#+1+i`Sei_(S&THad@NNevhg9+1%V2;-g)o)t$)A~WKC17W;62;@ zqyhd}8GQ@0!eC|O7okPT>c9m^1&iLBF9u?>zVJrf-xgHTy(Fyx>X1+kX~bqtOy7g0 z9XBSvv2j0nE^u)kxP*6Xvfs8jiqRm2LU0NHEPUtaJTtryyG@s6`A$_11*2LqILMUB za~%197D^EK&wYmX@{^T6Swez+7X_t51xkVlv$|=yU94s331euGL ze+;W#io`T!E2k*GNZ#6p!?tesRC_hF30eLY0{WD;bU^g;>3c0jf{BsXI3NPu00?~k zp>$QQa-^Lei&y@h9#w{+q7!LV&U!AJGHutXI+OIM3+$%?o$0F;dw5YCQ5=wiL-8=_Dz)`>l_-$>EUBV|$k@f42UzcL2anu9OanujPNy9hDc zxnABt%)jq=xtw1v`+zMEeck-T!gyqn5z6RO?UkX8Kz+ae z|H58z!0ua6?K*-HGe{>*#AbQx?WLE>OpAPGVDB+Wv%W1$OkxXy6<^94h4THS#vs&| zmj$B9dMMkv^1!My;Z#u=TRO>}Wm{L16>@%&;wm8!~jO{IRr$oYyc6gS8kEOJYFVOpP|s} z`zws1{2N+8Uwqo{XY;sozv1 zT8u7hG4#eamIsnP+E(heXH4sC%&)iROowdhFCJZC=B>B?BCb)NzNHcGJHMGpLR>6W zDN69BS~u_Bds!s~u&!`iy|oc9FHuEF%hq#Y5?N|`Fo~?RJeo?vPqLU+x5Dlf zsq`{+^7)gF2d8TbN3gl)6N+GT-sG&@ijR7-)`b)HFmqmkZ{i;g=u z61*y0Z{P3CV8wMO7=s13g&trs9tgeRW&A+vhE8XImOulfaV`uIyzHo#BT 
z14?=|fc4>P4$^nm6)qhoIOh&Db_x}Ak)3p|)gMET$$&qCF0B^7i1UgaWx%$}v#4*+ z^MxvC@a1+D%A%=A{7Xxr(20Fl^{cb@emme{yFB+xTkgv%@AFOgLk8iYJFDdDZ{FuJ z`a`#0a24yq4)fg1*HFeMsU4t?EB=yV`>+{1^Q{dgk$Xbyk1L4Vk$`PZ$r;L~36wXb!CL<}pbo>YJT+)0Uf=PB#5ZhY=)xc`8(FL1_duSwX z^SPJgc$6HqhPKy6-P7nE#=I8;;}FV(qmR%K<0gDx>SgqpYtb74)9R%!m2?v)S$O;f zZv5b=bF@XnDd`H!N5NXrqOwF2|5~cA#>JUEgo6L;a0rje2dbL3=Cthb{7%}=KHs65 z`;$+lNlahH&3gxxjCh5|uKJ!qxzJ|cgBXky+oYQJ)mVtHql$IX_VizZpgz#t1$J+H zuK*#IR)y3^h&$Pf`V^Njeq(x@%4h`R;*8#D{VnpRJQMozm08{_N5uG-QHoUJEuu}ios74;`x4BudZ}O(RLgups8kd^r#`eSJ#F4PWNlff zGG2l?XJ=R#ptRfo7ApA)VJ&3wnWWJ0vzvZcCHh#o zwn7y&xyTTK6gGyr)75Lu?^G-Hqbu&ADgPGtFCF-Zj1f;5Sxu`~s{e6;$NFAf=}4rZ z_xtz94AggKPq(_dvQM|#rqVu1sD7^CN);_7@3S+J3LWIhp+oog_+ZZmwogsmq5h6{ z6tDWIJ^dj5%-dfdfvEd>rK~|N0+rXnf858x#_HrmGHs?0zL8O=Il$`@fV;SEC@Yw^ z|AJ(`KtkE4Ni=T8JKpScuKM6ft-PeCTSNHgXFmnSb+&RO4FP2o#EpBd55NI} zPBD5;MgWp)2k(ONEldDumNO<&kDm}}Fiwp$rDnjIRX1YEO9Whzv4f#apm?O3LA_?y zusbLi&c}%b@8vZfZZ1jG<`6zB*w9@>p_~S~EQXuxo)}TuL3VXxQcf z8hxIDX%_>Y3l$H2B#4?Fo-5sTr?>MmO@GBi11wsP(+{HwdBRR5Z`MkQAlh2t<2{_( zPP09D4V$3})i?>g-l#;Sfj~u3w%Lhvf_soZizQOiE~d?jKo2)Br5qxDOZ1MQui$Bx zODJVHd=>vtk&@<5sTI)1u`zdv1K?2=0M$w`<{W$Z zoz!sV(rSVb=2?nechy!f0}4>giMK6EpKVr~Ana-=_oq#53glN!u$;568A?1$RhL0T z;r>+?fc+%_fc+(xDN0+Z>Q8;YclS3GH~T6jE14mck%U@YDsJ~0{tLswNzP|~$v{i7 z4R!zN8LnS~pRY0v>(F+$eIkTzL06mUBooy-xi8DIny+0fWKF!}PYFMH;DxMD(4Qil zVE%3SwF&jyZ)mZ~il161S`Tj1xhS1)iJ!`b53+jt0QgG0y*e8yyTwo3EqJWJd^5Fe zs8vx!>j7(5nVBS*V`SW420D(C=2mJ7>4K?8CAXgWL@ih3D^#F^sR!5mWYnAX7!%Cq z76+F60(ILK(pK8`>GW2E4aq3=`s%1hWtDpajX2{ZE0jkp$rt~TJ>+a;O58BbRdm7O z8Vc(z7FwwG!oX6dl*X!0%+>g>Dzl)b7#Jz|wSL2}+T>%#;(3qSWE3DN&|?&73OP8+~Ps4(QJF% zk8a(h#1oI)sOn70Q*0-zJoUx?mM1+v zMXO??wcM?09BXcf)~r9@&~CQfUkjn$AS~x95IoRae8q8uPF^R70EQn(*D%{$-wktj zIOYU>Ix^Sc_)Yhjn=g`7C^yWPBy41i09fTdK{`PZ#_mT~P7c`p<3^yGX67~Qo2<58 z5>B#&uQd_vETFB2Y{8M;X{LWcA8-eit}2RRznV_6bf)}Y5~R_3E`@BN))ex+hv%XY zWxV`ZQK}+@!*yjb`BGPby0I-mQ~ak#%9V!c*FRupr&}}lbGD zH%yG&1Z;#^*K&e>l6vl2t-K{gM6vc^-R%3QUpK-icwKtjq+P~0TUe7pTiwpTw4nZp 
z$9qxLzV!JISG|&c$yo?KvlZFO!b1Z-QfRD`n+b`0-NTevKI<|!_75=HZ6_}CgExNs zOVsa`l8USm!!84}@$?B7DlJ!l&4|o~VY_yt2eV%?NJ-K~zU0WpwB!gLs|`0ISED%U ztIZ37#gUr|_NU--|6DMuV<(j+EnnI~I!2ql>B%dCpbD@8`3qWx_$r73xfeBCqJ0g9 zZ1c6mTkg2(t zO-?DnRyb998qiFZXAt^WH!D@KfkOuEB8ZsL7no9}Tul}cK~g6XjWv*&c4#cQj9K-E z$ZQ%svGs@XY#QE?!PMP|cxO+&P46YYK{c!uF>_m^fnD5u5qs4$3Gf-qfo<=oZwq;1 zL0Zt?7Qk$Uxq6fya)GM;sEBtC5N1zAYbqBL>Hr3tW58fjD(u~1a}Y@R4{)nXdG(`R z@`rb{=^P;8A-^pWX0%$$)wYn%RDRU>MwkxO&FMQx^TZAfrO?DsweMpyy{Eg7Xzdk#RN3lGG@9dgk;cy8YK@kN+F$ffMdo>)5&$oL0 zfK`FNvguCgRb-HlaKm0W^yd02${^x#jUS-DQ9sy}7zhjb=tg(6&H5lr+!}wWfPe#B z-w6sWxq}&H@OLJa)JoMlQ&3{>Fn~LsTMFWhP_oC)3|swV4xQ>YgwU^)JYS@ff_#4t zD;$z7$nP}w?<%%JDFu^}Vz@1i)}cz3i2D*%bErKMFhE9i{4;a|izPP)GK8rlO%;Fp zHD2&Aq$^mOU?A_PhQwARlw26|zQe+JaXFnRmWZ1g*k5F2Y^Qw6QeWdR>ReA}b;SQI z1dLGyXuUILA-_cB3+Wu!%!XnYFjt^E$0LYJ?E&d}1S4R7H@Z7K!BbC+S@r4*J+s!4 z9%I(0fARP^;bRj@2L03K4*U~+L;>*tNK=;{d; zl zzJg^gZkQr+76UJpNr=JV1=MSricC#WQ{>oCPd`cre}U zyTAkN*CYw=@RB?TOlKEqwjCb|q|VqRM31iY*iG9v*sg60O6Os*+}&as-yagDF}388 zZnotf3Zl-~COnRc)1 zP9bw4ivefRtTX8DC1y@HcI4&5KIPjrU08h9$m*zI9qL@ldU!6}-v z#manzQ7b9!Fm;}=c}{7HX|oSi655~$T5a68^5xLXi4Nrut^R$dG4`x9)2Z@OUBlbT zv~fsvvCKkHH5{IdrlHKEWS6L0vz)$&*w|fHQVjnK5QEs@iQe4nEEoCnan|&2Tj&yJ-y|dTO4hLl7TWX#| zNc8k>q9p~C?<>Sc!n`f}fV?4$h07%|dZK$OPDn{IP`SbC^Rm-ymG9Xm!n}?9uC>)* z(2zJx_9g_Y?pqu?E)p8~QgeOB?rKeH(Z3Mdw9W*WB!n ze*(N7GpiUmNx4OI7sjg&1_wU^>se3W5WKqy-dh%_bV74%r#DM9TE?*YqctXP(-{d# zHD6#h`?#5jzRP*J814HuITV_0Elh&D&12QNOq34-u~;OY2#FkwgF5}i(d)UaoDTvm zSOHTV5aWZXPjp*lKKua>CI(zM$ZoCsLRJLS83CW%$qlgW6-BJ8j~x= z#oXJeMGVFn`zn#9ygSWE%fahbKg*!8jx4%UF+t%#F&RvCiIl|+U;h9HCpa=TGpIeE zPdnsWUYWG2BLxCdDK$@ugMw1lY$$d!%*kQ_=``zuZ*$PM*^HZ0fJ0R8BHxQ622k!^ zwUA!~sG%*Ip!DG2p|yJs_85NmtJ}4TW5ivH9>7w2+A=RBdez0=B*y%XF@q;I`e17C zbo2og;AtuRO97Jq@GW+b;Yp;g2~LpV=>bQTiSGRI%O&B``M|&?r<*9Kihz9I-6h$$ z)A=g`g_3gp$B*IwFM8%WTGae`IG^c#e-`ue1_OD0o;T?(dD!mr)^$oJtLJ4Lncc?? 
z>6lih|7AQLfKbP@GK<&A$2KRb zpWNE=Zvr*?vnU%@h=(VmH6KfFl>xIHq%PYtsaq99gs7GI;-fRP%Yk>mi<(88PA9c} zKlbt#U|A|iGoV|9itjMsKXA03`mx!!*^Y0yi%U1^LKBfO1*;0UC$P|hsW)iixo#du z+&-yomZ>$b)&YaJuh^xn=s_Ukb}nEmyh|YU20?s=EC%~7&pd60B4F9h7u{&L5FcBe z%Sf!iW}g>Io^2ZNsYVgMu!^JXbEj~xK@%Ll@fnaolQ~Mg7DQ9rQNA!H&LS-m$)=o^ zSc;?QLvx^*i=rs%@{(MpW(DHwA*w~7cR(1kKzHdNX70BC9^Z+IMzZshuHD8c63Va> zYvbP2<*D_*o+lzlR@j3##I|&j8<76?bn89vU>8>E{bkn^>$R~*onx|!Vvd*`YL(m{ z5;jNV3y=%^7lECtn^H<8teiEsB-cfyoBmabtjuud+N^Va;)QFwvo7r4nI@DU?aPsF zp1My(a;hYI)I^Kzu{51l1@5d21^XgpE<_6M!+##E4aGEOyFa-u8(0Dl3%!yFd7>gxIaD2<(J6o`~+C0AB@EBzs%sL3!^O&ztN+6No%6!5wq z-ruAO7T@t!HRAS{bsI4FExG_ktzlwXHKf)N%WI^~zDZ4_CfjYS!%+z$^wdV}GL4ol ziGDPiA!3&!4=)#RLERnRgT*)N=+_16w+G^3v$11|rn`2a5M>+~x>wHX(j`z68W^I!vc5wU9^0w{Wdt5;0Dmo)mFTo~Fj^x&f)Mf$1Jr zCaU*1W)T2$z9L@Xc+fugi`C9HMp z9x}R>6=^=d-XR(;D45@5HDGx?bpAgMZ3~`lkvvp-V)uR$KW+uk=?8*|L@e}#%4ueB za3B=t;mV*14uxR|aGh;h&;@UxLIq3C`w=Ka-p199SV_{iNcy}hps@r`DDGu9#;&L2 zJ*a=&Os~}hwJ(ynXrs3dq8>WE?q@scL919MoK;6_s(uD}@8&J*fW6YPPHE^2pLF-N zSaq_zR>PZJzwt9Ye;_ayd<;Ke?r%_I{n+17G6w0Z*r6W8)hB;7SA~*Q>XE-^c0QsO z*4an!<_+r_BBx~kYO1!|Ql|D`kg6elKXGQ8AHo;?1(QSHw}s430mA2=Jkc6QJw}g& zwt%`6&`Ty#g+&XmT)2>Z4MIh$ zHZ4M|8{LFy5XZ3K&2WC*Jbt{gCbjVN5(%jyl`SsJ$ z{-&ogRP66YW`eU(5MOmc<5u)|=LBx3G&zLddY62$gUhasm<$kTpX5yGAtLiZeerJ zOjUyYn&?aLWOje7Ht0cZV>ot)Nu*@Q1dt3vwPXNRI`i?O!fN2a2aHY12Z6nJyC}yl&-8=KVF-%54yaZ_IcFE zbgt2e{E-rLHSw~Z@d+EP=Z`SalCk9FN-m$Z!z-x1i z@i@>Rmfj#P>7Mr3FzK+CwUt$17EdzSDYdsP0R>fRwz}fm%wm?-AeJ_-+^+=8f9db* zU!%k)rznde6p<5JQiAU)`cqg)_@^-cNB4Ldvjx~RJNMTh_%`>OSGU?-(9J`Xe0mm9?7kx^C?;WYOaTphL7>=ZLZSg=an+(epqE55)z5$P4)Yny& z!53qEI9HG`?yk)bfj|0$V03@L`-QV!23>oAL;vP)d`%b$XNhxQ*jb z;p~=t{y%WpB1C*GQBKIp`a>c|h%jJ#I^aO$A!nBTV0Ogy|8ZU`Cfa{peWkjIM&uEt zsQYT{;;(>W1HR)kG6vtS{1G5+A~4%UdPAV}C<2fb0hpb-{Slxf98oY^QhHxGoDm@X zhyZ%;muHw02a0C7#n=y}Lg4C9D&(;5M? 
zJ`(`5-8C}=(f~wuTJ~TVd^1gAfFn*s!02+Ze)aq|i9O}8xX%&>vvq@K@IB-PG)wqj zvlR{r!N)M$?Be|nBGmN0P6I&tB;e6)HVb3~=U-DcmtXlJk+@QYunbeKOz?NKz_-SE zkSW5gKUFX^@#(1hZF(MD)0M z{_lLT@F%fT#`NYe{&Yp@iU3CCE0D}HC`ysF5PFQ1@?u{SCu0}6UFo5|)yv@;sbU|+ z_)RhF&U?I*h_#{@{5$P~x8I#1ifIW(q|-pe!SgmrF&D|jof@x6rB)G1r_!vuDi9i=j7lOA0Y`thF1Q(Ugw~_7n8Jqm!oiYw z?0`Ic?OAXv>>cI&iuQXk>6qIqVjQK?k2z1!jAg{-qaol6nz_bu7MR^G3a)v_HH6%s ztI);vy2jD|K-fmnS6d>dHEK**#L~slLQvGO)#laQCOGWOh6b>1Wp$hVCQ?K;lcEy| z#NbNW4HEfRGlri2tr<=7xDdsZxDX%hu;C)lN1;hqyfd^ccBq`GMB2ZY-wcCHDOtf? zH1gpXaKq+>W+?mh^II@{#_`cVf+x1be*ac&&q|AP?)yhgRh3a&Jn+8;P`)QCwf4tJ&#}n;7eWs6_ za(ig@)q3(3SQFt4eRT$J@Mbt+|J}9($r^qaH+=Q&;36opsD zAsdN3gL!^C5f0Dr9)9Vvfs@XnO%Yg+U5@Z#u>F?KsRyg%`{$#Ot` z&Y8R4f`(%}&H;9uMA}#i(%=5YXIk+BA4)bJ)R^5MDW5zUhI>+xW!npB@UQM`}exI@8C$RHUr;=mcxK!5LPGE{_2d~&rz zDT~ikg?ucYtcfq&cQY_G*cp5b)m#~SCaIM1LAHokeRO#MV=4>)+H(xp=!q~S<2|TU zX)0Ynvc8xZCq)SVS8-E*P%S~XtU$*6P||U0*<)o?AFO4kHs-DM9aO1+_bVEG>0!bm3XojiW(qG#4G5s7f$eby@vzf0f#q6 z@*?QeN8tpejn=L-nU>$SBg&w(sy(uh_mxum>u3F@>i`XJ#TMTbso*m5(^;ugBkG0_ zQ?Tj44Q_0jVh*`IyA$ zz|j1#;^9%#DZiyt?v&^6UD>Ob37~RG&w-`MqWz{< zzyQ#YuD`C05R0;AS`2x7&Fjz?&5uj_rq0?W!d7Mg7pY%p4RK2(gPpzuvbu?(6^fxM z<#In!+m-CaNuy07K6|s?NUw*RtOO6({G}Yq0{7;Yjr?LT;a}~ExM@}oa~aKUEK?C8 zV4c$sd1US14|#C%IXp9`E}qskZX76Co}Vq`U9vU6@$ju&p`1WeJg%4p=9$eOVI%{| zS`bg7XANOv&V==o3F!c-7$a}&=HXD+K(W6EpEuwOa5laZ9qD_rXXPZ=#)VQn*iQwW zZ@$-J!4%*5hW(R?psO%`F2MF!jqC17>Ibv?WB**>dlg99m%w!W3)pm)m+Fpq0CvEi~{3vnwzl zF)Q$_RgMKKml1`$AYt;qwHyQl7n6d!Q+~nef*&Rj7^F79oVd@(n}v)R+F|`Ybn|yq z1d)*;yyXQVYa1>GJYo>C(y^$8X~l^bkFzU?bF{md0gAKhH~!Jfp8NbB3HV~th}y-H zB>sj1y;dT_jgDq0yultxV}SfR39MCY@pqJb1mIM%0iYiffUJ&y zMKpN#HhC&-2Y_=hz~Kl`(mlj{8<}YWr1DYIJk+zTpk)WbZwOHn_`9?W&MKAyAQ?ST znEg`|PW@dxOoUq^**&rj`c3MvoiJ@My^df9gB?+9V0pB?2L_LlyU(2=&K$g9vAR|YlwbrD>5 zi(1ALdD104l>$7+_{<^z+HVoOFy7L=2o5yIy>fCg<+>X^>GZ?Ba+rx9S}BxyJuHWB zdsQ-Yn>ORM^5euKhYdB4Zi#aCAuH>I2Bi7|Fi0>g*v&aXEAii^O_qO5gi_i~hdib- z;+9cbF^}#ep9sdp?z3pcBTez1oy5F6FZ4mnkMr`$8q^xtpArzuU7Cp9G!RYafn@pT 
z7n{<_K=DD?enAm)cUKe2^|BH{`$xpDzjN0F5nK|jzukNk4g9C*VO$anqzD-Ne=H2z zhlMc~4!-dLmOtV5jX!2($h3_r%PP3NghihWhk{otCZOqg=yOS`4Hcxi3PY z=zrImVr0-bQ?pu9r=R7PWE_xnQ?tTocYP4q#_7-7&g`#8Vk?>nLQ|Qc*ZwJ1%)VBo zJY-)AIyPYC8ICOf{-TCV+S4ywK5orUe`9D=ZOyJQ{pZN@savzGUBlT|Eg~~14nFs< zA9e-7HB{xqAsPLvboZ$b%g_O@_vld4n%m%Bb|8mf=4aI-Cmt`SjJ0$N<~(6V@@EuJ zP+D5PxkY%)Ly4xBKAFFs6i-Z9F+#)Et}5}=N}6}@OdBFH+j z8s$bvw6rhPB88Ne6*`*4nxB)ni6DX?S~YSnV^1QU#6H22QZnVR6jJ9XHt?LELrT~< zvExBm5u)g^@Mnmha}Se5nMq!~fr+HE{0f|LPzyM*q#mSRQNp#oB~U$R75G0vXW*0U zL1!AvLQxvlqMyZ(dSuP>jH9q42+f3g(}L*gZHWOm}ONm zOL=s|ScS+g7hPl9r=Q&Wn`q~>Kgx%;V3fk?rpbHZJrg4MFiI{5Wp)0$6Ar($ljhlW zAU<@t4RH;k62JWze|TS`QOV)#AYlqwWB8!CZC8q^&iMwifIXs;(54{5-1hz8ZfZm9 z`GC_++csfUHEJzsUDYAE7hy{n!>)K{6~nFfmgcMyxWIYq=+?Wsq1U^*nf;0LMVaha zdc`}N`Vn2HWJg*3{70PEYnI1-kN8^0+&x!vr5#j`TjKvtHouCWp6jPUe)13 z)VHbyzB@A%iW6;3pBZ$8j)=ppq zwmsO2r=b%5eqci6leskh@`B#~%=u6n=GWCNul&LRMJUp))|h^7(}A@(@1_3td~j&b ztL;1};{^EanT5#QFW7{a!irD%9w+1WX|(j22u^<84URwVOy8lCg`$nm32|)Pr|6h( zux;l4c0EKn>|>*@DktX4epi>%?|b*)6BA!hnnC+^LOrYIbJzE;kAJV1)Mn$(CV+%| zwSxJ2c$PldU*2f>%%4wUpUbUN{q)CuII26(4}8@2=GIpHQ~%nZM|ou?B0nnhM>|{? zh7lfb6jQEEbjz#0fFtsSy3>h&Kd2X-y zXZJ%a&f&w_h8lB-({Ck2YATAD)uf9h{oTe&Q+UEuElv^ibk5 zOKr&6`cX&`A^8RTFqRAQR!;g{#d9u&&N%0@9dXCXbX%gYv&;lG)0YD={s#-P2KD)| zC1d3Qp#tTBKTnzybi;qth*X(e8?zI)P4Cr>aMjm3H+?%Y#qVT|O7UM5_JuL{|8$2? 
z`IZM}=ehGk@vB}UaXeUt5$6%-eIrTuInI#8ds5y-G2oKF##?z6&H;7V-^yTA@xEpIA~yf~ zQwTrDv*4r|zs95JLG=V^@u2mY3&cPYa#W4cvpJlTvh%v2%<@aP@s^%Ye>#OSCg`D< zMatDPITXEfZfiNoRjQ{X4`;4F5Vi9fQWzUamqj}Df}VCEOr0BNhLWsOeX0n_>jWf{ zKEahNXnJN&j{vLB;9{bWCG4tiAje|k^$*@Y?O1%%WX%=5>Am26GGw)$hKK4zzGQfC zztAUgp-3A-D&~)1!yK)jyD}T@G|5+F$L+A)<3AB5CBz-tYJbt6&+FD=-O#d_f0rgz zij)}goZNK#gQT<_q2H99pGwN7a%qh3ow7B+^*|M--k<)@+%V*qz1+rsaqj0o*f6Jj zBNl%U?6XQtS`-LJm|X&dH*h61AT97m$R77@MsuXkhpZ6(DZ1U9)Boggn=IkVUcuct zdqplWk;E|ej%zaeQuPQ7_PA(Be5g3H+KgV5J>F$N@P&zULZ%beP?oiNOHTI1iE}@KYWG+_m=%clRfTm$a4y`eyA$)3X6@x2 z;}%j{U3BK}bCu0ItmT@RXMAB9s%5aB>76>!sktA-t(mmlywu6>DwP~j2+jH&(fx%D zFAAK-5`QhA;{lC*uCU2G-^dARhb|oDQJI52g!(ox3{!kE@7?O6BO5+?aF6G@%tOB) z>d3$ZV|z|tH)NKppnUITQ)+Z!I0SS+*0=pZkhfmPDgGZi^4{q}8ROkQFuUl%0nxX0 zi_rOk1^qoH5UJj%Fjy+ugy)mPm^;}!lQ@o?Z@~ddHN9uS#k&3GLT9Y?w`;HWiqmzX zX`A=P14T-Y-RM33J@=t*`1hxiD&ZY-Ki*A6d8soQmd&Xf@lCI%{O||vtRGNGuW?lj z?_6ZVjApn|Lu2FzW&xgVW&&dXJH5b^=CoF~FW_ z^~ISevx#p{K(L{CaZFPS#aBx2s;|Aw7z#(<>VPoL^WUb*y^EFvJX@xZflCjvC!#TW zmWPQ_olj$%b^-al1?t+K4b$OoKD(X5BI zub4Xgx7sD&G+NU#*{p!7_jJv^Uk+uH)(Xf))|)DQ;cB<2HCgjTNAedVVhrnGDUVme zfe2CbPwZ(8qpaK#c8Z?cP3fHoQLOQ6wuh%ugm11pZbT%p>w>SLg@lSv!{-TpQ1sez zo*{*9lpWjL#hu_NoU)$9g>K6@x2xsnMq)l#x3<#f^CVjLtAEagpd(GD!Z9zYJF1DZP&ely?BehxclE-BPSE`4`LQbEK-#3GMtL{HX8H zta6vpqdt9894P<&io~ybiNy7Q6XrMA4TVz0a4En(X$0=V-em;t>>ALoCGR#&Q05Mw zP0N$?)O|oSMc-x$~^SG}6>?1MIu)hA{L%NGK zdiwMRJQrp^dG_tw{vPv|a?a%acC{EcfWP$NgPeNxZDT~|dqq?59Xg+11`k|z4{Qfg zBGp zPTu}>DNeuc?IR3GdFW-4*SPPUCR8I)ee6BuFMxUz$$H!*iVNhA@CqQvs>70}_5HNf z`S5F;Lgj_Y z+7U;7mG^Z%6j!^6&zA8+%|_x&y(}Po{)MF4KK%Al9F`q$jK%LF;}pFl5Z=)K+voQi z6?7Yc#_i|on^HFUA8=M#Tn`DmLhH)k7zgp>G59O0io`(A=rkbV!TD&0qurBL9_S3@=N`12>px9G%onM5! 
zlCRD!Jo@~RIU=vIo58oYX3T~obKLA7(PEPaEi9HHe%^Zn8{Mgd>U_qc^joP*6C15z z=IlKAPv7O$`6Byo4-w?K3E4>gHMT+QTnl{V$$o8LjX(EjJlC{uIA7YH5EP0jlnUM1 zSIP#j*lr_t>FOc!v8#x!VCy^l5DgnCLwNsfEod z$U=)X5hY*nfAT{*q*Fc*+c$`{G8=H{78|D?kQ~$323zNGqCz(rm1;?8iU_tQWmm&| zR@`ybL+XiV=9ABg4r%>t+5f4sm0*nZMKE^*c~x3oK)V*5#Rw}~bcPzw#QO39URUM;MrV9NXZF9ABd*&)&w7#v!rCC>qcuetPJbHbkNSnse9gh^-vZ)9 z&Q3wFz{Y20e;W!A3dr>fUZ>#^l#sVL2JD0alCcI2r|U*eYhaBk?H8CeG2{gglm9D- z$Y2d{@PpV#Wg4*R5TEj)O}KE_;V^^Tpt>TwFo9zBHy`rrI{P44$lk%^Gesb$QR0xm z15%u?6e+~}3G)M(QbL(156LQiuEw95o=PhEp*n#f3n?M-ju}%>dD{nnK3+qzY>ct! zGNw@dvF5G=PpIyamp-N+iVF9y-MS124*a*& z&%i*Fn=gFyZW8PFLnPgT*4)HK^26GJCCOS`W?krLEs7*{Hjgs3!j<;~ zqCj{-kT*_7A_WE)foRo2E%X#k?vWu~k^%@eIJ>uP(?k^Vuqlqv4KH^-v0l2~o!I`o ze~mH0?e%34WU=`D-x|lF z1N~?r=%=k`;nTfj>EW;;nvv!1a4G$?3jbm3l%Af#n-~|h8TL90w(gZ)r5BTZO5aV* z_3sMrk}sxCrDOTQIIk$kj;|A>{c5);HYUQd3)rq)J=y~R6a;|GbyZ4ppgDYxa%vAfu3=MWyVs??Xd76`+oZk}lpB3L$w2Ae{ znZ9IIzo){hnmCLv~6Vm^@v=VLIn&MX#bOwZT>q+{~b?*T`rN)kw>y~47D!suz zJxHC>Xq4ya$@;2drCFG$0xA4{=u?avK5kzCYFoLMcJ`ht`^cc$F^^=Q0 zT;xU31TYh=gmq9tNe)?^f!(Fs2WSM5p>LEfKhE0I){e@BL8nILuYC44prshrAb}tp zI+Xx3Q(^_%Cj@bi#GqrD?~yy0JS5OTU%?@gDtFaf4=iF32ttxzI3xrFN4q<+W_?N2 zM0DtsDh99U{BK|3PNqMv+1A+0B*%u+)O{K@gC3Q#kF5fd~S7 zw5e-l^QSNe^IUgN;x+rB%}ab%M*84O+dBidayI#9lSQ=X)ECsV8E2!$edD@Oio=&)72;=<_A zKxA#!@?lBRhe4L)+DH(SX$cALp;y$1>pbygQ1qumi0GxEBQWKI^Ex-g30Kivzr;TB z+C`ab`DsT}3I@<|5{Yz;*O&FfIH#;K<2a)ngw(Rm8Hd!c_8*t4UWDP|9x|G%{InqQ z&em$9gEo?{gBPZY#uVthf{o4%IRzaPd7`a#Oy}@`8JT1ED-1gD4(TA2wQID)ud70v z|Bg?Q`-kzeOi+67djnH|Ptx08h~?D$I&2~c%lF-u2x3v- z@KduDG)@=06jm2$?+uxuMn;h{7>^rTs#6C3TMB+)Q!H4!Y$+fG=SRAO0QHU7q$@#3 z2K-wJe9F|-8+5-RW5$6!DY4LLA4JG669f&z8PQEKDot<_x_c!5W*cDTa&m<;`NC>~ ztUWlP8}LBz;tdSnztIm6dJ)2)GC6ej2KKQ^_z(cg6vX^%XS+x-AE%Wq&KEy%$V{l3wnupkWGhk@c_Fh>y| zK7Z9L*cHw)bnsgAh06Y({(KlWkNUT$akIKS;osBZwX}jJ(Hg%1C~bs$s|d5r0~0Mh zqPBdp$?i6ht3toB>0T0L=Poew413C~CUut^s{0wLI`5RQizF#k`_tPAmU_VlefxX} zOgGy2>cQVtBxrHeMxO7W(TIK0`(uTKM#n~oRbnii1{bQ{48uD25-*`5hv4%$VOLnA 
zUl*8dB#3Qu`vb}(fE#x}vWPC!Hhj@u)27W7#CD~E*e)QzTU!dmc1iwR_Mm)MCA@?1 zuA-lM{?zyXP6Ud>lk`5WeSugc)fa52_h5{$+uwqxWiAI!t7YK>$Kf9OtC4)?W@_3| z*GJvz#CC{4&r{VJQ z#e)59&x=AM`UY75NS=ZK!?G1AN+M-Kslr$6-=3sy zt>@Rnt)e`yLw>e*L}1hnZ-UXsi~}^IK%iB-=6fEvI|gzHV?mU!l{B7!@%tp+0g|?k zTX$<=5Div`@}LZRCRybn1qQs$AyD4z0n!vsyL10=Dms(XG?svo!!B?QV5PO2nH_Qi z-U%2jd3@O_7+PXOLqdb7+MA_d3M1>-KfY2Ol?e5BB65$aoz<~%oj4}A&3x-XXI|Ys zP6A;>Ra+HdLh*WhHm)1ac|dOS=_$j?Z?&RLpE~+vA;<(@kt6oUQjamQ$;huiZFr$?hhh$_1$LWg183YuKf|No7-X;JnvuXOy)IUI(j^V*0z zS`$Dx77@6+H5jx1>e67sF19RZy}51FRB|A^0Y8)fPzxxBeG+gDF?a(`CjYU2t({re zUA{(ucq}CjxX7Cz^l_p52<$FLBY+#oY=Hzyk3sKaDVuWJP=n_O=trHsPV1fN)AYfb>iZn2!7sKqE5>euI4&vMUo*(MBb906|(bya9zN_(rLW3BU_<=Y$zZ z^CZ3maCZPzP@M`7uk>e@P&zx5v;tTt1j|-NdjDKXE9$64cHgYYXD

0dfP}S!Dl=2VX(mp5%D@`zZ*H$ ztWjP=S&%7P#SKVG5n4ICYABJkOtK`YO8;&xsaiQP)~|H5_J?S$(p0-7sV3Di(}IAz zCeMZtcg&PjgxXbH>2mR1Nt| z=&%y-n*z=Gl~1_qdRX4GT3|a4sMevNTKCllX|EucE|Pxzr{s>rZML2Jm%Bc6Y}k>Ds*y_v2NuHa zrPfZLz~ZJ{9-2D@T77la5H7K8jYK)WXTYxS&77hza*IbQTMaB2hSb=4cNmX`{MRZ2 z!)3mIPXk%S@)kOTK`44|V&v4-vdtdHi`evpA{kNn#AxCMG6R;yK< z`@IYA)@VoRzG@d~Db|?njazIPB7Qbsd`YdckiKb8ZnzYcv)%4ZPLTLUjX?V%YB5^< zPFJ|NCYcRWxUMZ)vt7Qt<}EpW^tiFQvBp;&0C%{ao~X+0XX8jGXlZJ9{rrVnDEO+S zE%hcmLVv?+y;FenQ7gOU4J$nc^p+)W;$fc0a9gsrUkP@csZD;dchW>NzM2oxw}0Yr zUbRT{C@&j6jgYbsMpcZD9qCs-lcR6CbbcPzf@T7m^3eapUgui7)cEo6UlA$3XvFT&r``}F%?V4(s#sbQV@_#af~t8xrXUw^~x(kGn;VC-k_L{}Li6a7_+>)wXP&dG(YuJw7S|;vs8=+rU30sSWl96)&i%mUfj>Sv#9;R` zEh50i0brpwuhPAYTMgdu1xyz0uz^D*)6O~zuK6cs@`knNB^@v0H)M7Hm|Runt*>OV zUCpurq-7@MS=9tXw<5#I8Dp7fQKTN;J^e6uB*3CyUQD%K6j_;+47RS_HLAh3a-lzs zSx~9093HG*HIAP2@!ZE+QXv0J7U@4UzMWdQ;m7Td=I<1Y{EIE$DXMvq)lWa)LHziO zB~sh?>Gk$x!2#mwxXmy8oa~*Qe^E2|x88cdQ_@12Z}@Y=OXFEn^UYyX+a;c6SwGil z6`}Bmu?5a(8Z~2+e)J<7Q2xbgvc0_YQ{Bx1je_>&aZu+iNZCjBL9U!vtti+9I`L8k zCh?VYl(W7Z4@w%KaKg~_nEw!E@LZA(-aO(R3W|rFDvud8GEWUG7@+PjoVJ~Hi_fw? 
z9Fpo}KIA%DkesM>el9!~wR>3W3Q4!i$+A1Vlb+*VTMT?6m=34a?NG6~>vRCW#VbhG zY+WPUe>EGj>oOX$o6JL{%L{biV!=jeC{?K2`o`Az<4PAi_`yoA`>>ni6BNs_=3dV_ zeLtDj)5`Dl;d=IOT{T)SH|d$~Z*n~A%S~j}_*Rx42BJAmmuOeHVT9Woz3BY&-SUd< z*@4-=AE5AGSo@=$E@m*^a{{w-A42fGkuRUniZ{XxyoV^8Nk<>!6zzo>fhOpG_aIuv zq-%Apg41l@%g&7gY1b;dUQW=j`7nyMwB z3)EC@*mIU(_})?C0s7Z$ZRcj_fop6>H&-P^aSrafb!e_cUH%NAuxHe>R!vM!hA$A) ze2@eV6r)v*^HsP_Mf{Ea&aGK}@O%mfF6rGR;QE>X{wh zA%ZE|{po=5{F`2)uGOlSq7KxI!rV1c49DC(u;9d*Uk)~C-5*n9wINq-ew+`RnC#ZXGH}N);u`5AcW` z4_R0ImYJGOv}keszUiDXCLpVt%B!rcXqDM&!li;Ut&-Tn<$~R?S*kc=Nl2n{>l5l& zj3Lu|=kY|`ev^CHF*C;}Qm`OXmL42rybQ`%#j|)l1##VWgWN>%R~kXM`5!rkRQg^v zr;SBw>gS?{%}1M=t72`@Leg@ya8q*)^%N&6rvG6?3HBY={a{HN?fM2`uF`qxt!yv0 z3-4{^D@Hi%Lwl^5{q}saZoI#BZ(}EzMd*3_$@i~s*^`Yr;r6}ur1i)-5)dNQ=b&P| ziBWdalUaMkCu03$(sr)DW18?BKk3mG9Bhw5uwu( zop&F=R>*92mDHT1ToWqPPaj-1=(gq_t?>S)b_x+?EfjqN-x1UPPR;~?Hl*40^Q#Gf zWl*yVqRnF1Ik~w${8MY++EaehK^!d~a8`;bSZwoFcS#= zu9?I00R8YRf-i}dJqx@Y!aLun7wRY(bdi8^R;HBM!wN~Gj=a!%j>BP%JLn>#-5?)k z0~~w~{vS$OVrtY}(u$w{Hchuc_Md3ckI#7cn--?v+3TUDescim)m5Q041L?2x@u3s z$a9C#0<#Sc1k{IV|AIFdlG6HP+GEb?2s)7in~EdIm&r6pLm>>Of1Ctjjb3NSYK98G zJ+Qp*m|M>!m@+s%-v1C?I#d%+6Ff`1w2FhO&`N+?ONt4eeL~T__6vDB8cm;N7y{9< z0Gl=yyP|#iq6Ux?0kNr4!Ns&1oma)hl(pZsE|DmZ6io!_Ck+=AG0l@ju3R>)e3N>f z1hEM0*&Rmx6NZKWrc>rArZ1t-%G25Q*!O2gZkxtz7S1yy zHgnnE70X^{*7%paP((WMLXsTLpJ=67u%asP?Dn(A-(b|SUM(-%e#3U6MJ7l4QtW*Z zCye>uURzV!`?)JH+^61cWqn3F&dqDR{~J>3+!~Mfw=G_8_e;GkubaJ1uha7-I|5)6 zLBs7i+ltP(*W>w*?Z@*|Z+TvOzb1(Meho~$d60+*49gc=Alo&|XKeLD%`bKYNn+-Gh_A+hlF;ocZ- zi;v^pY}O*xK>9R8?9$O%u^sf*5gH*pRbP;n3lh%x7R=>zetpHTniMbN@4@|?rWW|7 z6|QxvfB&XhCP>(yYx-9l7_EX3V5}PW=6P485%|{nK&l$Z7x8CA+zZ$Mn+7!Q!3d z6w1AC_sg%Bv!c@`8^*NR&}>AeobN+dWvY1hYI7}vZHQV}WaRp0!;sZ}Z>Me6V-w2i zlHU5#BsdHC-stl6ucM@L{u+ee#51>rho3%c{8udnmovuZ-Sd?xqvv%zv+4OCSmt;6 z#;+z8W3BkF&zh`U&-cfEP3F2=Z;CHSMB(o>kA!#SBYCMo?w*^Y!KqhVd!8JPS9b6G z%kMvQBmHJaRS<^6=~A}o2pfENGFpWH|Jsi#&`TF5hEswX%;xP2>Y$Q%$}BL?P&kfR zkBSQ`lQSNoJcT? 
z1pE=7F2b1>TW4*i(7B1lkl@`hkFJlE*BPCK-V7H2vp4=(eg7OR5PtyQ#YU}C7X#GX zwQ&2nNce-7tSe7~jSMjxgBNZkK$EF1!v^esa7F&jxpi-&75x=Kc@hS8C(;(CoIn}X zgUD-ufupJ^rdD-epTL0@yFp4TW<;Xb}zA9}yUHqdLsrmaYK$_Te_r4*s&OaD4%6gB0-3g z$-4EhS-4BxWsg72c1`VUe|~ITZT%Rg_zxmn6dLy-5DrET_(^g7Cf2;DL1dvsUR@B>2&3>%W{K4e4ta6o6+2E9 za#Ho!1wAnCyvv$B1)!!I>u9AEMY>ojvyM#^%Ffk8=`6F#;_XitSuF?B+XNT!?o0<1 z6y3k z!@vi19=fZDj9OdLdClaQM^)UF9265Y?#oB9SgYVtz%TfhC>+oFqkFy#$)ZF2at@b7 z7nRhJSrTI?a`}}PTMjsmZ%)xt^JF@eMNb|&tF5=9Xfq5OsOAy0`Dy)U0&VD8;S)#tJaWq$dSv=DN4EFA4ZLK1Z;TfjVpW(`Zz zKw@hOn zFO*%Vhtge!m-QMfgpicYj*`d37%Bl^7PR86QINNQ;@%k+$7T$f8hDr8xVe zSZX}w&-A^pcHE>}DT$CT0mKRoC85%PvYMbSE*^;$h6b8IFJ_XQbW2=X#pYyncKddG#eAi8s_?s6qi6DBftkq?dQBx{uvMLJj-rm@c# z%1*I97c)-x^4nC7)UMm=_lDo0fEr zGo3+hwD@xhLzMY4=vxx{__MlH5>n;k4AU^KEii6o)%^%y$+%__3zj~7luYTfy*MT{-a5x0%?bHw)7x+&Sec*Q$;E93 zzj14mnJ5>IVS;nc^ZwC^rJF}XXr|M~bIKUUT#x|B3{L&Qe|8e_LWK4I=2*@$>U1O9 zp%hQ*&B@A=Li*%*QpBZvqbvr7XwsVYeO`#oCPEr+roM*WCambOrM@)n&R;sc$Z}g} z*;%$uU1m2dOy|3^Q-7AXdOnB{aV(u0!`OuTho#H7r$Z-)V?qHW@6J>@KIVO0kG#|XYW)1_AM{AQzd(mNCjw4Cq|WJQ63ZdX zx-loJH9j$cHgcr^C?B73d#BJ1}tD2jR(rQ5bqM=jjWtj z^pjNj~pzLx)S(h!y{hkJFXe2M|-6yo1H)Q5USx2x=CR+2wMu z|1!2))k`CU(H6eHGCPXr35Aub`*R_z$5Cs)EBRtTI)jqp^s~M(i5{u(7XpFF&oi33X+sg(kJDUy)Me?I(t#%4MK_#3s;TI4034 zef3^TJt3 z7lrPxL7CYp>ZZrDFGSwjBB@9k)pBD4cFCYM%M=dW5wi!p+ zT3!MxKcsT{&(~(s1a7(;A`2QjzV)?X?X083fQCdjhC)<1QRUFHJji8$>4Oju5u<@4 z)!m(zls+8SdqtKCp}9%}qX8&1B`=C#-=#S_j+426!Dig$)I0tKTmX+Fgc}k-a3D%5 zz+QXqS~r0QvyWi02F9Sn$U_BFH<}V|5gg+AW7&Rf5k5&-wu`+!A``Sf+lSZzFi4DZ zajSVf*&ozSIV}07a~fPChfdXv4jbm%Vsi>`5o_HUjO(*k>i+rmbcci|9*j^y`_lPf zPQY*HoIS7smQTMi;^(i56$55X3b|E%Nzx(gHO1kKg3~2u1X#ES0SProI9)c$F<|X3 zFgRVaG=l(kJrL(Lit$$zsyZ9H>-2RHuyzI_y4(Pu2zNlo5ubt1N1(haC{Vr<6zBF4 zC~pOlymf-KZ;VX-Q-SrTOF2{Qpm4Wo7f?97j`)WdL|_od05B2~io={1!iecl2Vy4? 
zNWoLMWzJod*8~?1oSwLP-aY-m*O+7|4L`{JH0Unef+N@*~DofE?=L0>cE+srA{ol>&(fJVp8!oQ4-hs+OFxpUe!< zYVgJZ_Q@D_Phb3N_RI+3%<&z9Nf@WgSEHj0LK3V^a5Q1pKT&Ipa%)}6fT}eGywdJX zn>;Y4Zt_*X)n+-Y=J7|OXD}Kn$=P!Oeb(vczYJZK_==uO@XVrc0e%J<7^hHwG=Hig zpg-hK1<-$%HZ&18IE64K+@Iq6m}3WgkIdE$qtbV|3Ct5=kOea*hHk7<;wqf%b$Q)_ z7f8UmJl_kSdURskeJYj%$s$k=-5s}XG$VqP`d{KI7Y5`0Ebz{oqw6VER()Zo7H{^I zv4VjuB{%qp91rlr$;7xFIUqBlx|0fWYo7UOK-KKD0Y8-Tb`k3;`Ls4bA-O)G&pfZw z=*E?UdVKZTK5zDe?o5lQtb^e5J2n^1(ZHl+-_u@VSVw;*dH{%{dZiwkjc&F^*GU4d z@5L9Kn2BMi5Yj^w{ z`wv=$*4%195$i?wBNitKvWU<2KX6?d_=o(bm_cFWK`7q<|1@1-|I0lEaZuEfHl~AL z92odp`Pf%bU~JEzh0gJ}S}01~rN3m5xt6Q`5n|2O6iXqZvtn9P9By-=z`ZEQl;$%< zYctWnzzrKUjo6j)oF-Z!R)HV61u%x@kOUFx4x1ePmtzEvu++I~de zKS7$Za_xNi<PD(sRfU(iusYSuwk0=VrpC$vz2*~y(eNBbC29>T>cdQ)r90%#;89W&z&U9n5eWuE)}y=Eh0Nw+Qazg)ykKNJ>YXYvmZp8W%} z$%k^BZ?40KvcJ8Lf0Vqd@h?wE;`5g3U*ZqsNbZ2neM2~H6@EIMEJ``V0nm2JBM!S0WSAetc-LW#3Ws7rR zQ2ulkHyqU%@-7JE?{$7=n*=p3NLi5XE=V1@)3mkemgV!b^Q8xAjU^% zLJ@9y6`p6-V%sD0di96-+!@pT7CmbcsTTqUz;dRSHi?Ap`8;S&-EBglgfE(uF5=-x zf{-rT^{N;dhF>RWEzjj{!VhGJUh90ca>V5iDdY|syyVqL1`dQCH zG#9Fh?+<4Un4Pr%8wUBikPCg3x$1Q43UG6PTrNTBUD94rAN<|=uXt;}+82KRVg_e>)eSpa|oBy)i1 zAW&^AfaF90k0oFcaMM;4P-Y!M#ZK&DT-zHog|0*nOzgfi01ASD7^0plur*wcDUSSa z?hj}QNfKKc?wRYUB1xLN7?nE!u$UZzi6XK#)r5=;zySkbKmY(H()<1tU4G1P*fjU< z4#1D8qO^sL^e1fot5qE<0rVE1jNkOc7VC9M8|-i^;d@RW}i z+l$S)acNGTA0(U~742I_F5@y@!_l%mBW%ChxOHZ4jb)rtQ%0@C4n{j)R`!lGPzasaMO)|T|YAOym z#EIAs!2*ane)!aHQFml29W#q0I%{VzPsDZ+pU6L-%HnN8nb#;0i+Y50=qwR~Dv{#( zKbis~u)3TcM4oz&%Ppdx=Xpjry^GXo(BeIv%wg zsk0E6q7o&D9$~enu@?Lf?`1>bMY~4dAv*xiU>EoeE5MIEB%4?1)-0k|f7NG@6@+Yv zlIc10-qs~$0MKq!B2R&5Y4tcGb?p<-T8;3lMD6fWU7I5={s+vWql#zE?4aRj0tiU` zJldiLX?3l~f{?Rvbog>d$fJ%TC6>ZiV^&%mJg|5!EhW+H5n?ftwi-o%HMX1>G z*nk&u+v>UG7uzVj`%hw;y>&*d7E`(C#0g(c)wnAMXJ2I14X}vyldCPf980O7Pjy=ozaED#-e4^lDYeoIL2ihAm-oKV1x2D5Cp3o&E+onnzp;8V zKcc&16CkSk?Zfh0Q5hWzC0QQ#f$h5mwK(V76yIRsSlTF>ZbS4;6{+Jgx#o5cbSpkv z0Kv5nvIC`QCoJ5%V`@P+rwWy!7|;SMm(uIUC`1;gP*7XQY-AflEH6%;~iH3Ex|b*i^F0MfeFz5%q-DEwv?b` 
z@Pcz;%70*>HvoahH`E@me(v>os(uyTY`?$Km2Oy`aSuo?V4uT)4kxz-IBt#tQ2?&e z<^ZxH|1=DwZ1jP_V&z*yA*AFJXBRNSn*%+*5d)Ip{6zL=p(hhakhvYSP)Lu9YMf5} z{l5iS&S{OZb86<|^s_B;69|Km^M%(jAGqb+=_IvL-XarOBICFmDzXp}M~=j_Ywbv4 za_NARn>M;{^hs?yKiT~LBP+>83|@-vZn8;{^HB`dW$Q?DiaW#hj73R#x{Oj{>X0_l z?bE8%>~JWN5e5KuNPsExU-Z21cl3NizRs&(j$&ge%JX)=RQO_S^WbB;19bO08Xqr9&_@)>L58X0X1M;snD&3t*+&4JhYY>!X=KT;nsi) z^AG6>IGWf^+`j$drrO*goq z2Ug;vFr58l96n&A;OorrRvd#UR10{CPXs=o`-}XBF$9?Y53czQ72{?Vq6<8!2z0<7 z6y7=0+9^+mfgQ@ma4I||b{uK8!=Qc14oC(d63aNv4+m-5 z#`g440nj=>z;ru8hn#g{3h1doYG1&_g``wah#5KUO1L*`Ac~pHm#~e1V9);I)z>P7 zshLmKNY)l?S=5!iS8&d?0_1l?Yz zH4Eb+WTns#`aUrkm%c`;S4&U@Yk#?aaq^=s4&$%JZ?wlXw$ ziT2DK{y~#wGK6?@Dv4#pAL_I8_6A}_`wH8rAlEhz%&O|D_~xbU~s^Z~#&n{jYt z(ng>;h=iIFhxphu%H-te2J>$0Zs7y!g}^iG_cyi@l%cQ@&DqpE3Wq;z*Y!;o)H*un zMkG_TIGXIv@W2!UnhGiUTBMLho4BzZYG@Z7**d}&+WO?)i@%xk_f@Q&h5f6^8?$t*eb{P3@ttrr(J6=WB#Ex+F`*xj&i zQPU>k8#WhzBSu7OfCsQpteP*1K57t;XcNaj7wEu*NKEO(+|skqL^gWv!O|C&N~70$ z8B2Gw(okf^SRDOHShM9v^79)JwwSrn9NZH>9%ZN|XzYdIUV`y7(V~4Hv^Xkl$^eBE znNRV;u4-wFG!2)jkZ&;pr$3krLynG@*eG9_tYpCht1B=PuPMNx7mq-tx-O#^F#3KB z7=+)N<{iTNCb0|n+I9b5jIrzEZ5;%ukJu_mGBT$!M>9(Z=A++1`aIVEoBa z{3ibBPSxm*yrOfXKuw?mIR8vkVGeVm8QE0r&sWVfk|BzG#Tei=ckO1v)K$5V{L06p z9y@7?_I$seACl<*^ZaDkunkmqimb989Hh6LZZx;p4n077@Qb{ecE&Cq6?vb<@LeZ? 
z|Ka01|9=>4EMpa2hO|qf={<%;{#XsH7Zym*~S+qM`0qj4ivy;G7H~)$P5pcVH9+`j*|}qLEP)t%glMV zDcnXs3sAZbZWXz~fUOD8T|a*xQ0t$-Ug&lf5bVBrWB`j@6aarfgo52yN@W66R0IeM z!Hb16&;}TP`~c$*V^RVFNf{U`!u=7?1c-B57@{6PI5VjUkS2ow7lG`55ZCp6fR6zm zuywHG1Muy6{0G=?w_;!k7>We&>`{{Zhj}nG0%|%U20h7=+?~8zK>1H90jx)WS1pVZ zkj<~0k|$D`bSzmBEKNQZu2@s@)Jqv(RDAj_z8UawU(j74ERvauFQg4>LvdFbsX;&*h^THh4VXX54sU)jCfdKz zP9x(}dh!wIwD}ui%LjnE^UqXuSIDJ#GbN=rKI4|{50hkfsuK^k#ANlyTt#n zxekJ9=(hN5(I;;{1Lo z|D%^Zf_d!LZ6@X(JVm$64MpJ4R7k&ThMdPS=(#ha-%k%1^TzI&aCI4Fnp1xGH**WW zf<%$l7W-;A?81antY23d`4)aGEUA~+|F}H*@@kYA_j#kQwYB&IbxRa#fr!EPjlPcIW%mZd!us#Dtv-0-{5NvH*@NHcUs( ze!%Q~Sw#K+FTE@(t7TF&X3^wQBUbm;EDL28Gz#1Uj>k^AOb>E# zA}cVY7rEZRR=Th$V-)N#$Xh&sH1%a!hg%B6(qFaV4tupqZ80>W+0n=hjM?Qa)S}l1 zff!ao;PUNJ>6^F67?45Rn<23K)^J`E196l99*M%K`?r5t;w+wE5dsiJlJbLW4c34y zqMQ5H;dncZ3(N$j51D;`22UUO0qD*vj>*zp$;ZvyVpd`x?W@%(8{yA6!sL=L>h@=8 z0iH39U-%{ttq!c2Ahp}oX1TldnHGI!S#c|LW~lsGmQ@Y;#mcZ|oRQsG8?51&5@fVl zTCCxCA-YVmQLP4P;<`+o(tUYLTlGjJp}ftUizLpd;=IW!Oi=P|Hc3qpyDVCVQ^`rH z^yqcSzpOT9flxK3o4)xigZzI4pg#nIlmdK%06N3&OO0U!=~hw|s!kV)#Hw^1L6g%x1emtzPDp&PGf|nLgaFNW zDox|Ir=1IZ+4rSdH}Ly7TM(G1tDma@Bo7GKt_tASrZvmZL;v<` zld`ofNKp@!2?bdHZL~;(&Rawu1-47hc6Cei<4D_Mmw3m)^8pFd*zss^>z-{OK)j`m zU6V1QiTy2;Nm4Z^q^mOgb?1LHu59c}T{B9{{+PNKfyv}Y-qA3})c{aQ-~em(Tt^%h zS_4}g5PZGGAPV8|VFhE}(qID55+F&<6L4sKV@PgrrJX6J8x0+TI8PJP?0R4Cl%v#& zFattI32_a>9565lWL6^y@wY9PqI((lzt}L95o(TAuy1z6uB5a&HJsTLmw=Z+px1~S z$6JNrgCSLT?1$B_vzJLK>u_{B;RL&T+>H{6X&E01(Dsld#Cj8SK$K*PPXO*XAh1$q z405GDa|`mP?Rn7wXjeVpZ55Ug=*zMO?8j8%;0*l0S**){%oe4?7_aj1Xb)4L##47h zQ$P1yXY;T1sl(%`w_4-68I*Wp=QPI4 zG;jQBG@=aKf5cLs^;Gw&OqAw>6E>}so4Knsr|s(i++Gd;sTBUu?>!YqO;V5f#N2?e zNgB4~__-De&H8uK#VJx%&Ttd-zri~_Y_!@xd6H#I0iwBG^FNJ}tH|<-KqzlI_hh6s z%^guv73*Ae5&5Gs^t)*bi9z%Xsfp=ga02a=^elyVniKx5%#gkb?SCrar#MUZR__{D zmo|WY)S3)(ws}}e%la3#6{Btsr{#qnOpCi(qt2;S0#dBYZvQq263_6W*Z*lNiPS zne<)&NmLfeDs+&&?3cUs^3sBYTZp@Y(p$>7s9%PG5_+wX z$0%dNa$AULbW5<~(l8OWG?Zs>2uStvFlGYS3JNDN5~jDxAf20`#!{SP68o6?@7~J{ 
z*31T4PtE0p@Qt7U&sF~S`?#mFLzg{;IpjsHHa^_NB5zZz!{{a<$w^ z8r4t97}~Hk%vXh-6&!kZ$n^YNn*%XTOXL|0r5{uZs#nhtdUpUl4e3%?(K7tChFnNvF27H&nYwRvm6^|cX^d!CNUW1A9~BR6RXbGfR@=KMsc%;YW)eVcts zJY%%lGEq)S$m0otrt?Eha{sXir{}hqj#7&tz9=F8wje;S+F%F* zekLh3VP-(?6!W7vG%E8rjEuO~3kp^9oq|kKZ6b`J(@G9nBeEQKn;~n{w7r40(ovni zt)q?(g$U5prGfLOY)Z zE2lX+r$QX_Mf;`h2hug4W{GMi@0*Q$ZQcgd3e`kEg@t#SP_lzIK|>JOq!2RBh=Pf? zsYOa%^8VmE`3{Zj(_MV~%=}oLOC8C9$AMCOXFTR^g3|=?DOSCtzj^t6U}=OfV}|o_ ztLryqTmri^$n=5T>~~Erl7i7z*=mS*y0Qod5`A{RQ5xoX)*brrP389 zTv}N!(k$&wdFbY6$6d5at5Ug5vHg9w;GKOy|J~W8m*~_agYQiv8vAb5d$LW+gcL6D zXnFA3_~9Ihl0dyldRhr_F^!}28%mW*zEt(@m;itlgE27no zlR`n{Av46pc+z`1(yUoxnmKHyQc!hUCeVg^6tYlPA|lTiC?b#bSs$JPfZIh=%ke6g zru4H8rFcR>@6t*^5f%f4&J3F@6eJ5BG2S>)$V@sR;C8s8A-rY^!$^3s86p^pXiZ0s z7$Zg|@}MaFM_gg&+1TU6IoYcdhGdf&0BM!nrYZ6~MW2S`Wa#35TlGa*zPxD9$<^6l zVKSI4?|@2lE_sQ9k+jlAl7X`?$Il-X%qp2^$Xtt}idw(ox{y6uhC0@Xb*x7^bO!(Z zVe{&{FG3K=TLi|?yL>DW4h$1X++)A6jwi;rWT|*dot@h7d=`arSpJQ1nc?mqJ+u=h z&W~^+q3Vyl?8)w%A?*nxd%sC2b-(M;K&vQ(bhdm*siCp^Z&MAj6vCon&4wOKQ9+CT zf|molT@h~9lB7oyU>l#x;}p}CD$o~8TjNjNdNfpYf;}>s`z9;!%FrzsN_QIxWF}&m z6!pJ_rQ+(6!WeLOX?AID0`+L5ohhL?FC)GeRMHj$dX1^k0C8sV@$hOhwoLdYR}9-v zrMsU$5gOnU`oX4WkdoprRZ{pQLmSa#^;3J3EA6TZx6=YP7gyC< z!^x1-jnp@;+|y19g=HIw;u=j}%E=fLZ{+u86=`YP_-CRITdVuvuzCsI>5ap^%UR3O=bWfN0i*;Me{$0L<7sKkF_ zASS?!@9&UVJioIyG!pY+MbKat&hc%U^|#)1irea1~%1i=WI!T~8d!GxFDWi>zv z@WU3!CQWA4u>XFEu}@dpH=Yd_#|c>gv1nTKNqu!D3N@%?69T7XgA>a4_p}yT|WSA>`e&emIGdz=$QlFgo}}@ z|JTJjo7iO~-z}iEGuVWGg3g~?Qyn^6Rg<6SA8Ne!y|ChVe}A}mMSoiQMm8slyphPY zH8Yy>(C@d^ahn?;*g8#n{>I{wH=CB8*=5V%E`uqlEW1ziBD92*bXK_HQxM2-znt37 zpYtq3fZjzW3t5)sdTTo9qS~d$B|nzPAP@1UZ3=N{4l=bsKh_5?n^4VWAqz3bsGqa? 
z^I{oD99(zPu7X`mN;R@MQ|nKQQLOX|k1`mfo0SYW3*iK0A?eAo^c_!bdF@g= zY2w{BmT9LqVPbl}ZI8{ma!hJl*7uCH$pP@*x1z79d}QP;t?SNSk+lqwCrPf&xew}j z%ZzFD(oL3&8o6*&s;*~7kL#CRIzUnwkpB|HK>bg8nE$)1T=-wr|5xSz*{+}=Z zPu(M1e&JMIMIHmf;8ADM`oUfLUhoa!Rp6e0#8?C@2#y5WApi*~h#*lKn-4}Rf!Z(u zTY(5p9t>od39MyX`=cl;Rs4+G`>JwHP5SdeL-@}&%%{G=XS13G`q^~2 zmiI0C5~bNTrPE7d+9DPyr!JG{28|^z>1Bfq`h|hG8vUhDgUo zKap%`3dPYyFs<{5iwFS+va5t8bI&iW`%}yYv(ZZFNcl!L3A_Pwl}4_iHSAS(9|8Jl zHg0;2c@qkX9<56&VZP{tn`+IU;+zaors=OViQEb;RQqA(&U0Kde#fS6TiL@&x{QvM zYS7h*+j}Td^`Yzy@G~fLos${MWOnIVfcPGj8Y!btx}TWVIW4>>E9B1qjO^>(G;9XW z!{BTi-8A;Xp0Ze4halV!H~u!!KXMeUCv8v&o=*%26j6^AOO2!!7F%2;suCprOxoio zQbA#w9TOeHi@KB^D>l5<g8B?1;UZGxzYLvHz4W9a2aRH%F0*{>)4h z9oTn%ac20tHv?JUg1>(}l|af|g5yc5>ZKcBQ2QHgT<{&*C{-B<=tw$Tyk?sh|e7?C|)@ z!;kf)4ep)e*R3QD|KB$8Ycy6I8u2bP9=)g}7N-WdT&EKQZiw+QH%5i7zs`#h8p-xP zE7!v2@7MgK-ZVSvLh%~AI_O#lBQb=(i*Q<{^*zGQWGocfyTZuXEouV^;7L%~eBu;m z3On>(?BOdUvp`mr;0V7Km;VN%(J#Icjw zam_<*%bv`eW#ck*iMgU;iww)9+KBfV)r3+Kr z7@qtgpnFXCFw9PRAz0{U8OyPV5$cUL5D?*TG3^ z+cHoeCVhoV*R^X2H^A&;XgqQ>IdC15kk}^Gdq^MC$C7;|ImJs6-^aY+2WcJPO%P^n zH4$cjD9i}A>k+Z>70^e8i#N1TZs`Cwjg>r-fKN&}oQQoatw6*l?Dr9Y5e_@mpGn?? 
zV3e1yVYZ9Y>N7lSPN7)joOzxV!V9g3ElMV#E@x;TzNEZk|b|o{oEcnV?A0^GD-2M_eg1n-Nc1Y_Q?SCC}^*4Gbvt$bQh=V`+;bh#i z$$7nT``ez`LH4sJ)OV!fGT}EVzvuJaUsuoO%#84_on1uUN@exby%~UopGU*5U4l2z zQdU_Zb$UT_h*KJ++N^o%lX|O~EEu&Y6*>3E$`l(TwK7AQLW6d>zDJJR)TCoPEjjgY zoLa-HKp9^n2~|J!hUVg_qCURsnz}0JpL+QsOyf`;!iPzV)1+#y%we7C?^@-S%(YFN z)GDR-Pnxo1K95>$>y95~PVkJ@i$eIEs}+36jm7#?OU{)MPH@LKs@LGjq~q*-UIh@i zt0H1rVXTac(G(?=ovPK*avapOqigvxU}FYS9mLak%Is4IZcUBaShh-$7^t>tcC`;{_!p!hu6&%gYgq_%IEWf zCJ?Gm%?vOa%Dk?Jaq{``;|5Awj68tk0Y=Jq2?B{m07L{ml(#n{hrg+_smYv^wvgUG z#M#aL@hNNj>dkxlJhzJ-8=FdW#)FKMR8%Qt-?QU74`h2ZP#>R_?29E;KO-la^<;54 zPwu&>KM&y=GQ*!`N-*<5v2@4A3JgV8X;=H)sK**aCcQ#803++;0&g zuZf*J3elmBu0MRrKlC{pM5XlG(U6V)fGuMEqlQY)89b_UNo(VYFj_B00~Z4*+Zh^I;Ur^(_u$o^e0 zQ-y^VnaEh>eo;()AqGc2-ox?X^E@Ys^uCXLi6g;hbAO)9Gb@5CyM*OgE4EjMrpQ^% zL)1hDrCECp_JvbW6z)N`PTYM}^`7;9yByci&%UEN)uJxzgj-hmZm5*XNOl<)Zuoyr zXc$9m)xAwSUu1K2656yq+u3X>9yKa@_nGbKDi2IY4Wbk@s)D!$M5-aT?Buh+?HcBf z8WD3O%+Krgn)L`>z~mS6A4vI zK~YbT70Ub^R%>}ed3VIn{k!+$(4c{)&YSJsOH$loWWV4Va0@%eUz_E@qrGg8GD zrd&|!AYigUL8(B0Py!6dh3dhGJcfRqTCZJ5iS&}mLqPw{)D3Lai7SehqkL3Y2rC-e zF6An`v_4@t>Tyyxs?i&LUvhvW3C8ux|G~LRs=AzsET^LQ&KNjFly@l`RpHdP@DlL9 zO2i1AbwaAjkeCFwPP7qnis0RC9{KPim(H>oQ=**t@GM4ou({_wI3edbIH4z6GcYf= z;wk59-e@m`Go;IMb1gVn;c(XUCrzb1Cqg!e)Dj~k{vVn3elyaVMkzexyErAeg-SFQ zzW+{HYg-Nz?t|22Nhr&01wS(6^>6I4TWnM6Y=15OGrc@7(-Is~{P!BEk7~Sp;(Z-S z;k7DNY~QNC)St>@`@Ap!7iFHwY%|uvG1lW}MuTilj=ubFSC$6J7I zf*g!~@Dl9;i%AbE?>50p4{e#njL-tMr>+FCxMIMVfpkPmf+Hz0NjOY;3l&Y?* z+txLaL>@L0smy>_-JrD0K>jK8%lKMvz99W^8yikN{GU1NX>I2;#in`h7!hg<=4$#X zLyhJUXTW51{UqwcmYx#J@t8sWszdO1X?jg95r4yae77Z(1c+i>w@+%i&@z*wQ_rY` zryw8Aj+G+@3ywl*tU|fU32tq$NaoGYarhJe!r6ZQ17!- zaWkz!Om8)ekdf@F^onAkKP(>MZhx2+fypZfG5n9VX1XF97LiYJsxN71(WuoZ^VOg` z_p1NTacwlT>}I>&&u)ERo+ETaJr(kU#!dJN`djP9cB~&V-D*!MN?dT4Fr$C4joM3R z>OICA6c>f9G1E^KD^G*b-?!j=mI&FPm@0v>hJa$MfFzg`${`HUK$-mB)xg7g^^MZ| zfIgr_Q&dhx>Ge5E?vbGtPLZZ&*!Akfk9pt!ZIpf}<$nm||F2R<^nxr8<-uFCH@YU}PAjSL#ns)Arw7ysuFr4%|L 
zSS?sJI?xgekTrNf3tXQzumL@+|LXq2TR|P7CiZBMXuNwErp8)6rS4$fde0vb4IyTju7hPuJ;2$xoLsH&i>f17Sw@3M?P zOZDxX7NfOGij|bxR99urT865N_07{30Ovup-QJ9qt()FIkvpH_*EZ* z+5RX>v2CVhUmV&yyJF~kW>8PEDSYjJqdI*M=4(Zg=#8KcF>Ee~^}KQ$-L{S=hLRw3 zp+QTq@o&4Wg1E>!{EJ~8vv2UaxnuTS0{MQIHoS#B+PeVC9MO75)YG?{$PlGYr?w^3 zY!qT33tT$GVB?9+_aa*3O(KH8ndPbWrmk=y@=A z?T1d;+U~nheErg_f9=J)az8eqeryInx!K#TqyI|0h+=`)UXC#u&QwA<6@1xyB*rB$ zFZOLrWbqraEj`uk--LWfy{Pi@ykIr|Gl3b)kc9;&_f;kyTIIGoi8PM{RPlbjnH@um{qIASs80F?ADj^XodP{&BH0xLEhiR?ftW} z#l)zI$nK*!a7oM=U+Y<)W!6jM&&%KJd+f`sitSq$6w9nv9b5F5JV#FA&9es$Uc0;G z{3)ukP={0E<3m*To^Z64nN&~J;82!N`LesDC)Coc1<|K%&WQ@>aXW%BkfBr$gAXwL zA{YWNJBX)y80A_pT*_9-k|)yQ}Fx8Z2JG=?*6T8!>HIIVpP?@SbZ=%R8@MCkKk#X z%oB2ptit}#xF$yULal0s?h^YTZ&+kNE+d)lj~b0CpbaO<(K;QrtiUF^(Dvy8G7IF^ z-^m^{V2RWuAH6}8GK}gjMDUH*I6rWD)thotI$yFBh_U6SC@vpztd3#^n$1OaA(+}+ zw{WE*YnYW0nR4CY3gj9r*Wto!_tj3$g7v{t!fbc0oC4pfL&k)P6kcQyZPzKgxo#h@ zTY|;A2GSG!%S}vIX>ge={AIde#H%UTFyI z#X=s)xA!k8@+7fjg=o*8+Wka3gtj>D4PDF}wz!*uUkJldJ<-?6_Ro|X zVZisc{`32_-V?xhEns+_KyhpB9aCM|1oPTpvtHE5qLDle=;mNKyVSwp z!x_9KwIp_OA15aIwy@w(-qV%hEE*gW<`OqV)J34X>aJT zi^99Gb21J`5g%xPAp|ovc9Nne$4D?^`Xi;r|70ffa%H5vPoMtV(e8%W~G8ed?=w5?A56u7Pds*iC7!A2G&A(6tU*`s_1j5YqUrECzR&P2Ka?S4EfWxysMf64b$u)e=mgA0ET%g|dV+H9=a5 zHa5w3IPv24uddDki5Z{R6y{%aumA$S9-5mlopRh-IZe%94a_VMms(*&bYOdi8g+-# z$MUQ|wO4pf*$CVVk#w-o`w&h?Awx(lOIpyIa@JhBj^Vm1@NWQan0g)lcuqdu!`aAA z^x-TTAFM6VZX#zsOqLPZz(xP?$(z$xJ%UAMLW3l9Vy*2={oC?ko%OS*glgMvT%~~- zxZU^3sQL&2N(vgbn!MN0eS=c`>yx(I>0|5e%@=C#_p?0RaDHpP?BpVl2w4M07*`=7tXQ zlTS@CHX;ElNEu(y<5-wd)b=AZdpxH1M81s49uW7`3CF#ewJfS;lEw~-H3ygdK7lpC zpC|IJiv*@t^--*xIoUbt8XVKMo?bVYq1RDd7aW_q8|%Cq{2Ey3q!FI63j;25$nbs= z9#%;O7c9wUcDlm01FmH`aoAPDrO4M5dWgd9zFVWJl2;(ceO|5O_2(YN(RT{TFjW#% z%zfT1YTJF@)2`U!p;c=5eO?-q7>>+i*)o^%X*(ZS2(FkJqY#&4W-LQ?=6)ll{iajoV;re5~^^zxmOy4*$$kQgJJ z2v0e9oY1>@SOyK;)U&PwzU$fPCq)7g0(cm+WrJe)W6Vh1j6kzs6CjWgKvO@Q^3cf( z4?OMyRNDef&6`Yc&*+v{+Ttf(;ogl*ioU3$0eq97*{d-R($tgU*d8O^VgHbSfqc4y 
zqKTsGgwyuiK5IrJFA^r585YfJvoBHmNbor2m>h7Nj)sSlo;IEU?@BoC*jRA(ao=z+SHZloEAQCw&JtKs%P%m*uSVHu$0U_ zF?bR%eG6Vekj7iH=;CQFE6I=+!$*X_N`>bM%+0B>#=uQy)BMBy#Zytkyu&LftLY%O z7(#$Z;-rjj?jKyM6NyXh;lEON1(H^h+%)iR6_^VX=MmVE-a6`^({vUd zV*NB)nU-8!&AAVC@Iua*m-X&gX0FYsE-2gn zQRHG0ZOrk&THV@p<#OKc&GgrqV0MkcR#iAgE z{V|#~J0lh|F2)xE3udpyrO4Qx?2c0T_Jd|OoPA5fvZr^Ad^^w7^OvB>oHY-1x|O}XmFbZ$i#c}=n8-}8j2uphJYy=eRZ zl#jp9U!~pLG|H>>HF?oHGwg%5~=a|G0 z+?%;Eu66%c3N9QH+}R<{1;Wy3fYiohm@? znaOhS#2+^b$!kRFQ5RZKnmT*Pj4Bo@wr{03J zqtbHj-(oj!%IvE-HQ#J6vqUMD(Xg2^vFGNkBSZgwe>S$yWmhMc4~A5i4~}86tG|X1 zW4NnyQW}j!dV46vkrMOpb60zB%GkXrflW>gDTJ}yq(7pSA{d977D9_2p(Mm(l@QzG z)`;=M8+^?vQP-RGAOvGbd#22YC+<1i%j|-ScK7EGg_A=j8<1FJiB00jiJQH1Pv0KwlZ)U*RCNNbss3hyT3B#f|!Okg?dO zgCx$fv-Q?A{M8*2ad!Xp7q6}L=%C=`O53Yu;A0E5r-M|X^51nRHNxkl&{zAy zD+jcM1iaJoPy}tVHL2tu=Vj2V9C8>*W zyRd!50K+Fj+y%L%fj#~Nx!nuw0lAgMs{s64GH58Ur!A4i4_R+@nj5t(6#rpRocCnv z2@v66me<0RS<|f;J5*Mhm{@mk168b2{TtzCV!A?m52Qnt_zpwl&Evq}0$m}2n`9#3 z)fHKm9~cQ^Ifq=&2KjXX@Bi{-KyUj4Eg=tnRb!<>9+U>2f*-VO_4ym+_Y*Asf;i~R ze$N1Y$zOQI`Ce)W4w^hpwXS3~`xB`UCahHRdC8&qy23$D*kFe+7Ie;FdE1sNf0iUuZCHx%a4(t#^Vemnc$x9m2l@nd}LR(7r7^_rS-og=%$h@A|{36 z>W{kaF>&*PL**m_3*lMPYgWyeW_NW#)#HI)*U!QGSfH43tbluH5^gqbIz{x&UT z7Q$X)`&0>RB(H2;S+UXs1d{9*WS0{YhC-pStvWU z1;!cUS%0?Z&u-nqFS}#gjiRqYK8%l<1c|R>u&xHRk7MPVV7$M*6FvQkMe@%6?Htui zytmQNO?!~)AHDP|hk=XL_o7W;*UhcFW$!@UZQ7}8mA}4%%Jb8w<;BIhFDT0@z zbK~AI5CgCA*Ox&o3g%8$wsPsCx#~PwfA<9|3550HMJVA6TqUx+HgXoZM`IsuN~Cpc zGSKO6XI(^$6p`I{9FzUY3fJ9dhA&VwFTq#)_2yahvQ+KD5lQd1HMc?LCiHCNc~@co zN2)cWT%N}%*MV&6HeOaT*M*@ki_~0J{;}MF?9iwxE~KQ+FZZyA zoG?;EU=H&(k!noxRYU+K)Tfy7fK+MpyW+$@B?-5_V$~9xzF$HpXfE{@2H$|92fu>(BL&w2uy(oI^6cO(3{0`&zfY*8cmrllK_a`Hwh;)G1;i?oT0G3)v9 zQe#*FA5sV(N>cjl@g7C1VtFyK+y1+%mv8KMh-_nPsDvvvWwgN+x+5(0FDGJhY@A-} zL1n9hip=N^;c^M35Ud-OkCLhmQZVA>@l!8Eg$Zs24Y1vmZXL6xlzH^6Y=Pt8`J+dN z-=gD|O`K)xz7D_3`Liz*oqmsI&2JK9-^wSy{NXCmOLi~Lo_;#IwJz!+ez#)RJ+Yw4(<&hKQ*5gDx`RFR zl%S#8yWK>Vl)4@ku)H$>8(Z%e2)L{A6WpJd1s9*OBv_*Z+Abm9JDp8vtI?~Ay)S3X 
zz4%lrOut)n!yRmC^sXS@Rbo_Q6B%nr@I#HAU_r9F6N2~|QT1@f7#E`QiGr)kPw_J` z@)y?vQ_Q|dI{8gbv6(9&KSs+N@EZ@U5d2sxN2hSR#~TMOS=!<4rqk};_!;z|Q}oJ6 zFWMZu2e@o8vnu$QwNGMZ`rM3+0;(@WKGI9G|9#%6Mftz$9@(+lt7%zMC{v0RgbsNO zuJIRs>5ycBgKSU;`P%O&;Xz`B^g^KJx`%an5proJv@^knNnj=>bWZ^GV=?I@5w%}! zjwFd6}&3C@D&pvJLy_AQ; z*Y62VN@{rZbG1HC4nJtQ?^CHhpYw12oOko`?x`jEOsVtq_WGe8r#54|$s| zKJsB>PtUJ%R^OF$`@r>xgSB&&t2uC3rHru&Dr3*Qec@HXqTtw-<9ynVay zjY?kz2fn;kcii$HYu?PeY)R#-Jre8OOIfjo3CWbzaR+ zaeucz8FcjY^;LJLHGEpM@96KFCa#~*^gzQGC1j$5ta^SvDa{78+?2EWw7AT7UuC{vA})&n9@jT*4)>ny*>;5tBK!G}o4%X~4 zWnjdSw-LuvhqU+2H{(IhkZeOg&Hp>T^TzPI$WE#^BeUgX?^g zG~?95TfeQ$)O2^_FMb(4-DgIdy8|Bh_BiZV?pdu3r?+-nSHDW-qi0@kXfuDx{Zjc> z)?kMJui9ea*KjlSD;;&%7;2exe-SSrt?kvo=dY_#cBqbt>ehu6PQZ*!#=#ecHjX!&x$)s`2A zczlfee$kPYrAM?co^;T%ZckvQnKNqyFSIXP;`r9cO`(_TmsNav7xGXoawEHdel$A{(z$|@u1j|KHF||xmhW9zDWNpgg4ZnMZy;yo;+QB>X2OaJ|`M|MNS&9w2 znfX-6=C@<^9DK8GWW~7)jxQ)J?y`n18#!t7rA@2mRL|M;_56K<{Jc-!?O5}xh@ zUcS1Yd=~Z`m#vQPci--qxZQVGktF{yKNhNY_42FLmkXY*w9sz)vMnPW6FllZ^Kt7` zsb0BFc~c5jXjE(8z{y!lx2l}A(8vAuJ9bT3P-fJ*vzw1@FBla%rc1P6hn7y!*Mb*~ zsp>i6Qo*c8?dE@R{BTK^V^j7=@5|b;#9!|kUwhy9t5>IwY~6ctU!y`>5+*P0nyKE& z7LA(T`g2sPZdD&$PTk$?NZphYaY2QbP3&Ibx5ZUcTF=Z<;l{k3H&!$c+Er~)c>D2t z9yciKKJ@6sX2;4tFB7^VF}L5tA*&~S%(3sH&&|&p2ba!wyxNUiZ)RlaeY5x7FY8Vy z@J(Fvw^awY{+Ma`h2=E{1+N&pG4qJfsehgOc(Ty2uS52XnEqGIlY=*gULJqiukFey zeH>oZ8WFSWNZt2K1D`k9dFkfE1uHjd<-QvATsG;`pefyGyDS-P)? 
z-#0ybb#1bI>cq`Wzh5+W;pJDJ8z0;CI^DfYaFD~JxE)@L7h6W3PdVtlvubkh-!HBl zHSFn>#8!Eda&{f+nQ+PFgmcoM1i!^Qen>fe@0j!E#_zT^S?GD#wMmZ&m*(Zo_x5nZ zETb0<$TN1zqGmHjO#A-+ty`rXj=jGb-M?s{m;0eHmRF~KUz>S=XVZ59d3wJ8P-N%W z`ySo{R+pMlEOGRQ)wz!P^(ZrbPQ>knjk>m&7IS{hD~G3bSN@qjcF7-`W*z+^w}o-;G5l_1RlKZfXA8i-X>Edr-B(o89Lp7R|Ewc4+?AV|UmOI}`NR zZ0C8kehON&(W`8YgySnaIR)ffRCamW2P<~>eY)b-tleAAuc`E9i_E(&?2P~NVZkHQ zuT*%s_HxmYJr;XRua$88(ecDVV>Vwnm9N{oF{L8r4?a?MNWc4q&p9Q2yYBB9i^`v` z-15N81A&e1^}O`pVZ(e`XO~_V(I#Sg*{!o~yr{75aA^BMP4kUkwr$zoHdV$9>yu^A zILBQ>!hK)VIG)<(bWonR%TLC*ozFAue*2y$KK-%f^ZTIn*=~kjeBR6{tkBY1%{%!& z=rCqUxm}retf{fN!`J!C{^EXrVg9vKh7T(JzG|V`N0QyA)qByW=!sg_dk*Ox>)9r@ zOzqI;BddkZUpCsa-=D=Vm00&dvEVNAer@OR zi&NnF+Hq9|-~DY*-`n9|o|dN)h2auru)J1{GyT<{l@`wd&*J+5?%cUdpElxr~c z<%IiBX4$8#dfT|KYwo#CR@VOR*oFZ$n|@R5`udj>%4J&Zv;Td;235=Cp7VHPSec>y z|IRw&ZLMhsGS}(5$Nf>Z>AoJnjks`n%D0Wyv>Z`uaK5T3^?bjxTz$UwtFIsZ{728C zB|3(OUfJs1?%0*J9cPq(zi0BWGqp|+Uskt#!Q6#Aj*T-Y(4clA$bD^;h zCZ7H2H=i9f`?fkf+529N?bF{ppE@o3u66lNpZYo9ulaX5N7Z$344E@|V&toY+h_kS z_`H9I-%1|LIk-y^-}yuB+l_eK&cEELN9C8#S<&yo>$W?lbZ?PbCm_dx$)%Q_Y0@{> z`0#m_LjxA=%$4uS*_E}HF8Xl%!{f0t$B(M)zW6|d_qyn92i8YA-TYK=fv0oLm*sbT zS*xh~^(-yF8oaDbi5XeOtnPh!Md3TO3w(0FygjB=>9bxH+uT|FtXOpQ9_@Z^a5nHn z>XfU^9Upb;FwLpx&byy_wJg=Q^M~0;LD1N=&r5sNS_jCEZd#%S= zYW4N)I$`V3xOa`)*e~q6d|lHY?6S1@IR9GyS$~z*PL?Q|qu&o5?@#=c`|6qN{pwtw z_q&r@aqms*mUn)cZ%3o0mJzL|?Kt1NM9iJs$#rfO{A1?&GZD}7cP&2lx2bjW7YP0K zhW5JV`&=6y&zgR@X|F*O-(RSEZ1vKIfA*==W>1+*0hRJynAC8RmV4fhRc3YB(<>@) z@#E!{>IIdGy^wR|s_5>=u8uia{nVM;7yEZ8*1z!ARrf7vQ|sp3o!{qgQR?)zI?d+o zxa6_){@E`It|?w-?q|Q|{-Gsnc3jeQ-Rgab!^ivneyx7(Z|>Fn;oB7xzjWvvxIUj# zrR#BSuk!@gkL!JI>f+g>D%YzQcDhZc#!dJ2FLuOzldIc^U|Gino3}Hh+A@{Z$T5{&xrC-hYlV6I_1*&@(CVy z*4?vgTRLaUgkxjg^jUf|cKYG1ofZtJcKrLlPt`8|!KvBKHeYmVvaX%;;rX!#hi-5S zAJ!=M+NU2D>`8okZ05pq;r@+6OIB-ic*f+Ukq-4rE*&>}RolU<-Sf|jZ}@b==+8|D z7b}(1x5&rT@p+Ou_RBoD>fMncrDs>Uo1AsU+DX;F-t=*1fyHB{zB^ZO{junJjW)dh z`J{h>+x6x7FJ0{y(RN7Ow#k22pR=d?_x2~pJ4dO}`)Al+{E(ctz=w7pT7B-f|Iy^k+w5-Udb(lr`c)xc 
z?mbr|%bbVDB9Hqz)*qjx%-~~-vdkJ@uYc{-oqv}4yW9ET-hYkRRyD_@sNolD|M|Xj z!1WF7*7Qxiw-fZu3Bj@uqb(&bluP^g=r>n*9ms|1b_r$p)3aon2difaf zSNduOhD{&+I7>&DM&l~iQ%i?iy zC;pnaXy~g`{slXB8@24RWoiCD8n2HGh`8QB^KII+zDot)ww)`wxYwvwqnewRNo3na zoQ~oYT>SQmT{1D5Z##a@zn-Gy7uV>p8{R>)8=Bj$SdPqD5`;LnV8!(-nkIkug_xTS z=52#@v%$h`uy`A6E;DKa9ptrEf23(6sUc0AbI-JI=UjM(Lvv@BiJ82dD~xmWb`J1y z?kC<%?BLwb%Q?XQVm9XrUd}FF&V_}#iF1}_**gf*6o^lDVMT4q#L1EM5i4wiRk6W5 z6;?r%?Zj-Tc&vFj=N@P8>+E9RH=A>AFN(w4Id@ZM$|=lR-8%V+%3*zyUxxDb`D{7*_`VNt;ll|XO|{2qs=L!b(?13 zO=Mns@m!ZNQ9h!W>9PG=Oz|RyN#aZprx9Og=Xg7ZhPLBr^>6V!RN@&g;?W%Fi>594 zFF8t|8z1MRBCa#yyeQ5Tk*h>Ihnu!zYw~ZgP2xD@T%$R#gjq6u|L0u!I;YyTaDF4s zc>5O4lf;=I&V}Mk6z5uTCW&*mIFrSBRGeq*dx|m+HSI;IuA)>kQL33J6(Q(IoBiEP z_NxaiDB_U)I!Qck&dl?_#F{Mfl|FYq&UXb%73Ujq#@ma27iWTfXE6pHirEWwGm+_0 z;=7k1!T%iJIgW+mrQ@ME?=s7sSv>jIT#Ed>5of%E$fY9In~}_OD0iEGf|7ky*LxZ zxlo*m;#@1vBysK*D_pWTkBak*I4_Db#X%HGwI6Dl1X>R+@?s{}G+I|4@{=3lG35Z( zm2GM33awADuDCdn*w|0#yG5KH%&yY&iGMZ|5i<=b@rAMTbrjXM&g@ zQ4S$ue$YZ9mgcmDL@dqDZcWoOYQqwxZfZjkv;U*W0K7W#h7%5u(T*tc~q zJW=$f%eYM5&fR>R13EZ&^K$mH|0$ckMH!2Hv+RAGU7E@zpK_AKKC)k%3wDTE(SP$^ zjox0_d*RdO+4b0S^8zIOsZLiLtggZ;2wiJt|7G7BYNPzL|DpVD)^gLe!lM3PnYoL{ zMxREe?X!24`e+<~V7C2V#&L#0oXPee79(33#opN|e-<7HGN%;;8_FzRJht8=h=q+t zQOorA2;V8?$#b_2_A4{~rr)cj@RLBUXQI8B-ujre_Y`y97`0R%&3REZdFRhYV(XkI zR>1~qsIc)u=f^C|f7u5viv21@oOi`(zJK);E5E~WJE80+l-@!aD2Tn+Kd*mEZ1Ezt zhs@Ug`(E!WcA0VKeJe$rcg2}1PGcYIDfUu_p7!F7pq02I=qT<8yu}@Xx40wl7Iy@p;;Q*}qOS`T za)?01A>&>m9yer`^k3qR7qR@e`x4oPp5i{h!PQ=rY9-?9C}Q^(v3qmuq2lVl#C%1G zneK^GnHB%{G4PM~#64`^=lTBIeO^>f%Az%o6w`QS|G#3&H6n)I}vf&Z+4<6I_njB*dMQw@9>f_!v&Y=}>RWI+nnX6z{DZjoFE=oV3zn_=8f^%(QN(fEM8waEU^Es8b#hh&W{a z+{EL4%xt`--<&plz0%)%FX1wBKd1dQiCOBux6fC+F8$B@>jpcaZY%a!vDjV|eR9Um zd?TY2*Uc-wh(pE}FCMpM_8-Q`f8JleqW!fk?f0#CgAK7cR%nBzxXy44GWRrJZ!!CS zF<;&C2}5y6f0}RJ%>HlZ+jzy3#>+xx|I2v!Rq>(qBZXPXe;+UZxZf={oeQ?!0y6rd z7r(hEb4Bq)G5h}!&qF018XH;x`l4z7FUCe|_LA+T@fcu(#WIs~hjf!{=$6=ENjBIa z8|2f zC;Nlh#GB47vg@x`5?I^spSAYGsEluRt%|Tb#AQ;`%5=@?)P1JIj7-xVr#X$w?2y}z 
z)yDhRFIfLBP2WAIQ%8zN59c|Fn`gJ5$zkbnRVXAX5=Sv5&4V;YH@Wp~J| zm*O11;4-~!m*Kb^3h8A;VAO{d3X3n|SczpIF8h^KlXrGnyE?J)=x!$jHkaopVQ($ zB>ibYwH5zfYy8x|FIaz<29piF{X3(}JjG>~V&aQ9F4Grr+dhpxcbe!R-oB*w?L6%V zi`#qg=813-H}pQ)TjNvxML(6ZanRC}NqhR)#wE=m?bEvSR~^I^TZe;iQV)8GBR=~} z!<0%oXgm?8b)F8%TZ+fTT?mRp@_ynmaT>dHNFFa96ZaI~>5x28JSOf(P#lu)7mtYt z5EO^pBk8e2x@SZ*?GJI1K1zSapZkf&PxR+l+7P_ZzsVidVD$B^IPD-mDFnrlS)_*i z^*uv8myRWwi;Mapc}?cI72cM)i^BbxyD=YX*lAJBlN9}Q=E=?Qxikmk!86t;b3>(1VG#%A=L~cCSz;qSK1c0K#iuFj z<@?jpr!Vtlej`EuoK2J+!#+I~pNTr>H|pTpRQ3trOLAP1#6jaX^FhCZ^--)(=6qb! z{WI!svEGf}8<6okiE@+&Pv)NNQ$hdks6|7a^sp-HP_T*OOC^3%#)bkVEuO8$5ESxCgSm4>BD=1^tmQ|nCqYA2z@4fDRI;m zD?Zr-r+5;0J(NDxm?tuq@pNFHU5ZaP=~LXeAm3l=Cw-XfpUVizQ06I$ehTwc=JmLq zCCuGQ7(n*pcIKYU_0M61EKs?otOUyl)>z|>BE2+A`QG3QZ`Rx_!wK4{gK83_or2OP&9?u2pGEY$S0n8H> zK9YG7bJ_2Um?tw2WdEJa-CWbhbDp`U!k;nsQ+N)MK#D(rx&FC_uq>x1)={&w;|lp3 zeTGQ-sPgIS^p$hNV*74<1?S)(~I?PmD2l+VD8C0#lg6~n0;InpG4NXR7>x( zSNb#8-{TjObL>+`@wv?U6dpLTe_t?9Wv;)+EqwG3=0v+%C_eec!mf`8kM!|WWA4IS z`ZU)!0!OW<;?tJ(p0(5a3}EiZTyKt$Ok^IwT#o-0%oCZ*_qX>mPf~m?Gf!50UNhI~ zq>m@37^r%Cnd|Q%2}v1!06S_CO1mnu-mh+YA8+OX%;kLdXP%GMabH)r!kY0b4d2Z&iy{`qQdZ#J%eq_C0!}LC7#J*1P z2QW|6_jOHcq3`#O+A_tb9qYBm>3!(_K+ivO{e5F0nZ-W)6rTmGkM~ONvw?X6b9w)E zkbTZ8K1Wy|)x_}8n=c+dlK#x~_jv`+r{738YBv<0B7#%=QO(l()Mp;gT;``I`-np6 zjzHGCG&iuEzoVGDF_+_H5&INTe3r95sYQC9oy?P&%YHw}KKgqk5Yg|f*IK3bdB{9B zbD3wy9Hev98Y=N*7hE6biccx#T5H2!e?L+@tiwKS6rV3y@6y)L%lvd??#5jDgt1Rg z#V3;We*6MS`b=dWz+C2e4f}*CJ{wt|(ms7WN0_H7{4R5?gQ1t>?IZKt%w;^e#X?7U zb5Xb(b2o*zX6~u*KFs|TK9qTY!WS`*QutQp@d`i2JVD`im?tVcm$T8XB!$;xp3Gc- zuTrFn?)&Y;D-3zm&22nt&HCJ(jOY6MmEvIp^Z3pNm#;4;3r^#6l+uqgSs&Ha(7)sP zy-U|SrM-?eu4>wTsqbzO{ryPs@B;hKQv9#7KE9`+m-Flc>sKmzd-2&J^?OopL$ALN zDIS*5KT~wn_A2^{tWO9q^!odd;$a)s|E}mevOXoq(ChC(iic6Gf1>E)SnnBX==Jv> z#ly+0&t&ZyT`-gNsr?PT{vM=wxJ%bNrM;hETow70`hf<~-+vSjjn7aWwOoq-W!5J| zr`NyI{T;P06#WO*yTuuL{XIqTFt~|?UR!18_4gpf!{@A*_o>EJ@%|6%qc*13=gC9Q9km}7 z|3ZROK2vrWdi^~}@vsW(Pb>NwtdBZi=%aXHZO!^CioS#NKVsLs8|b 
zgZwZs123o$O}i@M*N2>PrLcHzi_^;iTb#BJTbz~`TYQQMUuVJ(oABo*@jHr?*{Zjo z2`_8H>zHu*839}Vxy1Oe#mk%U1}40V2_I_0{Y~Q8WumA3*;bsb#J**V&oSZjy9l=Q zwZ#0f#S={UPbNICnCG^90!(18tNw%-9+{qxwb0 zMh^53qC$b;;gLasvB4I&J~$>g+Gao&|G@)ADN7ht7#Qv!5yGWHGG3_Gx~ZEczPPg_ zcxY5)bZiViv-rmb$He;g3k(;Hi;j#^ct}`yY;bg%HYRLfXn3$uBqlH-Dm)l2g+>JW z4|ih_;vW;14p{tSf@4K1qxuEv?KSdjRol8KqOOrvLD)di+Q9Izm|%ZPTT2`N*ucKw z!7|(sf0N6?qNb@u*uc2pXyK<@rG=t8rB?|<4_k&EeL8si$3%sP4U{=a7ZY9<7PY8H zYMc5rg}yZnPPfdc6vlOI8MhTf#lKU`Kg6gggi&2nBdpiF)w-h%sj#kR%i6a!({#BL zR1|QRL(77F?GqXs-NxS%93C5J-i;YWQt`gNp1+or`pG3cW@(Ezg%Peu*7r5~vN zX%oq4iS@Zrn}5IHz}R%&NLb?&6d4|#F$#a+RI_z_hl)vLq#`If%xbS-Dq|hOA<@CI zkrsckHUtNWHOPM;EeL5nWYOnx%pgkyPH!MW!PMSbX>AFNjf@Bj5=A4$R1Niy?H3&!(=ReyF1sQA z|01yX2U_|M7E5MyuqCEnSV(M)?N(U)J4ac>dfqKMa9GF4!O=m%O(KV;MbR-bA~?36 z2%uBHz}TjNu|fS{Df@?u7JolIU1Cnf_%{`+VPEmDmc_(QGj7*6HZW|URpcEQ8<=)c zpN_u41B0W*p3$a7+ZKE!#6RN|%9?e7!s|8*=`+;2?x+*Ljg9I=8)WbCWvbJ<(u$d0viPT6(krwK&sauYpxUg9m@KDPmZ1<*YaRMlm~KF)xzM((U!hv8 z%U-(c(BJ9D=|8K;(YmmhX)`u~LV_x3@Yb+SHT1DrR@Ymnn$)Y)#db?D{JH=3qHLa5{tqe``76aolmXa5!T*eWu zFC4ZD={02>rTVgvp@c!Hj*KJDn1A{2?6QmQxSQc^~5cZ*h`Z z&iWAljN^f>%Q+kZ;~?E_0*T}V6(+{e$=DM@<{@XId1RCsQp))k;%{>kB#E4AX?4=T zqYH9IiTkd9xve6#oJ}F>ohn_FlO?T=bR(4%)@181diljIU9l$C)?FJZtf}R!Wtb@ z8oR=rrt8++*?4<^1a(STRvh_55=>9!6cxbGC!Cn6Eks$G>A>T(G!d3l2;p9mLxukZ^?~Y8@$FXOMs%Xc55?;*Ai2+x6`)UTf($a@J>U^tXY; zbDBy1);Fq%M9}*-{>Ha9k(R-F&}rsfBEu|xvC&p5@#-MFjd+m}DPG!$ki3HpYx+#k zI4X&6_r>|)vvJ5z3W+}eUPe5ogZM*nN_|a&;voJ=oRT*sC=TL}#VL75g5n_lM4Xb3 zCnyg3aR~Z!Q}Q(g#UVas(VmJ^@*fC_gZMLXO8zrJagY!Fh=Sy&35tXC&&4VE4T9oO zeeN4LgU@H?s-Awof)2Ib(!i;VtFF?tM$C!55T}gCSJ7Ko-v{)mpzp8f!&xtXo{Icm zf_@bEyaGN8IOc~w7o>xHkY6z2^(X=2AU*br7jrc~Z9$Lz7T}nl2=JjgEL52|hJyZ` zcrNofUeU|^OandUNB;Z|U8c{rrO#c^<2ZZ=9NTNp6O1n7ICN&N=CcHF^eJoNQycV{ zx7NThZ=Jyh^A-&H_t4$}ieBa|2K1P>;mm1Xe*m9D;8P5^{5d6x1@oDYfH>$I=AjsK zHJ{}{k9nvM9DSOY_;fYV`1YG`kK(zUUmX>_9B(~AkK?T` zb9KCp0X@dK#)RKuPI1zY_{ezZ_cZ9B*l~PT1&;kvi@BOlFVJH=oq(fHFB6|ZCi-C} z`U#-NdKUu6dJ|22Hh>;|eguv_M@)Qf0H>VFarG8_&_9>>{GJZd;<%E3r%oML^+Eqh 
zl#%`D$z1B?xM~i199Qj_tK(`A=rPU(CfxYF5#xTK6vR_R{2Txs>bP=aF5`4HWLh)O zW1c&KzO>NF_6C3+eC4H=+hoJ`UIHx3;|9t$?-oEe9-@U;5h!b zDn1rHYE8Qddb&rIlIQqO~uIAG|2la~4FX&SMIQo=euKLt8(Ki7-=Aj+vvEE?dSZ}O} z&lu37&jR4+v)aUG8|cyJ7;yBtXyWq>IE{5zJ-eEgUHtqF9h5iZt_t6vtHj^dU`|?$ zrx9>mH+&Tz3;WCiz4$Slv~^>-q7P^N8qnjqv59#lp~L(CgP_Ma@0oBr{?TT&U3pA+ zDHGm_Ipwn=#l-J#Yyv&5GlxNs`M(T$?B9E!$8q}-^w{1kv`C4AywRr+bBYsvjNd=W zIPYqk_%t)|>0zP|GttxUpU|P^-}wELjPWlt@mXc!vl}?(;fRTk{M&wNJa!;@Dt%*KMek0GSz5sJ|9jFW(>#c9% z;|qF>zc+C72{-W>272_F2^@WvnfR;+J^CC3jy`8he6E5XeVzkHANvBf^PG#hn&+~> z(Z|EY#|!irPgmgR6Jp}?m5F|aiT)eVW4&8|W4*~HJ|{tsJ~x4*&odLB&!9)20{p95 zlxOsDWv-5YchIAcFL3nfV&W47dh{6v9DOF4_{;-6`m6_zK6_1k&H<-fxEjM!dt|~h z7qX69(#!p{qzSLgoU|BcJ>W0IHM#FJQG6^$b=p19+rvKlT+z#Y_C4rdg1a9TsuY;SYKT==WenFZ(?L^w{sQ%+-EB1U@xHeKMaf z!3Xo1oqu0d%|kxsln1)SmOdpxkK?}vaLi|Y6Q6b_`mQGWAkbsIaloIVp91tDnK-Cla9n-CocaaFRTI$D|Mimn)m+ibapebk z99LbLtK(`A=rPV6;9pC`BUhh{oN_}y^C0^@m!g;T76Lui+Y$5)g_rb+1U-(Qalmo>Oa&iYuNHtF z`+Xg7^x10Sa}qeUc?0MF7Wkn5JK&g42mbp3YCa1xC-1C~&*F++=F=7Qn9s`0)p6wu zdW>@p_|u<)lKI~Zdd&YJ&{M3goQF%G$9!G~J@(5Z&|^GWNyCu5(I*RY${YF=0X^oq zH0a68V%TaPpw9+*_EhvT&&@%PdF}}OmGG8%3k8ntnry-^0mt@UXRfyQG3e1hlZ(+n zYAfa;Cv$bY6$X7y$b+k*mwBiHddx!@aEyNt_+WpH20g|<7dXbh6nwBB*Mi;|;@__5 zW&HaSKX({TE{$GI) z&c}JcG5<@!2lM|u=rRBMfTPbL6Q4^a`s*h8N1(@g^*6gkJ1L)7ZFtrQw zjfvEHgCKJ`J5@%sTspKc~T!JtQ<5x~)BoQY2Y z=+S2#aGa0Zz=v{S(X*v#??7J&=3}<m6X? 
zGX(VLGX*&M%r@~^272__0vvtznE3nxdi1#j9DS~v_`EjJe>TzQpdStpM}~d`j`fyj zuJ)q`=rMj@;ONuN#HSbN(I*Z#&d0C7huUD#Tcc?|fu8=iyBz>nTnd&m+*|d~~QxE`|^0Ejx2H|Aj!0@l*niJ~d2yd`2WT`pg54K1)q}z6U+}{0JO<$(4$WvaP$c?@reUH`b-0kK66ccR)8LTb^%A9A5DCI1wH!Q z0FFNQO?+O19({6FwcTF@n5+F&9XQQ-SEIPr7<_QOYOd(zdNl|1MPR*Jsp!Mme?91N zz1jgB*QU6boiPtfD~69OF9 zpFzMeKZ{KG788EZgr7CxnQG|G(qGR8iidJt`jR=d=_&Azz_Gu2f=@2chk_p4H5@ql zj5YCDV4`1cqF)bstoH!$XAtKvCO*G`9)11>jy{h}eBOc{eR9+kL5PF$j6V69tMj@# za4O?Ug~ibdd~iH>QuG!>t_=r$F&NJi6ulhJ(?O5pc>!=7&s%^$7j?>Ub-{$cHsRv0 zIp_!Ffnt|F5zHwEI6lV#$9zr&9~_?xK#y^*1CBmhO?-}+=ueyI{{%hO`xrRZ`^v<} zzLuVz46mmO07st^%++yK5%d^OL*VG+ZQ|n(ochMq2u~XUJ}(55{WxCHQyA;A1&-|+z+CPNvR%=j{{q@IRME?J z#e*K(^)+*PdHey|`xEFfo)e0{jORDdyFfe_6}^ln1@sut9p-90W$GCDq&!4GoQ;@g z5)9{W2jDo~dVmkk-w@DaKMn(qK4VOL7Mtk5HPP<}J=S{_IOhL0_)wc={@;VX6y)Ej zt`LbsjvtwSXXfOA`7aFomAEFye>LD3XBcqI&miXN`Y;;w=rb2Mws$G`V0+hsp8ocQ zjB~r9m+jpLdTei|dXya_Khfg5^eM-j>cnwXA2_zR3Hac+Y72U7Zy<2=2{Z8-ZK9uO zqMr|Xtalr5toH{KpFcp4KCggde|-iY?5`sADa*$FNg3!bSLW*YtO9!MuiC(Ie6|3N zaZUz~?VZJ39iPiUk3QRhV|(|554QIx=v|?`=M=phpI1PS?Jd*5$Q#8uM4WPbHe#-h z&kn$`y*X4L+FX_n@c$Ju36#L_h2$ z4s~2PGbazsb79~(uBrjYIAg(oxbTwo?g2fn8>fL|J}-a|_RCGsW4*6{qt7Q3pL}0Z z7BaprD8^hJ|K&lC_0|WD^)@l_X$yMv2?UNlVJ1FiO&+yqt9mG=(Eej z=cI}Lyovrc=&@eMMz;GUCv&x5%7GqzS_8-V(iwbkzRU%EIhZerie9cuYeA3mWgBpu zFK2;coKH=-Yhxo1)ZS6zl=CQvxw^g$1CISQ27GWHO$R;3lL#Dr)|&Y2H_;z9(Vqo9 z)_V&$*89-J=MCu5C%dPSH_8wC;^sVBgcVbo__-$9RGiT9^-ig9DQDz_&7BY!HI)%gFeN9WB$uBr*=^pi#X}1 z1^Nnh*7kZSdYS(gpvU}o0*?8g3qIpSeKMa}o9fl;v0y$+0>?a*XKpWwVLoeu9`oP> z9DUlF_yn2g2bk!GfFA3e0vzj|ZQ`>G^ysq%IQr}{@i`27^tlKeeNs$(9)TWxG;eEv zp-&d(YQKD8qAzEnuL*jr*9SP(+t$RVC+N{<5ODMvX5upkcqY;G4My{|W#EJU8-U}y z+oAYa*ykDOE5f|{r0C_mb82SIKhC>6z;WJ{0*-OEFyVnFd@gh9--+Us^Y;emasIvm zj(OI6jQG`Yl7o3BaT((*2^@XOoA@*|(R-Wd{XmcP1_8%<2blN_0X_Om0ggViO?;Mv z9(}d~N1weWK1V>0K9_-`&kYlwHzs<`*LFYVWUlt33vjHrf{9OK;N&IeT}$x6c69@e z^Da>Fu^3@%%RpZV=G{6)FX!D>(Br(@4;<&+G2j^ITN9qAxe=I}H*e-NUnYxF&btuM zODd~n~K0eXyc1#tBF-o)pCiT)Q8{U4ymdY=Qwdf%J)6l`HTZ?%ErcxcR= 
z@=3Xt_!ahsnUPy}yJ1H0a;QpvV5TZ)t5W<|i9-bzLn8 zdaSo1aP)CE@o8zI?_i>j06o?_9XR&OeDJ}3Sr7Uu&@Vd`z3i6*pvQhW3LN|8FW?ww z*;YoLDW5Z;U+OZa7_nbk0>^xI03V!peL#=(MgT{j!6rTvL61I5fMcFlfe+?+2k5Ip zo)0Q|ndf7m$2^|{j(N`C+M3VVkk2;E)qDm4$2<%GAI#?v&|@B^07swMCO&IS^qWoe zKY$+VJp~-={lmoP2I$e}C2;ilXyTK#jge={C;Aizjy|r;)pf%I^yt$bIQn!q@reaJ z`YZ&F<8uZ0;P^Za`f4yf&nxB^qFkp^Bw5X=O}O-w`aiz$L%}NSBG)y=x5C*>E*c1$($ID z+d{x`+`0qDIAg(oq41L9c0TBF+^z$T`Pm9SxxnWD=&|0jz|rTDiO*vb{VNl_Q+pAF zIMnf01~}GRnYlV{eL#=#hXBX^jRYU;-^rkNgZ`bX=w<&d0X_EbYT($vdw^q{mpT}E zpmsTka~gGyIIaPwIJfIm{I3n*s4v~o5RkSp#P80W{OQ_0L#8zXJ?6(((OX#W2m0*b z-$l{O{PY1m=4S|SPw*MXyppKz1D*VLKDUD23-p(OWBfOm%l69nAAla?f2!zZ{BJ>z z@mK6*Bt-UCtdsG)I&jMKb^~jTfm44;eKd2@VtqPK9pv7j#r{X0t0%REm2{Y&8?`*AvO%<~%HI8KfMe-1v^O}JB6 zBVcMPjw^TI*xvfg)p6Ad^w{1`ie9$27wECQ7U0<4ap1EN+O-VyIIgyUp2mr*(M;_J z;M8V0PR;-)eHGSU29Ekiz)_#Cn^92BXC3Bp9Ljh?L674w4)o+D$Kg&B{b3XRDbQm- zUQqNFZtpG7e+lFEzM_}?_zd*ekD0q$^MKvzy&zaKEspES|`0ebBBYl>d> z`$N#XL%+XN^s?VSf*$+bv8Oc;IDbn5$MMhwIOc5@bIRKmams$*272uGL!hUAk@o|q zfMY*i2aftYy{z$Lzql}`c(7mE14o~pz_H$ez)_#ToV1vSjleOUec*%foKm=}(H!kh z@WJtLOVL|c{{r*{U_5+O^m05n_BIMqEI1x=0>|-C9ypE%FW|U;g#gF=9Ar-YvQ3~6rb_6(%wz-Qh}qsXkVk88cze@7*9*)YJPeHCx5w~gaXHSMgvEE zB6BsKzkp*rkHCk@$o#wmJ3J~aTW;71qnWGm8~~2-oCF^#Bm40(=yAT>RP+|3nc92Md%%3j+TRe!_R9H^hdI@W z^QAQKSK^wSr;UN*d37pEvdDI&? 
z>ZgN0=6?z3$w%h@AaL@L>*opJSnprJQ7=C@BQ55q1a|~+jHe27*dSyGSD*s&_x=t23uKah`eue+K&2;FAk@U*ME~S8>uY z5IEH#=k)~Oq_09^am)mc`endTf0Q|CvAy?zWBNu-#!~?} z#Uu4?nXB^kpgAc|t3pn{Vq~OHy4RDNS6L8d@W3I;Y5;(?_X^>Hm9G@~jd6<(A zj{l;H-eMHjJV2iV#(xtdx#sJ5CYBO_c7v;y5<2(s^96xT+ zhJf0O`Dx5t&Cd$pKZ-JPKAr}jr@(Ike+K+H_>=~oHAXxUhZ=tY;1sKzUsZrpd*%G9 z4IK5&fTKQ=IcYIJ3BWNw%fW}r$nmokxSszk;v!uKj`5rZj{29()p!cV8h*qvp0do< z{;CUHZ!gEw1USai893@kFjwPQ0vzL64?a{z=4U(TUkEP8&wfR3F`B7e0DWzkUnz=S z&abL)Q*cJ8_@4i^m3it5Bis&KMWk#$vePtoy;@B2!L{f^Y%*h|uz3!l={JV;iju7BfMy`9qfTMl`_+!8P0DAJ-N9N+V z2b_H5y7vk=)|+*dUM%f?M(V3ECoSf!EpUvdH~3&YQNSr4x$cbuj`1V_NBth=YCKng zV>}PR2jlq+octRa&DU~_HXg`vF8ie>@yIO%0P zTY;ng0&_K;HY26oV1vqr@(PM zXyc5)?8P@KBj-y#;NE+K?ya0~+%;Sv)QXWw6#+69_FOQ zcrF3Q@tW}MT9p-Xe$@_M1;1rYWm#)C6 zU!;Bt_+y-lK#%!91{{6<0FL$E1CDy<$<}t^JgNp9QIO;nySL2BTj`2(YAB<-qaPpV^^(}CW zX9sZ9|H)j9=L2wzC;N139x$H5%!%RpUrNzijN)2v&=-XDzrUgnXMHs2as3|-9M}H^ zz;XRQ1{~KDyP4K@9TN}b{^i4*+J)6i(e>X74kHE_~b zA+b2N0Z09D;HZDXoV3_p_XOQezqiHy^<*yRySzW?2%P*qjN)1VaExaNaMUkhuEuj6 zIL4C-K2%1=<1otzfZ`;7c|VpLIL1>RxUSD)l=lFR`c}YEAH`gae-?1O&-_vGaW$&d zZh;>ACEsjAptj4CxmxdJ;F!0C;6r6(-qr)Bc6oCib^|B99EYcYqy7bRHJ*BNjQELT zJk6P_@TUW%3O_SAaIOl6!>5~Q$YVhR3+D?If~w5grS`QeM4ARFDZJtu3iT{uB-Qe39Si0f(t;MlH~%+-D$W1>$m(O(CC0`gFKzBQhwz&(LK1Ky4~#Zwx1AJ9Js9t9kI zzB2Jy2%N@`t2pUc1DtZVU8kCM0JuJ$nV$fT`oDmqKJx;jAZc+usSF(Zr9N}DU)lgC ze-DnQ2XKrh0yygDGgsr;3moG)20m0q&c`dj$zQJX_kd$OAAqC2>_VducICFnvJW%t~5jf_j zFLO0NUQ4V#SnnX_q(3Q6nYR<5$94D?=qV-{=O^IQzmn%%DxQc##v|8BSKy?V_rvbM z$zST514n%XbJF5Exdk}pCmDRG%sz3_@jGzxm+NE-aE#|QaMTxHrrYWF;TTUKaEvF4 zxy+A@XB=?yZ)n(R3BWO)mB3N|D|0nJd6rw_#CTkotMSxOxGTrg+=Pb!r+DPg1B3&| z_@@9zeFAXQuLO?zYs}U5W=^!--lELa_Idy(fB8OMec%{R8{nv)%v_CU3vkT;A@HFx zayCO+_#Jz2vt>IkjG2 z;FyOV%&A@~Bl9rBM8CpB{{wK$!$IKWFV~rKz)_!jrS16Z0>}7UF}GLZ?*{r8;+kA< z0~Ni+s7_l8`Z{)2{SHMh_o@A$$9?KBaNMV^0mpqR-*?vbV!KK+SKGA$^f-R5oA5lV z41e-DEl!z-Am*6_!+mE2aJ+9H2R^yLXAy7*!Cl2k$6DZ2hwS$sfs!^>E(TOC*Y_L1&;az=4$*~fa7?(rurDo)H1C#9;iNbm{WVPT>&Qg?}209 
zc7qSqA=j%Dz{y{}A94{m=HUr&)aU=+cKqFeV?6zstNl0vIQh%Z87Be9c)kIS`a{gs zeBK3)@w^5f%ulv;M!VATpT&4y2sp-52{^^mko6sztMLp0j`2(cAB<-aaPpV$r>zE# z@$3PP`WwvEc%0T7?IVuy6kx9QR|VkYFMqzM7I2KGHE`4qVy?!s2RO!a6nv(PWct9QvVE?MaT((#4UyVVJ`QDXXYCP2n1E)IV{#6|~>8r55 zA#l`p1djTl%t?#;#X;bh&okgdWn?~Y0w;gDf29J)crtG`0;YVTz6x_Sobz(G9NX0#IOf40IO@kUSMzWTIL31ke5i~ZR}X-r{~O>KPxh@w zdu2XlKYB1%HjAuA-jOT0M7|&whsQ;O{8qXcz7|$#4!FaN6OW$6(zZV3K z@l*s(@ko6K=4w1~z%iZ);Dhlj1di=p1svnq2ORa+nXB{)H$b z*X_27-eROtTMYWwpkJft!&$!>^tgZR0*?FF1>m@UW#3^Ks_iPuTy0l-6McY*{weU^ zpd0?Yr;P?qamv?gQ-Nc=4yX61&;0Y1CH^80!RHc=4w1=fnz)= z;6r8jsrCr;F9etS-77_J5!dMGu;2Jj9yQ_hnm=>dUb)|eg8ro_BmErA#Et?`rwpMuQQcq#)& zpZX>~U4ZN3)~HSk22ORzeQ+dj(#w5qGH}!{0gn2^%t?#;*FE6aFK@ty%E*4no-Ce- zgZhj7<^EMTSv(Pk>_-`YCJekuaj^Uv>V)do&pGC$3LqduCs8qa*-7|$y3p)xX_-N4cR zFmR0LB5>5({bbDpj-Rr?F&+=*YJPlylfQglt`l&KClom96PT;{*$y1z`5AmLKYsux zf4L9d1dj2%1CIKVKikfa7jRt1qM6%^V&vm$6xSAj9`k%1IJPSle6U>(hYUmMFW+y> z4V?6H9F_-8@ko7Z=A^}VW&y`|62XVc$Z@z0IQh%{JsCLZWjq&vqy7VP(qcSbhXoJ^ zag4{0Ipu+_$#{Z*lfN8?QNT$r;~57W^&6P0@tgsU@mvERjOQWfUx<3;{`*qVTf{Xw zI{spOCy%;zR($|-*$>;#VS90ngMBm3*J!d*F@=O#S=2}2;jJQ`~n>H@0hFcl>XIdFL8{=ow?e-O+b(Lw=F@ROL)oo z+a2_{-wgncJ~1XfbAV&J76HdRYygh>E6mkAWIAa(5BZp@d8i5;{c8fpcv=ESeH?Q& zo&&(KUrvG#m67B84si09KiBjKIL4Fdl+j)}&ZXXsxf;)C;26(z@WFW20LOSX0>^j` z0Y`l*b2Xkur)~FF8|G?%g#t(a2;dmcc;Ki{Vy?#X2{`5_=NW6Bsf_He^1#Vot{YW> zV?5r#Q6I%zjpqPxjOQfyU_5t#lfPVN9s$RAGM%-y3-xZy)p$k&$9SfL55}_?^mzZX zQqfzCFtq%?84t+gtvKcSQ;NA958RU{1I@BbfC zcc5Pa{1xymz~2L3MUzJyRPRS|%6bnw5;gcI@m=yAnRTq|KLfX9vvLjcR?FGS?SOxs z$I9)2uPAKg4#i3eHMa8dz{hx5c?IBun^<{87U1~*T#W2!v=!;^&&cOl_yO^&^qAM{+zO9(FaU2yx)13+sh~J27~j%( z06&-CNtQ#L@d|IkJVD`YnI|f|2lFI_hcZuAcqH>P3LnNiMd4GK zrz(6tbGo~-JcxO+ z!uvB%QFsh`jGxCVd<{QORQP6oo~-a){5(bB2l=_? 
zM6<}6|C7vJ6n>7mr^2r@4^a4H=J5)D$2?KtPW(Js;f45lio(nBbB$kL$oyAk?xOIT z%smxek9mN?n=p@8cw6R)3h%=_S>YkfQxrasxyBoej6a&Wi^AiWdn$Z5^8kfUWFD{Z zdCU_P{vGpVg>Pq`qHy_siV1<{G~amHBVZ+(qHtnR_Zcn0bK0fB5rzrdkb1i%N{9kA8 zqVOloJr(ZA{TiU~T+HJY?!r7#;T4%DD_njaoucqotk*Y7Sbw@OccD!bcqnsEg%4#O zpzuk|;}t%ad7{FXGEY|cX67jhKg3+inLf_T%v}`zfVro_-!l(Tcvc>-@d_`=JW=5_ znI|jUmwAf9J2Kb!{T4a?gP6N0d?0gAg%4#Opzs9d@d{tbJW=7>nI|j!2=f$$pJ1-# zN}vC8%v}_ImAR+FA21J4_zULo3jfGFQQ=NJKa&-ngL#U=b2Ha+r;k5Ba~FjdX6~u* z;>-gSUYdEl!pk#HRCp!k$qKK*JVoL4nQQ#MoE$$s%v}`ThPkK01DOXXd?539g^ysK zsPHMw{|{Yv0&i3G{&D=6GS6j}Yo5naW;Z2d$V`!LhRh)uQgI{FKqOPbjR-{~Nt9di z3lS1YhMQR`Dw$IK_qyxz_^-aNm;HLV*LmOPJbSHYt-a4a`<$~+viNK~MSLC}%k$s* zFT~@-KgHw4x8Mold+_Sbrx&?H9Sr{BgY@{;yLgH z@qBoqcu_n_ygZ&PUL8*nuZhPB{dfM<$K%9X;ql_#@C5Phc%t|yJW2coJXw4io+AD> z9^>^atN#i-PW($eUi?QqLHr7yD4v$%uq5$3c(QnDJVpEtJjUy2*8UcFoOl;JUc4`! zAU*<56d#8tiBH9o#TVi!;%o63uj^U+zrf?fcjNKmhw%jQvv{KT6+B5iE610~;>GY3 z@p8DG&$jmO;k;oc&T~6=G$p}rCk4b;w)Gwtc&Uh5dybV0+~pbE$^W62coKQy19!WU zF`PGS8o0}IezMQgS$qn45(0O5R+6Vr;Hh0*&`ZRZY58qz+IjxkNZ3Y#H*1fE^ybLw&W=ncxru~^?7QE$B`#K zaF-`Nd71|9^2GP{c{+>dB2Pl#F3(Q#^asT})AJcT^TfxA3?$+IPJM+t1e`^8)1DS^8@UC47f za7WczML(kDvUnUGi%b3U_4EHN$&)GYRBGYl3W%R??maGW*PeRhDHph-T*TKBkKyrw zyF8W2(=>2L$<3l4(b73C^;c^9ov*|b0(W@|lBZALj;b|{eniX2xYS?Q^TgqafxA4Z zJLBlr#K2vi#KzH&Xqgq4`s;e0!FW>OF3-_4K5j|isd*ZC|44i=o*cN#vzI(u0(W_) z+!g(Zmi=+5zpl3@5l;!+<=H@<(}BA@xf(`4qUCa2>aXj0VtB0h|GUdGk35+IcX<-( z`M3h&t?{_PU7nZ8Q!en-_SE%xYKh12_`qGB``O-`2JZ4C*712dix0*V0(W_yO5@}E z1n%;jujPHDcnY2vxXbg9?SBGKtq<|D#9QM@fxA2d)A;o+2|TqvcY6OwJccI+?(z(# zJzD~Id6Mt&dG?F1#8U!yd7g21ik8!XJ4)=3%CXDhN#serSU?n#Ci>^{jHf-BO8EOZ zcX<+sFCg9;j|<%8Ip^*aE#(4tl+q)WW3|NN$rB&A%hSa*AX=IR?()Rlp31S#;<@mI zz+IkZXA|+9LZ-l7o?XxTxB}ww~NW;{j>PbY(FFMxwySwJjU&mz+D}FX9cGNckPca?*DLEyc%vFkhJz3u>B0<=G^7U z<SdF19y3Hri*?=%f!G_`}LUjS>h|npA@*ula|6Q3Ebt0^J!wqfxGy8#BT}Q#V2#z zzF*vqzf%Htd5)3)bl@(JJ#V}$KA7iGvC{sIHZFx}Po}_Ko@(Te3*4=D4_+>C7az~_ z{95AG@c6)8p7N}>Y2Yr;dCqrq7Ps>t34yyjw~?n$;4Y7yXBjCzh5U(uyF6>7e{Pu= 
zcxs-)-e-wV!IJ`ad0zM7u_b}KJSz+OKYS!^=M$3ycX?hW&z8Vb^AzxZ*e`D9RZ{|Y zQ=lAqP6zJtOv&y4a9P}5_llMAf46z^IC(M!?()Rv@P8;EUJZ{6oGF=lhjM|tJcD^% zsg`&G9v`@?4-ZGNrh&UW_CA=-;wv+zt~QnsxXV-DbF5F`F3+wE{tqL??R`#(fxA51 zj7<#O<+1ZWv&56w|0D(O^3-KMFA3b`iI4udB{^^x&oIWe1n%OK-H&M5FTN5_3Ebsr z!+K8#?n3Q-gqOwR$P?qZ#^%Yd^ea=~E;QxK=tr~^5YI)PxWHX|YLKT~;Hm8yo|-0B zOFTxN_`qGBjpS(>xCQc-Xt{pU?r@zAevN{}vxCo{xDvTD$=J)p6nn8Lt`QH?jX;CVm^o2_K2am-MhvJa=&qTgA8Wc(GUf zyAmF3ZdiRX(%+vXz9akLbK>KfXn%=6k`7=yu>4=qPWKrfx0t)nez?Vb#>6e=@7?5Y zyU!rF#k?H#bnk%={Xy!}T)YDN;cnu8P@f*+!`zCZWsvwF>hrXCR^A`*f_M)bckzFT zUnpLmjp;-2IF5fdiT5DSF7Xod_mH@~@8>7+;?(()cvj|TdfH|6&z0ZT$3CxTUM0@^ ztr9<=xc7?cY)7@kzou@D#UHipUHpA^Y!8d)Xa6`t+_xqcdrsWO(auj>zv{9*&Xah{ z|BiT1_E#T?Utz_Y#e3r4h}UDjoe=NKxc@G0?f+N&2gWNK+okm@J9R54zJvUg#Cvkw zTU*@Ld#||Ft(|yo=2e1tbslhriZ38OQQX!$N!-@ENBm{xVG8c1dwQD;(eE=7Uzq2K zm&D_lhv~Sp)$^wkK3Pujo8!C}7C+7Y(>~{6c{=j=XrJ>i|Aqs#hLY!R_Rr15|D^q$ z#i!H$N5%hS`yVQ9^LB#xy=)iL#M7`pNfJNK_P$#@2an4~#4Z0>@v_v}K8IoT>CfYD zZuWoXbJ>n=6+gs$t0;b)e%&Kpl>JE?@%!*@;!jeC{^IujgpuNJv)>pm{w&+wtKti& z!+i0p%#(M;w~}X__*u4#|A{ZT#n0P4;1B6JJ-n5c|WX;(sz9I*8xKcGO)w9=FduSbN-OYuz$J z;%(lJ6TeJ7r-(0OKQLF^-dDCvJZnk6-nHUi(O>)AgSE3A+tqG~&+h`F<*@iMwy(3| z_W7>M;$_&5GGz4mY`uG#4|&A{p)@KhAucAif`;Ccct+k|h2E`~T(Q?z6sb`B?lSD^s6p;x_;7^A?uhp3gLs_+s3)&s$jha>lEN z#Ghnd4i;}sKc5lb&hhq(;`^9aGsS38wWwC7*(H@TgSd{)oC>`w}cH|6-StoT+x>@oX1hvhj<-R_e3I%WJFT8TH{ zd07|nS>)*>Zr3{)Ccd5h)mZV1>`z`1uSCDz5TD9;EfqgN{tv|$5Wh+M1LoT<@n;!d z`<#c>?LqSYB=J=kuS??j$)BF}Tb^{}&n@1G6F;|#Pl@y6Qc-*$^{*wKnf5dm?@N2y ziXUTq9~R%od>9~pmG(a=K7#o?L3|!{nVhc#(ul5#J|scYAW8` zj*G+l`VC~F>@0R#su^m+sx47Ek>lw#;#S2lN_TskxOc1yCso3Wato=<(N0TL5#!CEf zZch{6#QyLt@qHY>trQ=?@ysXU-;rmV_zLFdcj6!6$HhO#?S1;zue6NcKNA0_3y7Ai zJPup@P#y;hh&N=Omll7H_0|x7oBlQwUyHX8Kgn^(1LA$~N5waAd#HFW9*-u7Tm4@Z zpF#Y5@e}yF;#H~RI`KOA|HK>P--_FM?Q@z||9?1c{6*rwDdorOs`w0!&ofdd%TqnA zkH1-bE8|#P`~tVDh`&o6>WbUvSz3u(`yUjy*Fk%WTfd$Vw|+e~GtM-_3rzo4C7XkXvj!v-VqkMoRo>+GFp(v-pg0scB-bO8hq* z=g$|v!hZW*@m(B;uM@ZTJN-|5BK`ed+}e3U{4d)7yZ9oumw(0MX=gUpWBqO8yBjMg 
zo}2R+WyNj1_WldY^BKn{@e)6bacL<&ll_CeE^2vlkf*oAkLS4J3Gt@1^I7qA96wAJ zFG2n};>EbVMEnQpxkmgY_Jf~`ucf~`#qIpjA#tnQ8S!VChZn`;*{;*m538HS=Mo>t z<9lWCPiSWy+}$KJ`d{=ubBVX(k6uCiAs0yf$4UHf_AmB+6PMq`+v@=tP5T(Vef zz2uLlo!?9RT|6G0l=y_yEYbU6CB87n71t!5@K^@+D^|}|Y`+D>Tl08YTl^}1zxY(z z-%ETn+vfyv`#kMj@i81HpA&CFo$qCyTKi8jZzhZX&f~>u@rOB%%E0lm<*CT}+KTrl z|I^}4xV=XFW#;V>@y9stdq%_GBV%haXkXqciV>yZk4jf9IJ0Jp(@;Ic1LZ5obEYhKS!E-N!A%;!=O5 zzMsnFY36UnMvITa$BI9JKOcD5&MASr{OgH-O?(wTSNsb8ruZ3rnfQGi7p)L)gm1*H z{!_m7iMEU1kM9fexPIB|*Czva{TfRCpTzBbW9M*N??m!vDjwZATFl$vd2k!|YV03M z1n$;ruhW$gpWqTkOGVt>bn|U6`R@_8I<&%Vz4x--Zh^b?zUvZ3i#`8#@y?gv{fT!s z-FhwmFkJJG4cz6~{fL(2z+IkF#Ovqv zn&9Vx_)<~49dBF<+~qG#{+uPE8`0wWdn)?(I>#Y}@HCOR^|p@wxuu50SLOJjzQo6; zM#o|uB)&NNr38tuM*K*Lf1P%ZllVB|=Sch?{9&=g=OX@7iGMP!|HD>^Cp`9}#5bco zXC;0xcSy@|kkxY~k89Zjck}0fD{Qpn7T<^G3*3!MPxkM{19y4O5no#T41T+KPWoF* zJR{yj{0_XicqQE4$74KcZ!N;4aSx#LpIgADUrz&{ngjPDTt8$XD<;d85W*VJ~)Zxa7H z&!eve?%JPBJ7c9%?{@uha|50MxBT|FlSjNe>un@H9PcFlj$d7DXyEqP?*24AaMzTL z#4ix%a3l7f_(|e7h^M1HTgB~l>J#GAiT_=EJ^ruw4|w*{z5%vf+kUVB?!sKZ5`0*! 
zb`XC&GP~YOees=)OAGN+cx&-v_`~9V;XTAJ;g5^k=K+U{XHDlj@Dy(S>P5fa4dUJW z&&>Y+Ly5P~o9vKy+aK-^+|{Qz>pdu56#p@Bw_e*H{v5c=Q=j;0Md72>bqs{?m=Ugvqpr-8dX?_`L6M9b&mPvTz&?(*3F z@SDJ09=raqy-&gVHT5Q!J^C)5Al~J%{o%>LU7mM||4DoaelBpA$M%Q+irfA$XW1wy zT5LPA{b4b2+aFdHPfOhzif6>{7te-2EdDa{V32rG;>U=Wz+VyPW^6wHvF+ua4}3pA zka*i~Zw%bkc@y>AB)$>fBKez#fxV2?P(}p5N|482X7&M2i`%v9sYp${dl*)!#ea1+|}U; z;`@s~hChK@orhBANfK}Gznm&=*RP%-d7dWEJK~%1E#lwe$Hi}b&)4}^+{V2HK|hq}Z!6|af6!EM~jQs;gW{~+;?iFd|_N}g)unJ7LCUnKq<{+aki{43nX zy$j?1L*TBzeQ3`S@t*i;@t5$Q#qINVe~Q0D{1x%H@w65EINN;Ng5M-=*9p%l{v+{u z#Sh^H0(awXbtoRVt3%q%z8$5-uaTz;ZsYzJb#5Z@#mUoL+^#p?TJoe{>DSv!yeKE6 z#)?;@Mro zXjv?t5r0p-GQL{8JibBvK0I06u7AE&ya(|+#O?jN-vl1k;fKIo9iAi45%JOZY23#B zE9#u4QffJ_&y$73XAplA&w^W?Z^=_kyaVG~PrN7IS$sJAf$q3lo$K#Jw)e*acl}*O zdxneK`;5nk+xv#ciGNL=m&GsOQ^e2Xv&C~~^>v&po*iE-UIAYwUIu?J@URXa1@7w5 zocImmjq%O6jn^3Jd|2Z95r0&?4}MDWOeD`W@f~=<%Dx`9-h+5e@iOoGxQ4hZuj}tj z#{K@lU4KW?o_69-;9bS%;0fZ>@xJ2g@qyy@zUtxP`-vYV{w+RE{5Sl0@n7(l0}tyk zJ#bftT-l-@(K1^+8@>Ry`d6jS>m|Mn@f*cU;a^CeTIBg&yeocDyg#0&if^yAXANEi zw{c(0xK|3?_4jt#W7m^&@y@H^wI#k2USGToey{il{66uacsude@J`}W@C5PYcu(=A zc;CR?xLX|_58U;4JMqKCx8P%NtMjMSd6vW*E^hBr zuP@%0_`Ae=;r9ma#@*_0f8cK1ClcRI{5iZUZsXpPIuDchdBi^{{sul;@^mE6H1SdR zO7R!)?c#gzeYlOgeQx$-;I6;+e)^xpm(b2X#CPJC#JA#h!8Ds!=kWB^{CqJ#i)R{=&tUP&oR^yl4Dy<5Zw;k$2;=cL4sA^s=vr|@%<=Lg#JkN9`X z{rDED?ti;_x?PN2pQ>Kq?j?$I#5WT!vcliLn|M4vRQv)yQT(mpt<_llpy zn@gTs$Ya-=xBTaa`~1&H{J;2Y@tz}m{6gHueHr7vI&jzDi?nC0`0seKc)`3r{}1JZ=}zBo8<)P+ zxfpKq^DW{_iZ8&+NuI~aQ(yd5{6XyK%QV>i89i{~C z>hJ~euZe$(&&93&d7kuj{!rqN5&yCHkN8H(QcT;v5 z@vn;4;qmcp@z3xL;;q?F?iN3SpA;X@_VtJO8+fdq?}ycGg>8@G=kOBZTd7aH_~xg4 z-j?Er@GiI;U-vw^P@Io{RN_MqP`}Th!o&(<_UJn0GycB*?yaE1`cpdzTct`wi@wRxj`o6ukeLaTf67PqX#BH8r zU_U%Ohu?oxEtT+Snp!-MEsq=-Fof(+lPU>JW0fVEdDy4 zp@Cn&)xoZVnpu1!@dd=~^XEmx?epr*#9J2d>uV|A81E`R7Ecf#iSG^E)xpl!r3CKk zP@D3-E5Z<)26X{^A9`iIPRj-1yXA*FSGt8UIlH&=wz; zjJrCx+0&mMSKhkSv%I&>hukhonRpd_9qiggbyFH`?|65o$J@#1~(#*$|` zc^(u$#W;=-zlcv0@4nU7WiD>xUWsvE9=Pl8NZPYX+^*yBvG^R~KNX*je<{8m|4Mu< 
z{;l|ae82d&_>bbh;U~m@!G8+ejl0$1kHB3Wa^33dc1hg!d$C6T?`fi6ZrpoQ=YqIB zUX&qEQSnlE3CS~vu!P|?^$L)K7tv%=PQNem$f9>-KF9z=Vdpqr!EM5(tDc%X6 zBi;soOMC>rM0_Z|O8hl^jrbJ&Q}N~a=i*E8F9Q$juq$v^hwa3FE4~Fkgj=02Q|F5k zf0X#E;`aUS|4N=T+oF|6%guND-&s)s5Z=`s3{8@4PzNhixRf(T0-Wq>Zyahf-ye~dqychmf;9(t>2kz=Hk@!{O z&*2~AHZGH>^B#$xNBnoDrUR?Y| zytKGoAE%1=LgH(P&&Tf!{6y4#t3!O?u3w)Me~yK#c%~iEN}^@Hcz%4n_#^mc+{S$Z#^_)+o9B~qIcJ1L$H zKPO%h|3loq59peBQ{rPy{CaG@-GgTqe+17i{t%uga5wH&hr)roI@t9Ui-|u?o^rU2 zdq3)Ym&CtGd}HzX_a#mnHo1n$P&>Toe|R|mTe(N*!r z3w+x+ZDe4Kb6ytL%GojeW1$KoBtr{Y7zf54x@ZQSkqcwP$J^>-xg zc}4sQe3tke{0(ut&e0u2BW~lq zI+M@yXyC^qvgwp zyK%QVyc_uO$PW>}Qv5r79d6_D7Ipqc;;#|EPy8zWgXH;uJQu{%?(*Z6^*;aG)|(qI zE8YdKj@!6@&bZ$d`0*%ZmeM|NWAU5tR^pZMw&G>+F5>s&4~sX)`-l&~2Z;B%W_9};-lh~^PLLA>TxA3sO@b9|+Er)@rdv-quFc|Rb& z9zQ33?{*)5P5dD|b4&kstIr@jpZHw7qWGO0&&P}ZhIbHO#qmQw@gY3Ej1jNS{^T|B z-|(g4H5tb*#e41X{rpz^8T?1_>G(O^&BIgCzvpuMJXa+?mmBX@nzDX~%+eqRrp&$loxU*I;5h4cEjuO+@3@h2qSK96=jaJTpeyOp5n*wzLLKW`G<=?jZYPyf-e@Yk8c!ji*FZC!jI!N ze=hO<)pHUbM|`d}Q75Ct#`h}yx>@`pUJ$o>mL^a2ApTU8Z#dg^U5QU7zOBUDynQHe zSN}ZaT|ifxC9v z_Vu2)y^rWq-0Iw%$MHQ9Z}awW;I6-qvEHNN{qR$gzcu+Ui1)?wwT+TSi?wG2UiAiE z`v%@o{AcFFhxWWHZr7(>Ctip6FLB#mrZI1KNc>9H z`)d&I>ijJA{9XJRJWV^_5!=3I;~B)K<5_U4^BUG$F^G5bXEdMVs44L)=~oMhx9zJ_ z;I93vSZ^2c<#1^#T_@k~EYUBw|5=pZ-_DL({4U0~l*HTPQPsfR zcs<2>ZxbJZ*9ts*Ji0q@muDLBO~fbR9mPMu9~6HV?<@WTK2Ur&{KF7l_x#7mMGCuMvL$UnkxH|1|LM@#SlAdwe-8ZjUd&ireGMKjQZIlB2V) zQ-6rw&IMN_+Hy;MjuSVj1@FtQ!FZtVxH^8Thx4>82z(2Zy z?-eh6lkeAWfxE|%`krHXAMpRNI#eeFGMEnK3z4#mWBjPLZ$HeVA z`A>=4b>=6E53@Rp&%qbsHcwXayl<(*C$sZ(Xze9Yw2mLs@_Ph3ciDwkwgy$2t>)RF*x9j1S7r#WF%Hn_EHE^3(TdCW~ApTU; z&L)L?yT?j=GUGB^;%z_wR^YC`*(>=xOT;td%L8}owdeI81@7{cCVqpsU5EE8@utM@ z6mNhZ7Psr|9u*%z{BPou@jt~U;_18ie%rXei)Rvl3(qgU6E7^j6|W$E4zD792Cpxk zv$AjBUE-PX*5Xz1_TuI79^%dM-r`O00fD=5xAP=Vi`(PQ%i{L9^M<%R?z}5*k2@R0 z?Qv(jxIM2qC~lv(`dQpQZ*>i~?aQunp1bS+&j;6Ed;BUAxSQwQs85{ugLrA&@-NTq z^VSeQg-;Q;?+IOc17Cdu-z9FJWBNJpY*AC_ek{jBzFpSNW*pBK7Ps#yC?{_3C#)%M 
z-{0Cud=dHY7vG9^6+e#m6~BZJ7tc#Q$BCD*I*T{L=Zbg0mx)iq*NWfbb|BI6g?LST zw|H~>2<~pW`DX9iKOyleGrH{2^*_4#-!9(G!y(MKEaG;Z=z`*QednU$c0K4y;`7O0 zO?(dCP<$#n<;0@x1sv@v8U& z@rwA{xNTqSY#bl{|8aEVp0A|8oeQ`4!OWAg5^s;=H3E0@p)u>dL;NnhZs2ac_BeiT z;4aTY#NQ|034cKRY5XB^yB_oa@i&McEdDA!M*M&HIPqlsRq>Pf4Dl3vk@!tjef^h; z$MBED%itTtD`PQkQ$g^W*KsTr3 ziraC^a&bFu`9$1~TXu-samyibJ8t@QmGk zdo91kml7|K)yKsLUdmJI&+dVzjid_kgTx!*Pm6cJUl4yBpCSG{zEJ#ie5Lq@_$T69 z@onOV@$ba#`yh^s-{cM+qvbd8g7`n;cj8%l`1@J?+u;So`{Jd=$Ky4`=im*+*WxY2 zci<0*-_HDbRD83|L-AkmMDYwcd_O0N--5p`UK(G5+jcq*Um@`c91rXa;@$T3DBIB< z@$UFh@zMB6@lp7n;xq6o;;-VFdis9boA@%ku=xM*V&b3URm2bDHN+3#cZpxb z8;k#rw-?V*-S?-ncouw`co}@AcnSP%al3B&JL0v7UoYMk-zeS+{{pw|s}J*}O|SnS z7xy^WwY0z81-E#+ZvW#FZ^xO>1b#e9(TDXuE8Y{I5V%{f9cNAr+~r9me!BQ5{4Mc? z_!9Bi_*(HD_!G=m%`IM>hsxlbRV8k z{2n}?ct5<5cu%~%_=|XD@#pY*;&y%cc=5N0zh8V0-cEc6-b4IPytnu-_z3Yr9Jo9s zo)1q9JUkAZB5uclN#b@KxKiAX1Czz=IB=J^9S5d}+i~D|aXSu-_5T0);M#A;fw{!( zI51AUtiOA#ns{YA9=H9#(p>%z9VFh4>k^znbU_FMd|;_vf#S|@OKDDb$yd#r2VX(KyKd|&Y^_;B%TIensW;$`tE;&t%3 z;@$9N;-m1j;*;<%#Fyf`#W&!G#rNZ9#n0lG#WONq8T$HuTm1{+dBpAe1&WDxA-Z0S(#(@~;w|w6@m}~q@nQH=xZ4k(ivA@Q^!J?wGnLp+1{`}ob`U*iSD zx8Y^QPvRBDkK%R2|G^uGU&Z^2=eynaXOMUv{0ZFVd2Po1d=T&E$@}?yo~sf+g?W{` z|Nqy`#oO^!k-**jslq9cm?rmcop%B zcmr|!9*aie`R?%TZY$mZ?@HKP+CB_@m!eJgSdSU zL?Q8a$#bjtTX+?5`@V!4;#-NoGjO+`wDZ7C#O?UMleiuK_ZGL~{}JML{QtbT9skc1 zx8wh}#qIcit+*Zke<^Oq|KEw1^}`W6C0-f7B+kQQEaPMUKaQ?$b{;stcti4(6>p5+ zDc&4!B0k7HutrNKao%n+~RhsQ>U@5P@N{{f#V{v-ajxa+f9){6h)IrgRaANY5; z9VgVv@Bi?##M|?!OM$!na|-kJFY$wTY>>}q`RkKEyZA5oi{k&{3vb}dZ{XX+-^%0L zaWe2y5tZ{C%QX1E?fiuJTg3O{CB)C-RmF4V^?B=wSHhc#H^e)Ncfosz55WhEPr{!O zUx>dbz7d}(Zr?}qmiXVquM#hKv+wVx;#Kjl#GB*$#UH^>h(Ck>EXuQ@k{t^;(2fJdE>>)<1NJ-;}42=!F!93!=Dhh@8ft@d=2rF#dqU##82T% z#M9;T{aPbl5dU1fCcaa=Iet*QH-1L^8T_L7Y&_lL{~gEo@Lb|m*>4mTzome`e+BXS z_?_Y%@O#92;cdi+;oZc?;9`SK_ zKJhVlY4JIDdGVQe)+heEUi;pooZ{~hf2;UbyoC5>yt?=iyr%dM_&wr(;!VYW!&~C^ zxcn0Hs@t&t@0Xj0uNU#R``{M8vViw75^v`RUJTsL&#WAfO%~6HPYc|w*Uk^j3*6-? 
zN&EuwqWCKDczlg`J^TytcK8V;ci^=c|7u ze!F;P+Vj150o=ZC)B1Zmet~$a=V|u;S0zsp>n%LOH_Y}MqiXxx#l(l>Rm5M#Ylu(A z@5HU0i&$^xAl}WNt$F-6+ z-+;d({v-aL_z(D};{W2Gi(kTbi{Dzu*I}=C5&XD#1^l#l8T^uXef%%++IW_ce*HF2 z?EAEGh}-vX=={ljhI_Wb>BaeMyWPTZcqcNe$k??Z5#hc_4Y{h1)~w*Q$L zcWBdEIlYci?XS zle3WjgMI&-t+yopti<1ePZn>7&k^s9FA*PwuMwY&e=hzezEgZ1eo%ZLen$KXeo=fF z^-n*_*TeRoGjeeoxB4U%^jJ`K0JEuwDwgLt=pNMQeeRN|8umn#x)`~UP$`MTPE zDN9{{JCpcLc=o`<{eOYL-8hyYzKD1cyrOtRysCIzyn%RIypeb-yh-5U{)!ytMij} zef(_kVfYg9srb9%lkiozjmz}HKL0mCyqiBK(|JEA@kvFz{~__Vef<--YyVQ#oAw!h zKilpW;TZ#W>+PN1$K?s!<=IGlKJoQ<;lN!UyPj2C&jN7;@qt3m=KgJ&y{~CW@{4hQrw|Tgl@m(hI zQ;7ded?vnA@>I_1st}8PFY)JzKQHk%PyPzr)#n;@_*eWgo_=)dYErM470#2KfxA3; z>iM|5;yLgFfrs;?WZ*7O1>(z!m%=Lr9?p|mfxCH<-!~_ASK#41X%V*#iEpFe_ z*#WnC@>dbRg6`s3i+Udi;u!-XMM&|4KZ4{pe27vNJLDSL*)L z<*^G}92PG^{88}&_$l1#T$^_0c{a)!E!MACS%13-Zt;oK;Wml4?Y%+ZuAMblZzJ(4 zc$2`x?Y(W_F3)|$cND(|?(@@=7m9y{FA3b`vE!N5fxA3MiC-(8f^P`i<+1(GSK_w+IUsKPpEG!xD2Lnb3KWZe zM9V)CZ~LFjWBuQ4zx^xi$u9mAo(H%5Maf@GycXV6+`eb+A@SAB+b08e`;%k7d9m4n zr;Y3o@r%WO!&i%^r#;Eyo$!O={qZy6lkiKpZI7K@L82w?IRD$~Gnn?2#I4S08u+-f z;(t@;JH+!7Uq?JI-b}nI-cr0G-WIoZ4!hO2XH*dH=AnI#>^X@~ZtLUcNWAT5766@Lid9k|P5-}`(paF=H`@hRey@DqW%JodfM=L2_n z4iJAqd>4KtaCf{)*d~*+;3egkBD!_PvbW3%c*mk@xHLOpE*H% z2JvHf7TofDNSi(nQQIe|{uzH{UA7`#inG%i;Y3ck8w1eZvBGd72XcqcQ>sM{sKUKUYK11@~LjHx~weSt%t?<3#AM-xTBe>gsPelJprj33? 
z%elZ^J6qG9Kg3($*Tje7v5BeauFr>ocxLfwcy{qu@OrgXrSBKNY*A_pHH^OZ`Y@*IxB>o@b9~Qrc_mn)_$@8T6FZc}cYxw)(@x1?X18(D9 zBds5&?SZ@gX5#>5mv|=pd-1CHA@TC~N%8yfpTwKve~1sjFNycTV=w%7zS;L-rx&;H z&(0jU8+WTip1@rl?E9_ri6^n%Vz`aFy>Gad#M}2<*AxGUJapgy)z7)@h?-DPEe=A-JKO}w^ z{-bz({AA#6+^r7h0zV#k0`Y%{cg3&aHtsvA^DQs>{<%84Jj01EDE~2N3#na(= z#pCe&;zjTxfuD%-S{+IU?$+Co`10a)@fx_zt1Z;Ixx}|8{(kYcczemSn>>BRZ-3lX zA^Lt`@w@SP;N_td{@qNscB*d19wxQDe-5-AIJX@pN8js+2^x56l6OpCq5sq zFW!azJ|I3G?~S|pa4P!Ozl2}k;}XA;=P%QOcsGvSsq;+nZunyHQTQ_P;rKf7DflPi zFXP+A7vsCc7vM+4H{i#_Kf+JrR{u5Bp~xg(C#zeD^!|1k+~Ri;Uti+wd0f-LT|K{H zy)DGQ#@ht$#><{BbPe3)IZb?m_;GxYc)jw;3I9y;XNq&#W3jKri^QdJ?2LFd z{2%c{jBozQ{(jc}-|d}uXvioC&YPQCh<0J*97j`(}ne}6Yqq78hAKww+8O=3?Y7p_(1%i_zQT7_&EGT z;NiSIAGllZBH}NIzlmQ7Je;=~Uh#FZai13FZx_OCd~M#A4&1GG9r??PufeNG{#oR& zEB-FtQ9K#%FFp`|PJAprReTq|6t{k@&m2o#K9Km<#P1E_U4OUJukXdT;D;qo2l8AD z;$8i#=Zb!~Ws3i0^**=_H;F?}}R;F2<#X$NGxjRNVV;@tg5iZs4=ti{Ic46Cfk-%NQ%G3Th@zQu{ z-16T|{u<(a@V??t|9%Z9+yMz?1Zzg2uWzE3Aix)+kU%d;I5wUv)`PYj- zjPDa4ik}rPil?9M*Kh5wiRTs{hnL4~{`BT}@^*=j5#J$*cl|9%za9`ThIf-Z8OSp{ zhJ`CLD=|ud;;%)FOGkklk zZuUNzTLO1^77$-hd^mn<;I2LPKAG}?yFAa|=hs(Ryf0oOaF@p(e;bM0<8NDWd;IM# zZjZl1#O?7n5x4nvW}t89bcwg^B`I*%uYApY{9EFA@THRfSMq--p6fB6XOsA?_(AdK z`}_ElxEn|JykNiUNVHrG+_keZ?YSym9#1zj^={YC>$b)-ir zF8%^uL3}Y@MSKB%Ti{_G>ILrVu$lOH@z3yPxXp)0sDF2fKS2DW;`{OblBXYe62+(X z_jR8uo`kOxzld+bZQM^W?t21v{XI>4z7s!=r-)~5;oEauJOloVct!j-@v`_;@uv7c z;`iVgXZiKJe!2EPf@c+f2+tL`8+WTi!N6UfMB1O+O*|*C|^ zdE(FDZ;LO)-w~gWe;}TWe?!{*-tv{5kP%_(bss@u}iZ;nT%O;Pb?%;S0oH!QT$tjl0!hW#Fz39}xe6_zL_J z+{Wt#>by_l_Yi+Td^dhX^1Mo(3*z74x##?Mp8SYc7BBahk86tC{^tnmZ6SUL?{I@W z{UrVu;vW+~gAbKFw>{?DGeNu-KII0!T)aAur&|Lr6{Q~R)5p#R?xuJv;$v_402{BJ z_$}fs*-usw--9<4e}MY8!Cjr*^NxGU`1N&?_$fT^8yUpA`Ii2EAOEy?41YsPN-->~|dOps2tBOB{-yXPIuRZ^57`V$bnfSZKC*pI(SL1JrFUMDkAHqKn--B-w zFWlPK;Y;z{_z!`*e%bT!lYzT_jUoOg@n-nBz+E1DK7KWDmuGsL=ts2tBR&*QH_!jw z#>>vn=Lp>8xudO*%OhR`&mXvJkDZ?{5xC3KlK3*>&G3qWyF7M&psu)`A80Oa=Lfoo z+xdb1;&y)EDRH}g(M#fX{oy&{cKzXZ#O?aS>v7v&;u53IMawRUxAPA_1n%nAllmMH 
zPr#2${?dsqd-T0y;)4@YIaYMO|84Dg7QaJ$@zXvo9(VJ^Jud&rexP;WuANWOp7!D+ z@Q1|b;g5*V!uyGTia#d)F+M{42mC4V@9^iuFW?i!&*PH<59=@^a94-C?V=yi^166V zd?9Z2-=3Hn9@`-C<%mxfx6kQpmOOjOb3lA0<9JDYBc3KT8#C~o7PKArc< zfxG_RK|8C9*TCzDKY%w7Z-+M#e-dvlJ`8U!J_GM8{wn^6_iTF|C^YAf}=M(b0CjKM7O8h*&L%h!Ds8P}K z9d6@Zf^k0;xa;o<+H+QX8Gb>0H-1_CD?H7D)YV@Yl7;z#f_3!_d(i`F4a;I0noJEn%ka)|#!p8U9t`#03Nn#7kR&+X#H@!FE-2lBKM zFFnS$r@#2^`19iX@Ts`gw$B93eip_#r$W zZh20Vr=ob~M4z{*cmcepcoIGaxAvT6z2n5s;4j}G&zll|h4@9{f8xs|&pWhdo%nqm zm+lQbTh!GC>7pOeaw+h%(d~Z3XIqr|=j-qPxr!GTf0oC=JH&IbeKi-Kk0*#XW?Tk} zcg07EKZZXq{vQ5@_zQ`?KOc*af7bh#;&bu6xEo*hJUVw)AD<%eNhQ6fS?qt?e8|II1YTCW9$rzr7G6iZE#5%972Z<358g(+C!Qca8t*AS3Lh#y10Ny& zDn3Dc1^%M=GJGy>+xsHM<$MtD#&K3DKQ31#KAHI3Z~y-|x_CSPRwVG_k$=H@mdGAl;>O~-;M_*KCz4U zArf!%_UXV~d)lzxMDdpR_`uzI?Y!}nz+Ik!#J?uq8~;rFHGH%9%lNjy-Fof3?%u%N zdUp{2z4$u(aNsVFo!31RxXV-Tf#^rH{32cs|2=S*$F`%t19y4m5uawM%jS=JoxhIX z6u8S{+fnYoUHn?&ZxLUG7Yf|vvHeK}aXYVDTinj;HpOk;9_Sh+imbl@i^wsbGWsqJL{b& zo`6riL7v4DKbZJs;sfyaBu_=!^Qrh+;tvGwo@Xs|PpYEjufQFBLwv4xQ~!MZdDg?! zp|p5z)_b>jW4sOS>f;_4M&$K*yGneuQr@2l;@$W@PkWvfABRs7pNqdHJ{w;sz8Zg9 z{5||#+~!YB+PN!;cm2wq!?*u{#P53E`)?9&$3NEscm3VUdSlD|{cK)s#xn%&)@#Q< zxdL~2P7r^y_z^rQaF@r9XWj|iIzilCryeA3uTzi4Z9eCp32yV@ z3+kUB@k@#CDgHLzSMuy2&uH<QeTnZy{9WQb@OveXU3aXDc#Rigsmn-lyI$C9;x7?D54UmO$GEQu-1T=T z?Rj5(5WZe~2EI{z3cf}BLwvjV`}kh*J^1(HJMp99KjSCGPvd6;cjIn#xDdFjL)J&4 zAJKAIJR_cFmH)eq`)ky>5N`9ZIPtfN7sE?Rp1I_yCH@)SLENs7HblJ8i@tqN;Wq9k z826U~cm1tOdtMQ*h|dyli@zb>0$(KlIKEW;G5me;SMU$TU&1$vFTpp7zlCoJ+>N`{ zVRzuJ4qp<#S9}wG7*7+$y5szk)cKOcA0qxQ@gMNm`_Y}F#qv}l&n@D1eYYCo4-?-? 
z+^);k8Mkr2z_|Ab-1YY??HM3`3Lhq(qkC$zV^4}_!pDkN!6%4Uz+Vw>j!zSBg1;f& z6HgLPz!wGX#@*_$B5+rSam2qbJ_cWp+j!aa?7o%w*~IS`pNSumJnI?9-^J}ZcR5%4 zdfR$`CBC9~t(SaUP29#kU3%}019$y>oAxvlUx>FA--dS-{{nwl{3pDJ_(^<#c*Y*S zorA^G;ZKUk;m?Q{!N&&f#@*`hQsAx*4T*n6ye>WqxBA~jomWYG2jbU=x5Gb{Jok}j zr}$&|&*IPEnLhA!vi5w2=fQ2<^Dyou0(brGL3_%GKa5uupMc*co`}~IUyR3#FTk6L zZ^B!OZ^S!_AH*LN-;X~WxEpt?L!ZE19sVMIfcPbR7;bgmMx7^1eD0opJYE&giO-Zg zd&u*y_yv5cc)FK;{uAO&@N>A0d-3!>{@=h|e~ZzcbZh+IUA*(ccsB7ycrNjJctP>* zcv10gcp33Acm?sN@Y}>^;&+Nq!|Mg^#@*`BIPl|fB%Aw-Y}| zd>j6R-55%kD zHtzeVb5n_bp7<8xR zV_x*@ogOS6Mmi*ck#`MFD3Er zeT0ghTm4IO-m2my@LHZ*f8F~CjXbwJ^@wjOUJLIi-W~5E-U%Np{uw?@{A2u6@pbql z@zwZT@%{LG@!j|u@vHcH@k{s~@wjcne*G*S9X}0}?tPrHp4+;bLwp7C8F&@XEsuL2=MB#- z&v(Q(6kmxq@!ayb_i;LkyZ3Phh`aZ3Mv1%kai)vA_i+}9yZ3R{i@W!6_KLgraZZT4 z_i?U@XUP~$E;t^E=L|WLa9#KZcRiV(BmDdY++Ba%`#yO+w>od7o(07>;zcF@67rW9 ze}YdGPsKnNh+oSVj&q0Sc3+NW6A6yrJ-7REjO`XT@pV{lwY_*-|A*R!@o$J9 zz*~x6!`q1ef%g=T)h^86S3Da2k@)lYDDkxTH1VSN4Dmwv67ib&a`CG87V*~jcJUVY zLGgk35%Iow+>K#9+~c%9eHnrx#f9C{MX_S@I{_m9=HElFYfj~d&S-U=Y+W1 z|6CP!`=3YRZvT^Tb1;bDaF1WN|H-g9ivM%B|H&)v_CKY?-TtQr?yeg>bA@?ZNW9zU zboAWn8M{Lm-$gt+-V=BE`;mXB_zwJ#_%ZxX@k==)QAg}8;qR8;_BqMz55e)G=eEyz zM0`>4BrisVN6L$5#cPYdiZ>Cjhqo7Ri}w=mj}H?cgO3xRjejA&3SS_;8(%Gc65l5N z4}L&A3H3iEo)y0$UIKq0ULB9MHLQ2E;5X|_b3B>&MLfUwUHoP7=W~a7tK#v4Pwnwr z>ABGBNqiU9Lr*W>)^l3cZC~+}_($S}@KNIV@M+>z@fqS3@g?Fd@a5u7@h#$g@$KS0 z@q^;y@gw4+@yp`#@hjqU@rUAD@F(IM@Jv61{c-D|Cw=(@k01QC_3>P+Fm8gxznVMr zg%a=fE#G-={XNQgzZXA*Z}QyE>-H_XJ-0kJiT_Fb3Vu>N$-Ch=&x*&#Z-^Jf?}+EX zV{8la#Seb7@w|n{5wC}*67Pkl6YqlO68{v>Cq4=EpMu8}pNJe zKUe&3e7^Wyd zf?=Zd67Tl6J3Y64)uAtY#B1RDCI4;mpA@fLC_Hb;ec|tJJk9W0;z#m_aUDEQ7*KKh z!r&O?x$STNB7TZ^^a4@gk$K`N@fG4P;hV)P;rqlJ;m5=~;g`jS;CI9);?aHz$Ls3z z4W3AR1D;;|SNuisJ9ts?c=W5hcqY8IcoDpbcs0Decr(10cu#zo_$T-{@tOD+;#=?q z;vHC5JH&h9zlwi>pAlb$UlZSsKNLTN$Nf3%qr1-ZNDzLW9C!WAoIms;Uc5c--suv0 z3GrrlW%2%ab@AT#TjHPLZ;Ow`JBcsEyNZ8}4-x+XA1=NL|4jTi{<-*Je4hAie1Z6N ze64tbu3BKYPnQ+&~?DTP|7jO5M6zRgeYbE|hq0skByxXrH 
z_uSS)NzQv(yeNLbb33owuU_}u^3)~%ws>_s`hjpT&Ux4twX zew6r|_;}APkK12O_uTTlNBm6jPWW8UEsxt@trByT?~GHafW_-tAk8dTw>--z_R_q@;LnybSL0*Cu~;@j>`!;^Xme z#LK-Do_Cw)wr{CwCkl>Bp4+~qA@R4x-^C-pMtxbtjdKwGocJ_6o%m8br}$R9i1=@K zIq^U6TH=rK#^Ol|hvRN1J`Eo(z6u{NejNW&{2zP~?$*y4zW-;n#Gfe?j^~IMZ|h`C zx6n_Be}Z2TpNC%;pN&5e--t&)7@o&nSHH)TivNbE6#oU!CjJ-xqWGV9aq-yQ!*RSU z9v!bHo)NDlo(_KlckA;ab)MnH+jXX9yzsp9B|dYJ&^JrGyASU1-1=37^ZqPe5I^X- zo!8w5PkU~8Y7l>3yb^v(yc2#;yfq%{P&g0Q-wAj;@lWtH;v4Y{;%o3c;>Yj;;s^0E z;`i|K;y3WR;;DLs^QkYM1aBo?3~wi%AMfP3^~K!>`*?1BX-ND4@i*}IJ-0mWJ~+yA z%hQ4QapLXpiJn^?cORT9?(T!D#NB;xySTd#9ujx=!HeSVK6qE$-3Md+77Q*p+~ccX z(eU$hxV!GT`(Sp@tq#4IZ!Ym}cz(%0nEWq`FTsb2Z^0*vcP$c*bEW5YA8gEh@Hfxx zJ~)E-^WxL+8{$jw$KreN_=m&uySkmmQ;Xlivx&!MUWLTd;$_71;nl@o#orX)gZB`> zh`%qMpjbGcG2(gf>9|`Dld^{KUrT%v=DW*_w{`Myk1&3(_y_oL@mct3@h|Yd#Mk1t z#8=_bj)doT*UtlZEb)DK3h_ViG~yTWoZ``XhUd#8{+Reu;%SL5Bc2kkfV=gUvPhVJ ztQT+Bnf$3kpCa*R-U)q~#Jl_8I?t_N1vu{}@jUo8&+WYKKKQfemZv)LzlvAHQydM) z<<^z^xp0|0w>)Er&nn&lf6;RrkNdfBg*}fNPp|NN#l;KZr9HPi?mkl4bIY@f`0C=@ z@H(Db9(NyU?78JROMEl&<9I92Eswj8^bmLVk>TR*KJuBkyN}Gq-MXsVCOAoOtd@9p z{om@jjXzrNsPM=R@yCpRkK}Ja{v+Z&@QdOf;7`Q6v`HyEF-?9 z_#*sW+^vTL)PK0dZzujE@gMNflIIwCW{dZukDJ6l#*d3%!Y|^k@9)v~yPn&8e`P#> zi~o#AKOO`Hhr2J_!()rzz!Qt7>Jxr_a`9w%2JympX7K`eF7X<8KJhB}OP*WbT^(Nb z-0JWy@nyx^;#F|h_bb%7g~Y#4d~5N+cn8UIhdhJDleP})HeEb3zDB$iz7=1J`z7Iz8pU%t-0F~n_zdD%@a(wjyZbp0WhA~d@#V!!;guy%FZ$I){Byjo_&j`~_@DR;-1U7l zeP8Oi&9@rkSs`8tUoYMX-z?r1|51E2zEAvP{5SCh_%ZQ$_<8ZI_+{~p_*Ks@2J>}w zxa+yq;XLtwi=V`!pA3S6!}a|(bxx1Fbr`*07?(*rf@hOFkH}L}ycE~}2I5umZsIfX z(YPB=BF;NrJRv^y|KwRJ@fnC;A)W?bBY74wo}Jy>3rBgCo|d@OM|| z;>4#D--H(se}#I!CjJB7NPINo?15X|Y~R=FVS)aY_#yQ} zPjx2z-JRF<_XW?bZo8>ZcJUo}ZqKd$?)h&~&n?d-;!BF3!K;WT?jL+naMU;x_1k~@ z(pY$7@yd8J@mKKfp4&Lx^XoyL+c*akKUBOYKEm^;^_DUCqTu-SOw@1xjdLCGlf;+e z(>%94?)mjR&uu(Uh+iOn4`1rJ<#ErizxUkozZkd2m(^i#m_D+>bDM83=Cwt<8@^k7 z3jUM$XZT_9b@*}d)%XSRBlz#)2l1QYH}SjT*YJm)`*nzME-Z-E$Lf$|VEB0)@q~C1 zJX#QDeSb`yb4h%5;`51T#b1&<@#$lA@k-=xDPA8RBt8?Lg1dPY;=Eso7r^KJpFHa% 
zzAW*Z#9zj@NuGs_=U4HyT>r0oZu_)>(Zl*DJsKvUsImF%n6Z)F?KH}?&U&P-Q zcmJPYU)^W4Of6EEf7)FW%Nq1L`ne-2Fe@Ys5Pezh1l@zDIm0{i;BfJqiO+<)I=K6CZqMzHlR0mG@dE_-x9$uJL-7uU(XwRZhzcHo@U~k@m9F2!w%}uRs1)6koX1s^Z(@wvtSz)OlJ8Wem{aJ(WO7q20n8?Pgt4R0o134cetJl@W8o0r=cb@$xn z)tLC+;tlZuo?9NbFB;*w<>^fPNb!#NIL|GQ+ZW9ccl)9x;%;BG0e9>C1nc1!iFeOm zPI_+R?8kV{iub}VN&d6szb*bRp7~OcG&tONlJUSUDqf!Wx}MwpqLY1LaCG(DZXpeb zA0*xZA1OW%pDg|*K39ATzFa&5x1OAfuTD&;!`W1_Dw(#QZk1NszAA;juiEmCFhDyA<{*3nA=6i_q zju$_GPx9RU=pGldJhwboh@UHd8DA+LgN^bU@yEn(@ZA3Bt`ECBxASHq&tCEL_yN!D zkM8<#5_fg}jJn;Dc=tGc?75w{F!`hX9()oUZv7O%W8*IWWb!8$Uxeou-;7rf?}RrI zABwjZ-;ckKyT`@aSmC%wNqimR7klwG-%8BuTk-Pvcao<*d4BTZt%MDDJ?e{=ay$4@mIxX+)@S5j#-oMCSMf^{^rsQ8k z{lL(GqCB6&scf5F;Z?yNL!XppF9}fxr zvE=DVo|MD6@D7YguD7Dc{%)xViNDxZ8^_vJZTwEMe&q)HP5Z@?*3libIVhR z_(tM+@OI*L@Q&iu@c!Z*@xkJ4@G;^a;h%~R$3OSne_YJ=+~)NK@$u3pcueTIjdMNY zNg}=mPl>zyXUU&M{9nAbc(T$_!-%vJFHigs&l3irqwGY%G1K#?=f%V?7Vm(6FFp|8 zDgGsXP<#u1Ry+gae=PnIp5S^oURUSg$->W5;jV5G@)!5wt^Pk#=a1!J)~$6}o0ym(um7qSK)f@7-0H>VEYO1!)NtoPjJn|f$en#gAH z#v)^;eQ-Jt`;<@ni;&t%L;#Kf_;=S>Q;+^n#H^O;X-)%fI@aM#*;TgnN z;+e&l;046@@&U%U!FSo}47xaZb) zw{IEixz)KT@t=t|!Y6xfdECBbj^~!A3-Mozcg7caZh74GdA+#1KJOKG*XI-B?)lVJ zarf_T9*Mhuca!jDFo@uA*BSTkZZe3wpYM@Z-2Hry(ztuPS1%Lhtt;{Fe$m8ptJ?tT z(_Fj{-dgh4A%8dV$@q5hMffrC4P0mLd2aWMFKr^hk>*zTyPMaq#Ag@3jK3uQ5HBm9 z`js$m4e>m9Bk?kL8}Zli?&7WSA>w`U(c+`Rjvo{+j9;zRKz;@{!R#TVgQ#4qC8#n0jg#UJBG#P8#m#WQ>m*7=HfD*T~%8T^TO z3H-UcVLrG2X@w^he;dy%{vn=Cd?=pFbALZj#PbWm5M~fxLVOzjis$}*po-^~=R4wS zh_A%!dhYKBT8O*-Kv!|MA9!Ef?FYt-yZyi{akn4%R^06ewurm^zyaJn?smKy_V1F! 
zyZio4&#gY&sKZ_H&Gn&k$EcQHl z@cBUEzZV~m?-c(EKPbKhKP!F+|4aOL{2%d0c-;F@!~O65;>53ppQjYhif0ioh8Ga8 zj+YjH8?P!p1aBZd7k@`QD}5O({t7-y{9Sx1?ymp&(?_C?xe_0tK0Ca4TW`l%SG&az z<447B<0r+h<5$HKd>D@BhIkzOU-7JXj0fS@x$9mgJel~*cq;MYcy{qO@Lb}x@e<26;kCsFt60jeh{={Y$(Le%o_9uX|iS@!axECO-N< z!6(7t9;ct;NyRtdDaBXg*~HJ|FNz<+Pj;z{vp;wAA~;)U_1;*IeZ;`Q+^ z;_u@<#QWpJ#OLE9#Ao4?#E;-p#ec!)i$B2^i$B2Ei|6<_tmkI&jQD=>H}Hevweh2# zUk*~c>+l87ZC!OB{&(><_%+WhkGl>(@Z9pePy8eC!FaSsK~QkG>wvosClzO7Wu9ugmkACvsc$bVV<9$x8jcz!pY_!Yy?8;d_zA@ts!+x_L7ePM7+ z@!YPfS&5$~UL0Q`UJc(Y-WJ~{-XA|EJ_f%mJ{!Lyz6y`_Bx)M}JsOxD}5tz7bC=ehPnH{1~2B{2^XY{2pFb zJk=-R_$r7e!(SJF32z{tA8##Q3vVx84euk~0UsdV2LDFHPW&ba!@gw4M@e|@R@GIi` z@ay8+@h9Ss@aX)<-|>TTSzqqqNyW2`3hS9tJOiFhye|HtcuhQ?=l;H^gy%M%cEpz! zZ-tlh+}{_~@Z9nYCccjNK)k-^mdEXjT8q1VQBQHVFZw{-?TaRgyM57Iaknp8CGPe` z+r{0!=#aSE7hM#0`=YzJyACw09QHSU^l&3#>%r>a_CF~-w|b7IZfV6o#xvqBe-rZO z5uc5JBK{pdL;Mogt970yw4vK(Cpb=eZuE%wKgE+&iVBb17te~vj1ktsjsI0Vsdzm+ zqj+09k9dE)g!mY|qWB#Ab@A`;X5xGBj^d~AzT%JY55<#G|Ifs;;WNc+;fuw$;6IA* z!+#ThjGq@zStab(P28=|B=r4}#CM_Z&&Pbao^9QJ#rk|fd?sE{d^27|d_7)4`~+TE z{0QDa`~m)!_#M2xc#6?s9Xg38!3T&J#D|FI#m9=*z&{hOg3lIjhtCslh5sNv9N#HE z1mBCh$6a6gSUlF#K3)z=&@XFPhjO@!FUUH1Q{vrz<{i&%J$%V|+lo)cJ9=*Cb^Dp# zo?D*piSIAI68}W}G(JZBDE@_bj4|PSXNmtq{4(*p#IF>85&uEFBED0+9KP3ctAl$T zyruYPybJE`3j?wR zlM9ai5}%p#PWR%iUsLJJOz}zhQt|Kb72?bBt>VAnJH&s&4~hSU9~J)-KZU!#WM^JU zb8ve7WU$;{uiioaq%;FCGp6(sPvI);*W@LB%Xozrs8Svmbe?| zD#kg=i??~zj31tNlEf#e9{OU5ch{dap4)s&aNhOeMer@2+j-qSZLjB+rylY9#cScG z#e3uD#k=FT#K++G#6Q9R@!ZDg_EE7DJY5eq&UwVg7oUwM_T2KgeN;NnEzf%5Gm3wY zXZ76jxP8=1;_iA<4tIT@MjsnUyt{6+@Z83;oAb67-+_0K{4>elTYN1(MSLH=M7%V< z+jF~al;*ner{_ksh`%r15|5cM?3b%US3IfsCwNBjBX}{~9wt`*mC1t-!BI})BgD7# z;;o)%saqTIlXy?@$9P}yzwtr1n^!E(JHv~&aTe!sJzwG@HEi^e$YzOmkLx|2+xXLt zj|z|cES?fS=((NOJ+4oCZh49le_p%*eoOo<{GNC{Jl1nze{GtUe-Iu|yceEEd@i0r zd=8#Rd<9-Wd?{W=d^cWRd{2t!PbL)${j`i`}>X!V|;6rc> z5KoN1@44l1*RfHaTb|s+j}w0ppXj;eao4fA;_f=OO5E*>wu`&xgNMZ3>$exh-Rrk^ z#og<-u@XId-T1s_FxcQohr8>5yMAW(-0Dz*`Q{QYg6Eg~)5-s`_)qw7@w508@wz;& 
zS9xyN&zf964|{IZmiP93-JVrpRL;_JdOA-cy{qi_)Fq{<7LI4qYgF1 zGvkfKOW*L+U``|;wui(?gW7P_Z8XOC7w@#*~3VoHt-{89an-_2Er#9>4n0R&k z5Ak>LYvOJ3N8%&!NRnr-pTqFP;&bri;xq6p;+yas;_L9D;>Yol;)n4n;`i|y;pe@KYAW@{roJ<^Rswj{GjJ{-v928o?D&* z#Ge<>h2Ijdhu;&gg~v+zY=8UU@x;5}X~bvX8N|Q9^N26T3y9Ch%ZP8o%ZqQu>xv)8 z>x&=8TZ!Mn+lk-6J9%z>`S1Sdx%K6_3BiZp7$6=Gf8TS<^WXi^bIX&B_;KP{@QI#V zp8xKT;{V+r#s9lMivM?i6#wu3DE{C5QT)IAW3p$j^E+z?gAI;!xV!HCcYpNU>hKct z%_W{6&oBA+lK*A#C-`vjq;;Z(6PY6ZK9B2Fo+k`K8@v0X=SH6se?fd9ep7rS{zUvZ zo*?OWn)D82x9 z>m*^!uwSbrJ~Q?C&5O77QKVHxxgIH}Tx^ zxckw&;_iObPu$&)J{EWPqsh2ihpp=dCkc+l67TLKYdp8{-(sBW#s9*$Nd6Aw|4Do_ z9z9jK9^80l;;F@_vi=KuZugP)+(+K<+-M2$&BeFkox~5}{lstKBgEq~&I#f<@mb>4 z@Fn64@ZI7Y@FTdpZa>NxOfERiOMD&188h{>kFR)>!uYu2vGCO5+41z^FW|Yw%i;OO zU%_7yZ;F=_Z-iIE-Fm3cy!v|a_PA)7Eu7adiSN=V^r;f>u9I^;xAokY^UfFVi7)Zo z&g-s|YdyC-6N%p-J{CXYx#e;HAIBBXEzd6EuZyq5?|5!`-2Zp^ujiJh@aMsY;E0jt z+4YbWkL$VRao4@%o?D(_#HSV?jA!uN^0@0>PR}jRbmH@fPr?g&Zh72wudKMc?$yHG zIyv-K@I}G#j>Nm`SZB{|JnK1cH}N%iZ^?g*{KLg>;N!)kHI5oiWPx}*;#cDKxV{+t zS{H^#ws~&j{FU+S68{-LAbt-&Bz^-wC7x<>c;a*7$?z-Uh4Jg+1@H&r)$vE-mGNk4 zpRP~84)Hy=I&>sHk$4+CHSX3!BI;aF;ztr+MEqmCq~u9Wo?7Bt@pr`!;3LFa;Nx-E z_uce;mghF#DU4^X_(Xh(_g^@e}wa@gw*y@xSrC;&<^w;z_536FVyY9Dd4k z>$|JNWzVe+1&O~Ro)^D^yS~@uara!hr|Zh{R3tvBcm+Ha?()1vo;>33(#I;|{qR=e zX~;7ex4zqW>T=#;;yT=B;E0?Ct=@vIe}-6$ATa2)d7_E+QCU)}ZG zXcO`A(}#6-bvuP;5|77uOW{@ryUth35PS%ZiV}Z@{ZAV&-s;wid36x)hW8a8hYu7V zg%8Eu_%Cwa8D6}NbEgeBIOa=yl6s+UmUy?{*yFj4e*x$HS$rOT&~yJjc-nKzvzPeu z;+ye%o?9Nb--wnW?3Y`IDW`_xk0l-tkMFtVar=#wp4)gD5T91O2A*Bfb+v3^qN8;7+$n(#x&q{ba@z(fr;w|u0;sfw>;(hQ;p8Iup(Q~WAG~)A$ ze~uT$UEd#5=UNiKjQH2Zm*5R0Pi*?wQM?!VKNkN0pDVr#-;BHKzfr8oTRpcr_xw7ixO-i(xVU>=v9h>(pR1v` zd!MVVxO<gS@Ff>sfxS4pQG=M zJh%DIW;{*Bzr@>!Z^t``Z^FBaU&MQhpT&oWN1qjmI);lsA%3)Ydg8~6r^Y9FZhd!k znBlqAp%n3R#EapJ@MuA-J^x)!owrMTZQ_3vuZjO8dDfBVy!d?{*O9Dt#;5;pUF?lf zpGGo>zs-8cfxEunr0>N%xA`_@Jf*}N;T6P(;FZPu;dR7k;`PL*<4wgk;4Q@0;vK|) z!#j)rf_L}a`tIs5z;mm^UE+s`-^4%0qXn_n_qNn|w!|lx9sFf*%oC4?FO)o8$g^2| 
z0)A3_4*s|JH9ST(8)5i!Eo{D#IQAj_E!Cl|)QRl%Dzmxc3;@k0$B+tL( z`9eJFTI*Kux`p^l_;2EC@iVx4yeFpbH$1ob9%4Lq#1G()#UJ3&vOir{mgg28Up&nm z8+{~_NIV6eTD&NpUc3;VO}rNVqIfktpXb(hSBGMr+xXujzLa=-yaMj}zLh#Rk@zvh zHy0m;x0XD6$kR_e<@Z*#;QOA$v*9bmJL4O0*Y}L{eV^wx-|39!fcRwmnD{3Al=xcw zviNEIiuiH-j`$<|f%pSFT8?MeTgtDZ$`gqto)nMox%J)EA-U&PhhoI17B7fr#$Dfg zQ|FQrU!V9_#NWWnOP(R*X(+xO?;(B=A19u1U08>yxa)g9`o6&P3qj~RjAw~>Gkmr9 zhxj`2A^0}&dH62z+4uqRAMiusoA6WOC-8IPNASy@Ti;zBZg_qn@W;g85x4INnM8C%iZA`d*s8kMP{)o15{B6wiTA z5U+_(7O#xY67Pi16>pC(5&r~VF8(3DPJ9l&Nqh#r&GU=Fd|e&(d2V&sM*IQsjrcL# z_3JQozAf?RiN7y?7Jnpp&X6bRi{bil`Lpx9KEHSoyqfqRydLiQUY)+T^4#Wolkv0@ zzlL`ePcbj}qTuK$o(LZ#UIZU1UJxHCUKbxLUK5`z-T|L3-UgrL`Nd$|t_}-4w>pd@ zeu?-8_-fqsWdwEJFYybAKPdhUepK>|BhO#rhwucso?Q== z#8=|AJ-7P2#X5P@bIY@f_$K0;@fNtN&q&78S$qaQP`u%H!AXK+is$wc{Le9>II_<3 z=mEV){4Vj)_#yGx_&MnDr&HN24cb6UlY%TR}s&SHx#dcHx@68cMxxicNTAi4;1f>zc1b$ zA16K@pCCROpCi5)|5|)L{+;+v{Cn{q@Q1j2T*sr2J@P*7_wW%>76gK!uBYTK}1 z6D8igZ#C0%yUv{DykCi*#OHf%=XLK}t@PaTJSKjP_?tP*>o?D)x^TY8J5KoU6@!ayb_le4SZh4jyUqO5cUd40E z+I^^KZseNc=8dN<71YFn<~GRCs0a(s*_85_moFM);fJ_3;+s-SF1po$(Hy`*rB< zxz%Aj@x8@I<3n&)=N28pI!uxHdBlGq{uMr3@^m22YVq^*@u2uE{DydgcHwv);_-t| zZN3ZXd;9|7AIxpOD;ZBB@nv{w@xyp}@dJ1^@%#9T;*Z0QMxtGMZB)*?`b9}JmX+@q1;_u_%ijTwhiT{Zo z#^VQbviW{b-!FS^^X|w95?_iZ65o#}6W@!c7ykp# zBz_Uk=DA;oe4bk!;w}n41VzlWa_e;2s*Z02ExuC>fB)*9FdAy|L8A_g7;#=`|#Sh>k z#7lGtCJ`L7aqGLy>n7)&Cw?7Y`hW6lllXs$-zEMJzEAR0U_2+phjE{J;CaGeoH2Rc zo$jTuU#`wGi7y~tfXB;g;y>Vx#H&)b&f+;3=P2<*_*C5Lc{%u1l5x(J_#`|}+2O@o z{ZlOtJ_N^Z@nrZ>@q+kC@x1s|@oM-D@k;o=;;rx)g`YkyY@E&UWa5MHRO0>c^tkI+ zO6pM4i?_NR3QrelDDlhQx9UV9oh9BqU+e3+jdL>R9Vk8lAL_ZC*F9ew?YZSyN&I;6 zCHPG7llWKSzv9V?Jgujl*F9f*-g7%|^(DcF;CMki0iMHi%j2G}74qEje7e-K1p6ZK zj(920EsuM?R?&0IQ)!uH3I5%@czL{*=a$DkUu)#K zm*73czrhEI|AY?}-;IwHzk-hyzl={7kNIsl-|6BJe3s{a9Ts?Qb;wTq67fv+?*b67avoZ_|cLb&Vu+w{Gh=QiKh8Bay= zT6it-p7`tH@8ONbC*sY-$K&n9m*XA9m*73c_uzfScjALQx4yeNjPTs*aFzIx;+OCV zxT}9d>bzLu6D$w=@~wD0{5#3hoIHEP=ix%J)Ep_S)Whi{2* zC%zc(io3q&q0XO3{7&M>h;PR~lRQPp^NskM_;&HO_*wC#_!ZpseE@y`$MXxpd=D|6 
zf5i{raZ7~j%JNwJ13aPlEj*=oniatp1xH%(6nGZ#qIeGRLU;l3T6kgcYIq6Ht?#Z5 zm-$Lhe#CQ|?{vmp7?S6U-3tHjFPh6@Py(iS4Nd5l0-Zy zp3-ycyQ@P+&#ewch|eNk5YL0Vz89d*uStAW;;V>P#%oHR669$qUU@_~ufgIC@M+>h z$uk#seVn%W{$KGcc-&IqI&^&>Nu6K7-SsdQ|2{6ecnmzZw5$0{6yln5Fh>JaGkjIunCWYyFAUvlR-HA)KO_DZeqB5f-(PfFJU$-%m1o!Oi+F7D zYt3I{?z!dJLHup;&3N=O&-U>;9$Wk(UfFXy zuX}y+b--p&g`~rF2^W5^d*C+dVZh1zKXOQ?X ze3<8!$Gtu|#&gT_CGnq%PsKm?-14~BC%+bVuTQQPcdt+G6nC#r9u{}6PhQ5|`drZ? zIC*e9l6d#JVw|$i_I(xOPays+o&mc#R#1He_ z`tIs5%5$qjX7}%9#53Sia995u)OoqY7bE^V@gn#-$@4dP_KTq34zLWS7ych2J-itbqmH1o4elV`E`OnkTaQv8y5q-QXR z;D}Z}To2ZFo7WT08%z8Vo(OmIa(S{!d_ul&Jg0bEJg?+QzpL+u|6Al zZsX5N{-)xY@HXO=@DAdy;@!pD;Jw9L;zPuT;lsrToCJ}tHV0t z=ZLSy7vZjt?Wps1iQiBBkK*p%h5aOXx{~L-_(42UAzW|nyyx*W;=i&#FXFj9-^|GK z%?6&^Quv+tcf`|jo$MoC3Lh=L8J{g)iO11u@jLh-@#kF~aC>~&{&P)=;6rfyCGiD$ z{*w5$us-hbc#8Tb7e9t)5r2s15Wj~P6;Hvx<0vVf1g|2VAFm;v8*eOL1#c$)8r}+b z>!B!hnC!({-BLzXEqMK0;+szo|6;AgyXSG+Jh#V1OU}DX{B3-n=XPHAJno3+mdE}3 zxD(=i$a6(}K7L*N3%pdtXZ!Ac&QBTfGsM>vFT}sQsVklppX0gB%RNtA;(63{gZSm* zUGdePTORj3ajWN+r`)FCLvZX6Pmb^L-14~RiNASndD3nUJ_N@x@syjRIC9!^%j2FW z{^_~pDMA;;qHM$2*H3#=D9Ciud;1ufq_}tqu=}A1;0iAC0^Ex1-MU zBtG?)AX#uM5KoCOl{{U^vrT*zepY-N{#ZQGm*IKiRIw30{kb#+=zD6z{`ujk5?D}60a>j6|d*H_1)E>spnRQO~khlUyFCZ zqXn_H?@CRbhe`Y?;zx*|z(+}*%;cFR-WuO1-UB}-z7fBGyT^MO`hLfAo9|V|^FaJ} zJX+OgK~%VYEI#qps81uY#N*+K#0%ib#Pi_k#cSZ1#H-*hinqq|inqWEd2W4obtvVz z&Fe$r%ZLxbE8|v;=)td@)VaCD&n3Q<_#C{wzYDk0+iR zPvp7v-PIwr=T?Weh)*xx0MCZIzW+*{Uy=AO#FrEAj8~F8=gHGdJi*Mc4!y)P;uFMs z;9ugd?@j3Y63=bEgBZ_p@qYL^@mcsL@oD%j@gMNL;+yb8;%D%q;wSKP;{V{6#P8x) zJh#5PI^6Nx>X3a~@F6%Jh`)eGs~)a5*Y|-lqrxNUaI3TBd71c(;wAB{l4mq|N{g?; z-w@x0cNNb#D>!*@^v7M_-=*&(J-2aIWjtfWE8>&IJL1#D+u(D>$KdnDKf#xaFT}qS z{~F&Uz61Y3d^5hwbL+dS!vW7P1b&P7L*iHQQ@HDUR_c6T;*)L<;)(G{4a@NK z=SCJLPg-&J^YjXe&z&9h<&j$Ai^=mQ?)u)3zPIz-=9`J}bQDjI_Y^OW_Z2UV4;62T ze<=PoK304%{+W1xe7g8#e5UvWe6HsggZa8TEb-jxu$uVg;w$iVxOFXh@M}4BJ}B{f zi9aH~8$T&|){*Cycm&!BM?603p(yV9{&B3RFOO98+~#|X@l+N6 z4S!wyG2TG@KHf|`(~j`G?}(?zJBpXayNH*-`-(Tj2a4ClhkAZ77>cXINYAYf?-M^( 
zd>}p39=d@EjB{7?K<@k@AB@i;rfan=-%fj1D(g1;r60dMBH_1)E>o#$4E z8pL-LuY&i)UEgEQ3CB4`;#(5`sd#hzbIId=-sUp#(&XPMUK_tCegu!y4%dVA-R9Mq z^TrhKh(CwBdAU4UBz^$#ImG+mc_hzy##2h%{l5SWJh%5L=EVr>*~jzf0VU=2iqYal z@!8@R@YUiIc;2*EybJs4v*Hu*d*WqghT}|7=h?^I?|5eMrnAEMeB#@AoR$~=k-9Y( zpZisqr<3?aygzPzza0EJ&;Nt-p~Sz+`vqTn@wQGrqmK*4$Kv0MFUB{D&&T(P@5B#? z|A3znKZ{=wKZ)NK{|CP>ejkrr_u0oq%3WbS>E1WF>A8(F$B)5>;J7Qkn>-Iaw><8BlUQ#&t%K$HXSZdEMB<5W#}j#O zdEEOZX+5_*_xD(qNaT6(+jthwEsuNOB%kM&C(&NZ68zi@@%VT#&n=I8-=vbbd*7si zxO?BEjktT?q?fpR-{eDa_rA#_areH-JaPBF$#>%JeUlyH?tPQr#B)YfCla|N?%p@K zCtjHN*!9A`yT{{|?craf$K7?$y`Pf9bL&fX>YrQu1-yXdzd`=e;%=YWOuQrenLgsH zH-+OIirYACAJRTf=%0FS<1Ed1CW)88XNWh$=ZH7L7l{wXmx=es*NRWaH;7NhcZjdT zcZ+|E|LnP6ha;X_9S#zILVQ1d5qIlhGj)C<@qZE@z5cV;jo2|PlEAIMLdvjQa;}SoQ_|xKF;TI&&o8 zKLvvdj^5%)@FC*)@ZsXQ@Q*#WzPmbn>bcdS2Jw@`tKc(mSN~I+gD(n>H4@*F`1Rt= z@hy_)^5&>Ck>A7Jazm;6re97mtY#z^&T$ ze6SPspD6JeiJu~#0sm6+^dip+argf3e)0LlUlqTN-@{$s=hFAsZ$`7}Ma*r!c^OZ9 z@fY#r;QKXTtHU

xgg0 z8{t+x>-!_>+)d(-5#LMv2;N`v#M&Ap3yyK(x$wo}CGa1`hvL8DuJ0@9`+3i8zP~e` z%i^{1fq=_&jmzS(n| z?^ldxoA^w8pZHGvfcO^tnD}MMR3KaW2UPm4eH-1_e75c{oX z*I^mr3#^unOEYQ5?@ffFII z*7tWA&n)q__yX}U_!9At@YUi=@O9z~@NMFI@Ll3N@dM%)@I&Hf@ME4^-(4Nfdv0}z z@k`jp%i{l%=LYWj{wZ~i*ZA4>lZHIciKoJo;VzH+euJFihk2g3cuVZN51e&mi$E_z3Z;_(<^z_yqCx_+;_c_$=`c@wwtd@g?Fj@a5vu@YSAM z-(4Lxdv0~uO#C+S@9}-O>sMv!d`aR@6aT09N&LFxsY9NaO~Uo#@=wHH5dR8)Mf@+k zGVc0*gT6QP{6a9_YmBF{_#b#H@g%##S7v+#q;8W#B1V1#jD~Y#oORx z#arSNJh#5PI(*@|)nNqjv&7%W7vQch_o?#_5(aM$;L>3b2+ZN4iRPYLm5cscRIct!C8crEe!`0L`g@y6mQ4n~zB(o8%F z-cCFp-cdXk-qrJqL26frex6$$Y7jq2yb3-7cYXhwI?t5&mc)N0-W;DVd6to9qxeDm zxcGVezW5OC3(4LN*Ms%l=GB?=rV{UnXY%}F5T7V+@F6%}^xX0cAwI8oAG{Fm>NAq@ zlo6kS*A&0Z`%UdVPZ*r{L<}2E@bjNMj~;yfFY%Mb)A4%dT=4?%1QNH80-Q&wSR&P4T&Sq#3EzcC< z`-^{uefeG7!SS>BKjb;+x#e;1=brZ5@+>0HdGSg3 z@19#8_kQke&n-`y!@-B(xG$apf8@F4aqs8GY5BCjmZt#m3B>c@Nj$ea?)}`%;_m(2 zg5vJ|+*ifj`?+;+x32m&3r-RottH;Q@7cw38-FFn*+aZM-dFMuBL9ct)9^3F7vZbL zJHH(@%*Ynp?W698968{*jk77^IV9c)KP5f{KPTP~zal;pzb-x*O~)p7<|#BF}A~aGyG)_T1`lpZN6RH}Pz^TMxad^D7dc>PUEka^flQN|I*?c^Zpv z#rudKz$b{Obn66neUBZME3(9Mn{PJ8vs^qgzD~R{zDc|szDv9sW;8a!I7uz;@b8L4wR+}-z25}#50IG$DVPfC&O1*0GCt-1xr+$8%ds|&7;ypI*WJ2M~e5y z=ZOEser%KH_WbC3+uQ}mS+gXF>&A9d5 z_76ETng&0oR^pqp|Lo(%TVH;pZUe-(<0Hk-<7357<6nwD#%GKFjejei`dHNPBdf%d znd z<*7-0F7e8E3Gr@tY4MJDb@AzVZSha>`Qj(=#p3(%{hr&r+`j#|=Qgjp$Ab^Saaue% ze!+9g>J-0k#Pgs`V=RJtG#~*lZdECA|M!Tndv^*6~T9!y8j(Ax-q34#z?b}m( zZh77!KD~GYJhSJP$L-q-io1RLtKx3oUPs*R+nbBK@5AgO?!FImh`9Sc%yHuG`!HvU zyYItXChooub2IK9cP~y2CLJ7yB;GxbIODn1vnzGGAl?c8L-OY%|2^@6c$W5I-`#k| z;4h2+Jt=CKk&3uI-t9iMI%eqgJ-2aw#CRHs564@IFT~r3&&A&p--CA---!8~E3fXDWHt zi*Lk_itoekil?3&uET$E*Y`v8J@LEYIx)BTMox$E$;BTr&J5yNiO($lJf2Iu0-jI2 zEM82!IbKS<30^_GFJ4)^CtkyI>$|H%eb229lZkI6J^^ouyS`_p&iy5R3Gsu)7vaMt zPcHII5wD4VC*B1AMSLE90(X7CK;N%=Zu4Ewcy5TV!T%ONjXxGYipTEw?7I3Fk1zfY zo?JY`nP5=Cky<>>nJA8A7B7lt6EB44^4$9F>hO~1R)>1T7Zb08m&IM*ms009CB6gk zO~l*bEhNtx^7Ilvfqy1`1z##&V@f3ISc|*9-=^pN1b3pNO9q 
zUx!~7Uya`oKZ4&8KZrjTzllfd^z3@OhR62Y`tItG*mJ8xlC!~w;7Bf>5YK?SzSpPD zMI}B5@g>Ex;bkPx+vIsed??;Ud^|o%{5U=dcYTi@E1cIn&uzX%7|#Ong7^yYhWKjn zy7*@C-uO20?)X0O3HSl=ariOu#rP@l`S^Lyt?#Z5S3S2n>?Qt&_%8f!JeuVQeqEr> z$vT@n{XXP##HSKJgQv$`o@?YOD4vvkdrk4oczf}g_;B3%Zu9z!^L`|L6(9e9^30R? zN5n4>e}FHQJPR4mR`J-=tXe_6y*$>Jq^!>qp4<47pR?#l(ZSZO01$e%>+H-rIa3bGFany4g|7*lw5^ss$7Vm*a zx`h35by$d}6#p5|E&ew9=d$A0@Ot6{xc{~n&%)z+kodRwXX2B1ynHLZ0pBQIae7ds z;Mj}X`n1g-@L=H=Q7hVs1;Kkdz`iZ_j65ov{==$vX{}WFvehJSg9{YUMa3Wd6 zqvM6dGvh_YGvKd@m%^)v7sDHh*TWl&*TFl8cg8!5cfbdVe}um;J{+HoyY<-}*5BEjH=p>I_)DJKdEM)@uXt{GHWFV>d<|Yl z{4!oo{3L$Eb33nl9rYj2?Yy1rLvZ{no)VAQ{ptF&JnnVW#GYGd-b!`&&w>i{;KCj?se2U;_h|S=Hl*k z)Gp%gb<`o^?se30;_h|Snd0ts)MeuCb=1w`?se4t;_h|SGvYaeIoojqcaN8Y3qz09 zL;9XL^dz2JonND#Da2pJ)8a1wQS#>y&%Y?lUsk*V{-*dt9;YpFdz{)*`#M&br<>bu=pT+n0Oz2l=zqUIPt0Y|2n(#IGgJK58#KfN2bV9g_*>+&;O~0e%)5@mOplv!*o^x3 z$=AUbz;*oBW1Kfp{m-c1Mt%hT73E|g=LmV;C35~;BQFlWf0+M#7!NNC*LnXM^IpT_ zroU&=o=EbO@F&O%pOyABAYW8kmDb>82>IM1f~1*o4(o(BJja%LcB9r>!IGR_CcH^cuRzlEHeaGm#q znD>Ii2H6u=Nx$-_!aWW@ayCg;LfZ5^Xe0L9`fbzBIJAE_mS^} zKS+K8UYvj%=LxvZdsmEe2db}(oEOL|!MjpUFXSYU zpTzu5BL5S)2pvTQP>_vSM@*VIJaGm#}80Xql|2yjIk)MS} zQBEo3v?cEb??)aFA5Xpxo(k7_&y~;ZoO?_2xasdrv}Y0dRro6Mvgfm7oYmwd;2X&s z!?%)0!FQ5(f$t&j1V2cg2tQ0d5PsC-X5MuiPI=sn!&KCtBcBAn0@r!phH)-D^6q(M za+aVzoP06-0l4PuLQZw^^YCZL|AO}7xSK>jCu5%~ibr2R|D?}M)szl8h*d?n=!Mb4MxTj0mY_rh_Xit0cK8q!%zsC!^lpZEN$>X_QdL8w1 z$XCKYCEo}CihSJ1l7EE!G5lV3k-RfJ>^1-MZY?~V{1>dxA0~en*X4%fW$=7&Pre-9 zi~I@9`v~$j80Q&qGw-L}uUwU+zw@a+8K2i=ch*zA@W)9y5wu&4air+li>P1dK2UPA5XoR zm)#G`ykt^+)@1SPRIi_lSANd~mJsvm5)z8IBc-+kU@2G!}{3QGl^5R+2 z&YI+f;LAO3j;o)Gt@XG$?nKmoLEa3$)#D~dKNtJP<0j|kCHI?q`*r!Gk?ZGT{mAumv61BZx!6Rwp8s#pa694N=2N}?J%Z&PH{*5z zf}$s>yh_@KTaNm2gt|3o0E@#xAM4m96EU1jKe(Czd$}4-W{&z z!#Rxet5m-Q^`pr*!BZ&b3Ub~fub(RO@;P~P`1jo^ql zxEY5Us1GNv0xu2MdGC*Lu1oc8P~U*O6}%DUBq1k;e9|=O*8uXl@VCi-Le6x!&U-xO zeWAxqf4iVPOUOIH)5%A}*N_i`Zy}!x|B`$*d^h~Yi1;%Lt}@^JVh^2gy*$?L%9kiQ6@N8S;>gggnpf;<7fhI|ry 
z1NqzVEgm=XuH&%7<7ON_LH%y>rSJoAov%h1=krwmE$Xw#_rR}GPIKfG8ZYOM=D&&g ztx7%v{uKEs2ic6XgP@;j&>NL~i@iR2~W!#!^1UB@BW<7ONhqJA8C1NbDk&ij0f^T$*l zgZkyDNOz?oINF@Z4|vUr#ir zBwTaqpuP-wO?U+5+=KSiCU1@3TibZtyzl)w-uE8har3@+chrw0pAMfwz8=1aycd2C z`hvVT)>q$=N5PMi&xB`@Z^r9(-iiM4jK}lzLGl~$+T<57zpdeB{LTAd|1RTxb8nrf zKDC4RFi*Xi_m-HKk>t(cZ)hE0to=o-nKK45vH}mD(lKLs+ zf1^D!JZ{bheII+i$4yRInP75NzWI`a478_8$Fw|U$<4!b;_eg2?+FZmkyk8pEVnCr#880U*re+2cH$q&J= zQ%*_b6rSWi-!{S@A^!&6f;<-99H>v1#hIu46HZpPs?)Gs3+3I7bP^WL9?@YfDads$Yfr zp5)8muTaiCXwN9}aQK@RUqHS9*R`!4uk7|K#{Dw4V;(op$8D&;LVgUMcZ!UszHTR$ z&yI2Kf$RLH;rd&{Q*XxeEA%&#JOdt0ehmH;`A_hca6Rr~95>EWZ?042OsC!3AgWId zX0tP%>h-!m)#Il9H*nmUmg6oXzZZVa<0eP1`&T_~a$ZFJ zP4as1J03SVdfhLWdUqbP+jB?SQ-u5$a_;xI$x?Lg8Vf60(pUdr2Uu3^T2PBSA^doF9#2sdiT6C*9#qo z!X7u{5I}u6d1H8KxSkJ#G5&R_z7y&jkavVPqMVV)i6O6!c^p9g1pICC$?)lLo%edC z!R~FL$4!5GqCHEVUd;xqn`F8ks2Wjf zIu2(&ZpPs+)L$UK4!;IB!)UHY(=pCPr^&$Tb@@I=wu_M$g_nYB&RpcwB>w{5ntT_$ zH+f;aZY02U-kX^NxwmAGoBmcnd&ZHMflnfD2A@ja7(R!*2Yep+%kU-Suftc6kAklu zp9SAQJ{`Wr<7VD<9CmozjKezA?2RHw4=~Q(Q2jL2e@mVU-%mM< zkaL>+H+Y^IGJbm8OYjKtNL()(!S(fL9*!GLJ{R87lGBaqSE4?ad?~y)K zV=bOWzBlMjj(gkear3_Jf!x{b9P_xL(?Rho%W@8JE+TjZDE#om*4={T<{AUPGt_4kWM$@RGH z$z#h)&dcOgN{A06AC*^pB>BRJ#ix_cM9zHjwHSvL)y7LZ!;ZtZ%4_mVVrM} zN8cm$#b)`(ZT@}Ywa72tFJ71YY94W%*4eM(rir=WHRZ?u+z#_Z0_)}O?Hmg zmsskLqu*v&esjM*t0IM$EFPC%er`r1%>9a#O=mfrb2eDK2F^Rpi9*g`IOn`>@hg;* zfb%Mo|{dS8#ey`+fj*d@rJpc5#Nsmcg z2aE53YmUzQKbHC;#r*x!{1mw6=<{NerT!a>H!1G#mp(6s!?|B?TKp#E1SHm}hwA|6 zG`09zxb{oO`Ie==0A8m#e>7ZkUd8Ln4!nMA{h24Fp$9GAvYdZ>biPvHoHNDZVYpst zdvv~-gxr|6-{>XYnAe|C*!o-XdE1tL-^4R<=7@d^=ooQj1D` zmuCK)%<;0_$KpT3HAkPP4?Gn*=OK&Ff@_XGPk*q~|JULjo6B)ECm@?nI$Xy``?b~L zbzAsz^mVL38*$Ak_nvGwxA+FQwnzJQ$x{E1#fLoWZ%;rromFsd&nAo4ZtKs{^RO?R zbB0^|oF%7nJIUvqx)xsy*YVNzjO*afx%i=MPqTQwj{bh>d_}{xJ(|I*&}x}9U-+Ap2=^_KdtE&fC&f4>5<=?sB$zfvrIm2w_z?U&|! 
zXUS<64Bej5aL$=%@jH~Guk%r{lFvC$TYMutm%Ce-`=#Ujr=>o3kI?zU;GC0gso!ex zx;-UdbF^RaaPF6W{=<7?k#c*!V=N7NJ$e*Lv!FV|5 zjI{Vg$_b!7^%hG$=QOqWTDbPB8glgCSLU1oABWB#4c8oPPwwUZ`U*8oh5PSXS$rH^ zbM*Y#VyWL_@#qzDT+Pw*c?7(W`{Uo-FKy3T7XOQKbe&m$rQ~yt{(IHz>rt=m(VQ9S z{+x#&lkr(#@e-@0UURfxUEz9M&FO3Lqm+~L{Q4|(dm=5q2(InX^XHhQ{=CJ5pZoje z;QiUfaPHUV7O%R-pQGnb5Y9ORE&dzj1SHleyH*}>PIZgVgX{R{`=UQ?@aMerq|Ezq zi@*4V)N78;`&78LM|0*|yx2y6j?PzGIOlY=`1h2fuZJ}@h3;2Fi+=*we(8Mm$?)f_ zYa!z^)Z%C1nxo@W>&wtNjV+!I*BpIbT(#7P{X2C2aJc3Kq|DiDso!n!w%cV#&C%D5 zx8OQoIzBTkUg|4y zkB^RX??e9M9*8wZa{pf)i=TmOd-Qox^Kj^#Mi&1Rt~uJTl1D=4JZ$mz;W;_7yEFBu zKj(NK>DPRV7ds~Pnxp+{3)k~UbGlmmd&<%0d%^#PZcj;zzXR9y1Y~#TYfJqBi$9Yo zJ8F)Oa|)dMHO1m#zxs1@UDpK8Ic+Sy&5~30H_7Lm>K2~|&*knG=6>n(wAX2WPS_Ar z;W&vFKLytuo%cu2gwA=w;;Z1Aqw{{*QtzA%ou35P9G$N(EcH7r-sqg<=j6zylLXgs z*7146;@2oAfSg9Z%LC47W$`U=ZI50ra-R>K6K?TwaLv(m_*P5(UW*4V$Z<7CuNNcX z+^-21|C@63b+y68(EVy|@eOcok6tf|T=C~@Nj4RZQ^w*`;F_c7^Fd4fZx(;yPdTpU z==uBsT*pV-v&`bCm-&$U)XfQ@xavQR&?(x;jyVVmzpNnu0q}e+ttYjV7nIi zYHZgf#~+V%8ju$^4Rdb|9)$n2DyWpe9e}yNLUxdF&eiJ^CJReSqDda`r zGstVg-zRSXpGO{mFC=dTUrPQWd=+_j_-Ew(;OohU!@q=^?t`*L&pY)5)M-D|b*C8Y ztR9YSJ-&Jbd@R*RV*3<%2DX=(#i)DJ9R2skTjcfoK5TEo6JF~x@=M*E0{;30bE3Mp ziucI9)(7sD?Jh<9o_e2b@4&s*rHT@nQE!*+6{hr!XwjaR)Am_Nv#GB%Rm*XwO>tLMJBU_5MsW12H8MvT5 ziE&a7ZzFZL$em}!-@$ljeIzb`WicM=!5FD;fbmezdQQAOxzj}?Q{m5(r@{M?r^8doGvE<2Oxdpicqz{oOnZ*& zIt-9G(kL0OG;`;k6xn7qKlPBYPe4bpd*T0~Dnx9rmj+aHQ z*YV0&KjhToIu}K**Y`B?)H0I4hFq`vXUJ2_O8wvDY4EZbS8Y!wJd!*M-kDt274hV{ zu2@5!fa~Hea$Ro}$2v;elZN$!zOSaP>yXA&uj`N)a$SdvCXb1b@ku2Q!Z(v=!uOG9 z!OxRNR+M~w|5e+s>zr5cd%wDa^~q>*UH>d4PeuJ^a$OhgBiD7&336Q*mBRB$+Y?z? 
z+FyfQ*GmC%T`xUPuIr^=$P=nceipf|qmrcu>#G`t<>y)-f%R2@T-RNh z-tS!pR_)R`e&&=4IU)d^K1bGbBJvYd8-B`J(fBZ91A4RU~$#LWfSU=4s*LCJsRXcQI(Y)QgLU#;a$P^~BhNzp336Rm2aEgr z8_`a-lgM?wokboKBlUUkd#?6N*Woebna@dmFLGUvQ z0r)TEG4L$%AUsbAX_xjl0bYtc8D4`t6&@f@gFjE64u6F_13rU1{CR2LF7o6~;y1|k z-wUmQ_YHFT)kW%qQ-e4zd1g=X8d%5aaRYtCJCkSi6MvU|p!jC; z)I{;~o6oN*rW1OnIGJb^M!A?xxu%yTEy6YBvT|2 z?#5%fM_iAgeY*D@*n5zBtnA0JJ$fX@4fFljjf(F(B+0ZP`zTVE7(1{}oNKaM;=n%t zTNB@RV4N1X?be5eyJv6r95i4++(5UV0|zC=)q18~rUWitoOw3=U~0y!cNGP{_W zcAR}b$+r1*!yE_8X$MairwSau21>E{{-ZpMq60<<=X|f>J~ElSf8eB)WXfI6_lTTF z@}Q1;XWCVlT=yS9$2Ya+ruP9n=Ka&Jnku*da+mX6`>*?(mr?VW+n;<*c1go}S_2~V z0q)zo_o2o={V_=lHv{C}%=qVbzx4Pz{(W3C$G_kG^4=oKOAEDLf2P}dpXB}^)c)15 zKYq>D-Lu!hB=H)!p@iPaOI zbbsAG<~|6u|AtK2|CTwh=f?XVaeo%7{o#Y7<1S;QjNuKhNG@4}klbg=K%!X75eo^!{G# z@2xS9mtuc*-A~;-&CKfpo#r|F>-=lIwqLg!;P|}%VH`h*>%P|W{&n2_L(O03kurZb zF#cx#%+f;l*ZDL3;bWU)MO71Biub3?$6{yQFW%pr9{k$tl9Tzg?0<(|7|rp#H}h*= QsQqImNc#h{18qA02SdFBs{jB1 From 339d57cfbfbd9906c4799d68aaa27ddfa9cb6e68 Mon Sep 17 00:00:00 2001 From: daviesje Date: Fri, 9 May 2025 18:55:08 +0200 Subject: [PATCH 114/145] compiles --- pyproject.toml | 37 ++- src/py21cmfast/meson.build | 2 +- src/py21cmfast/src/HaloField.cu | 9 +- src/py21cmfast/src/HaloField.cuh | 4 +- src/py21cmfast/src/InputParameters.c | 1 - src/py21cmfast/src/InputParameters.h | 188 +++++++++++- src/py21cmfast/src/IonisationBox.c | 2 +- src/py21cmfast/src/IonisationBox.h | 1 + src/py21cmfast/src/IonisationBox_gpu.h | 49 ++-- src/py21cmfast/src/MapMass_cpu.c | 281 +++++++++++------- src/py21cmfast/src/OutputStructs.h | 93 +++++- src/py21cmfast/src/PerturbField.c | 2 +- src/py21cmfast/src/SpinTemperatureBox.c | 2 +- src/py21cmfast/src/SpinTemperatureBox.h | 1 + src/py21cmfast/src/Stochasticity.c | 43 ++- src/py21cmfast/src/_inputparams_wrapper.h | 186 ------------ src/py21cmfast/src/_outputstructs_wrapper.h | 96 ------ src/py21cmfast/src/_wrapper.cpp | 305 ++++++++++---------- src/py21cmfast/src/filtering.h | 1 + src/py21cmfast/src/meson.build 
| 13 +- 20 files changed, 698 insertions(+), 618 deletions(-) delete mode 100644 src/py21cmfast/src/_inputparams_wrapper.h delete mode 100644 src/py21cmfast/src/_outputstructs_wrapper.h diff --git a/pyproject.toml b/pyproject.toml index 727d898bf..5d6aa0bfb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,18 +31,19 @@ classifiers=[ ] keywords=["Epoch of Reionization", "Cosmology"] dependencies=[ - "click", - "numpy<2", - "pyyaml", - "scipy", - "astropy>=2.0", - "h5py>=2.8.0", - "cached_property", - "matplotlib", - "bidict", - "cosmotile>=0.2.0", - "attrs", - "ninja", + "click", + "numpy>=2.0", + "pyyaml", + "cffi>=1.0", + "scipy", + "astropy>=2.0", + "h5py>=2.8.0", + "matplotlib", + "bidict", + "cosmotile>=0.2.0", + "attrs", + "tqdm", + "ninja", ] # [tool.setuptools.packages.find] @@ -50,6 +51,9 @@ dependencies=[ [project.optional-dependencies] tests = [ + "clang-format", + "clang-tidy", + "hmf", "pre-commit", "pytest>=5.0", "pytest-cov", @@ -57,7 +61,10 @@ tests = [ "pytest-remotedata>=0.3.2", "powerbox", "pytest-plt", + "pytest-benchmark", "questionary", + "pytest-xdist", + "pytest-mock", ] docs = [ "nbsphinx", @@ -68,6 +75,9 @@ docs = [ # When the min python version supports PEP 735, this can be simplified # as dev = test_req + doc_req again (as it was implemented in setup.py) dev = [ + "clang-format", + "clang-tidy", + "hmf", "pre-commit", "pytest>=5.0", "pytest-cov", @@ -75,7 +85,10 @@ dev = [ "pytest-remotedata>=0.3.2", "powerbox", "pytest-plt", + "pytest-benchmark", "questionary", + "pytest-xdist", + "pytest-mock", "nbsphinx", "numpydoc", "sphinx>=1.3", diff --git a/src/py21cmfast/meson.build b/src/py21cmfast/meson.build index 7ae5cad23..961020972 100644 --- a/src/py21cmfast/meson.build +++ b/src/py21cmfast/meson.build @@ -2,10 +2,10 @@ source_files = [ '__init__.py', '_cfg.py', '_logging.py', - 'cache_tools.py', 'cli.py', 'lightcones.py', 'plotting.py', + 'run_templates.py', 'utils.py', 'yaml.py', ] diff --git a/src/py21cmfast/src/HaloField.cu 
b/src/py21cmfast/src/HaloField.cu index 80cdd28d8..6be600b7e 100644 --- a/src/py21cmfast/src/HaloField.cu +++ b/src/py21cmfast/src/HaloField.cu @@ -7,17 +7,14 @@ #include "HaloField.cuh" // define relevant variables stored in constant memory -__constant__ UserParams d_user_params; +__constant__ SimulationOptions d_user_params; __constant__ CosmoParams d_cosmo_params; __constant__ AstroParams d_astro_params; -__constant__ double d_test_params; -void updateGlobalParams(UserParams *h_user_params, CosmoParams *h_cosmo_params, AstroParams *h_astro_params){ - cudaMemcpyToSymbol(d_user_params, h_user_params, sizeof(UserParams), 0, cudaMemcpyHostToDevice); +void updateGlobalParams(SimulationOptions *h_user_params, CosmoParams *h_cosmo_params, AstroParams *h_astro_params){ + cudaMemcpyToSymbol(d_simulation_options, h_simulation_options, sizeof(SimulationOptions), 0, cudaMemcpyHostToDevice); cudaMemcpyToSymbol(d_cosmo_params, h_cosmo_params, sizeof(CosmoParams), 0, cudaMemcpyHostToDevice); cudaMemcpyToSymbol(d_astro_params, h_astro_params, sizeof(AstroParams), 0, cudaMemcpyHostToDevice); - double test_data = 5.5; - cudaMemcpyToSymbol(d_test_params, &test_data, sizeof(double), 0, cudaMemcpyHostToDevice); } #endif diff --git a/src/py21cmfast/src/HaloField.cuh b/src/py21cmfast/src/HaloField.cuh index 484c24620..128f340e4 100644 --- a/src/py21cmfast/src/HaloField.cuh +++ b/src/py21cmfast/src/HaloField.cuh @@ -7,9 +7,9 @@ extern "C" { #endif - void updateGlobalParams(UserParams *h_user_params, CosmoParams *h_cosmo_params, AstroParams *h_astro_params); + void updateGlobalParams(SimulationOptions *h_simulation_options, CosmoParams *h_cosmo_params, AstroParams *h_astro_params); #ifdef __cplusplus } #endif -#endif \ No newline at end of file +#endif diff --git a/src/py21cmfast/src/InputParameters.c b/src/py21cmfast/src/InputParameters.c index d402bbdc6..37417f44c 100644 --- a/src/py21cmfast/src/InputParameters.c +++ b/src/py21cmfast/src/InputParameters.c @@ -27,7 +27,6 @@ CosmoParams 
*cosmo_params_global; AstroParams *astro_params_global; AstroOptions *astro_options_global; -// TODO: check if we need these for ConfigParams void set_external_table_path(ConfigSettings *params, const char *value) { if (params->external_table_path != 0) { free(params->external_table_path); diff --git a/src/py21cmfast/src/InputParameters.h b/src/py21cmfast/src/InputParameters.h index 0a32f3ef5..6c9502c6e 100644 --- a/src/py21cmfast/src/InputParameters.h +++ b/src/py21cmfast/src/InputParameters.h @@ -2,19 +2,203 @@ #define _PARAMSTRUCTURES_H #include -// Since it is unguarded, make sure to ONLY include this file from here -#include "_inputparams_wrapper.h" #ifdef __cplusplus extern "C" { #endif +typedef struct CosmoParams { + float SIGMA_8; + float hlittle; + float OMm; + float OMl; + float OMb; + float POWER_INDEX; + + float OMn; + float OMk; + float OMr; + float OMtot; + float Y_He; + float wl; + +} CosmoParams; + +typedef struct SimulationOptions { + // Parameters taken from INIT_PARAMS.H + int HII_DIM; + int DIM; + float BOX_LEN; + float NON_CUBIC_FACTOR; + int N_THREADS; + double Z_HEAT_MAX; + double ZPRIME_STEP_FACTOR; + + // Halo Sampler Options + float SAMPLER_MIN_MASS; + double SAMPLER_BUFFER_FACTOR; + int N_COND_INTERP; + int N_PROB_INTERP; + double MIN_LOGPROB; + double HALOMASS_CORRECTION; + double PARKINSON_G0; + double PARKINSON_y1; + double PARKINSON_y2; + + float INITIAL_REDSHIFT; + double DELTA_R_FACTOR; + double DENSITY_SMOOTH_RADIUS; + + double DEXM_OPTIMIZE_MINMASS; + double DEXM_R_OVERLAP; + + double CORR_STAR; + double CORR_SFR; + double CORR_LX; +} SimulationOptions; + +typedef struct MatterOptions { + bool USE_FFTW_WISDOM; + int HMF; + int USE_RELATIVE_VELOCITIES; + int POWER_SPECTRUM; + int USE_INTERPOLATION_TABLES; + bool NO_RNG; + bool PERTURB_ON_HIGH_RES; + int PERTURB_ALGORITHM; + bool MINIMIZE_MEMORY; + bool KEEP_3D_VELOCITIES; + bool DEXM_OPTIMIZE; + int FILTER; + int HALO_FILTER; + bool SMOOTH_EVOLVED_DENSITY_FIELD; + + bool 
USE_HALO_FIELD; + bool HALO_STOCHASTICITY; + bool FIXED_HALO_GRIDS; + int SAMPLE_METHOD; +} MatterOptions; + +typedef struct AstroParams { + float HII_EFF_FACTOR; + + // SHMR + float F_STAR10; + float ALPHA_STAR; + float ALPHA_STAR_MINI; + float SIGMA_STAR; + double UPPER_STELLAR_TURNOVER_MASS; + double UPPER_STELLAR_TURNOVER_INDEX; + float F_STAR7_MINI; + + // SFMS + float t_STAR; + double SIGMA_SFR_INDEX; + double SIGMA_SFR_LIM; + + // L_X/SFR + double L_X; + double L_X_MINI; + double SIGMA_LX; + + // Escape Fraction + float F_ESC10; + float ALPHA_ESC; + float F_ESC7_MINI; + + float T_RE; + + float M_TURN; + float R_BUBBLE_MAX; + float ION_Tvir_MIN; + double F_H2_SHIELD; + float NU_X_THRESH; + float X_RAY_SPEC_INDEX; + float X_RAY_Tvir_MIN; + + double A_LW; + double BETA_LW; + double A_VCB; + double BETA_VCB; + + double FIXED_VAVG; + double POP2_ION; + double POP3_ION; + + int N_RSD_STEPS; + double PHOTONCONS_CALIBRATION_END; + double CLUMPING_FACTOR; + double ALPHA_UVB; + + float R_MAX_TS; + int N_STEP_TS; + double DELTA_R_HII_FACTOR; + float R_BUBBLE_MIN; + double MAX_DVDR; + double NU_X_MAX; + double NU_X_BAND_MAX; +} AstroParams; + +typedef struct AstroOptions { + bool USE_MINI_HALOS; + bool USE_CMB_HEATING; // CMB Heating Flag + bool USE_LYA_HEATING; // Lya Heating Flag + bool USE_MASS_DEPENDENT_ZETA; + bool SUBCELL_RSD; + bool APPLY_RSDS; + bool INHOMO_RECO; + bool USE_TS_FLUCT; + bool M_MIN_in_Mass; + bool FIX_VCB_AVG; + bool USE_EXP_FILTER; + bool CELL_RECOMB; + int PHOTON_CONS_TYPE; + bool USE_UPPER_STELLAR_TURNOVER; + bool HALO_SCALING_RELATIONS_MEDIAN; + int HII_FILTER; + int HEAT_FILTER; + bool IONISE_ENTIRE_SPHERE; + bool AVG_BELOW_SAMPLER; + int INTEGRATION_METHOD_ATOMIC; + int INTEGRATION_METHOD_MINI; +} AstroOptions; + +typedef struct ConfigSettings { + double HALO_CATALOG_MEM_FACTOR; + char *external_table_path; + char *wisdoms_path; +} ConfigSettings; + void Broadcast_struct_global_all(SimulationOptions *simulation_options, MatterOptions 
*matter_options, CosmoParams *cosmo_params, AstroParams *astro_params, AstroOptions *astro_options); void Broadcast_struct_global_noastro(SimulationOptions *simulation_options, MatterOptions *matter_options, CosmoParams *cosmo_params); +void set_external_table_path(ConfigSettings *params, const char *value); +char *get_external_table_path(ConfigSettings *params); +void set_wisdoms_path(ConfigSettings *params, const char *value); +char *get_wisdoms_path(ConfigSettings *params); + +/* Previously, we had a few structures spread throughout the code e.g simulation_options_ufunc which + were all globally defined and separately broadcast at different times. Several of these were used + across different files and some inside #defines (e.g indexing.h), so for now I've combined + the parameter structures to avoid confusion (we shouldn't have the possibility of two files using + different parameters). + + In future we should have a parameter structure in each .c file containing ONLY parameters + relevant to it (look at HaloBox.c), and force the broadcast at each _compute() step (or even + decorate any library call) However this would require us to be very careful about initialising + the globals when ANY function from that file is called */ +// The structs declared here defined in InputParameters.c +extern SimulationOptions *simulation_options_global; +extern MatterOptions *matter_options_global; +extern CosmoParams *cosmo_params_global; +extern AstroParams *astro_params_global; +extern AstroOptions *astro_options_global; + +extern ConfigSettings config_settings; + #ifdef __cplusplus } #endif diff --git a/src/py21cmfast/src/IonisationBox.c b/src/py21cmfast/src/IonisationBox.c index 2b7a28358..52e678389 100644 --- a/src/py21cmfast/src/IonisationBox.c +++ b/src/py21cmfast/src/IonisationBox.c @@ -1377,7 +1377,7 @@ int ComputeIonizedBox(float redshift, float prev_redshift, PerturbedField *pertu // If GPU & flags call init_ionbox_gpu_data() bool use_cuda = false; // pass this as a 
parameter later if (use_cuda && astro_options_global->USE_MASS_DEPENDENT_ZETA && - !astro_options_global->USE_MINI_HALOS && !gsl_matrix_int_set_row->USE_HALO_FIELD) { + !astro_options_global->USE_MINI_HALOS && !matter_options_global->USE_HALO_FIELD) { unsigned int Nion_nbins = get_nbins(); #if CUDA_FOUND init_ionbox_gpu_data(&d_deltax_filtered, &d_xe_filtered, &d_y_arr, &d_Fcoll, Nion_nbins, diff --git a/src/py21cmfast/src/IonisationBox.h b/src/py21cmfast/src/IonisationBox.h index c0b350272..a221a2064 100644 --- a/src/py21cmfast/src/IonisationBox.h +++ b/src/py21cmfast/src/IonisationBox.h @@ -1,6 +1,7 @@ #ifndef _IONBOX_H #define _IONBOX_H +#include #include #include "InputParameters.h" diff --git a/src/py21cmfast/src/IonisationBox_gpu.h b/src/py21cmfast/src/IonisationBox_gpu.h index 219eee16b..62a59fcc3 100644 --- a/src/py21cmfast/src/IonisationBox_gpu.h +++ b/src/py21cmfast/src/IonisationBox_gpu.h @@ -1,6 +1,7 @@ #ifndef _IONBOX_H #define _IONBOX_H +#include #include // #include @@ -10,37 +11,23 @@ #ifdef __cplusplus extern "C" { #endif -void init_ionbox_gpu_data( - fftwf_complex **d_deltax_filtered, // copies of pointers to pointers - fftwf_complex **d_xe_filtered, - float **d_y_arr, - float **d_Fcoll, - unsigned int nbins, // nbins for Nion_conditional_table1D->y - unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS - unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS - unsigned int *threadsPerBlock, - unsigned int *numBlocks -); -void calculate_fcoll_grid_gpu( - IonizedBox *box, // for box->Fcoll - fftwf_complex *h_deltax_filtered, // members of fg_struct - fftwf_complex *h_xe_filtered, - double *f_coll_grid_mean, // member of rspec - fftwf_complex *d_deltax_filtered, // device pointers - fftwf_complex *d_xe_filtered, - float *d_Fcoll, - float *d_y_arr, - unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS - unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS - unsigned int *threadsPerBlock, - unsigned int 
*numBlocks -); -void free_ionbox_gpu_data( - fftwf_complex **d_deltax_filtered, // copies of pointers to pointers - fftwf_complex **d_xe_filtered, - float **d_y_arr, - float **d_Fcoll -); +void init_ionbox_gpu_data(fftwf_complex **d_deltax_filtered, // copies of pointers to pointers + fftwf_complex **d_xe_filtered, float **d_y_arr, float **d_Fcoll, + unsigned int nbins, // nbins for Nion_conditional_table1D->y + unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS + unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS + unsigned int *threadsPerBlock, unsigned int *numBlocks); +void calculate_fcoll_grid_gpu(IonizedBox *box, // for box->Fcoll + fftwf_complex *h_deltax_filtered, // members of fg_struct + fftwf_complex *h_xe_filtered, + double *f_coll_grid_mean, // member of rspec + fftwf_complex *d_deltax_filtered, // device pointers + fftwf_complex *d_xe_filtered, float *d_Fcoll, float *d_y_arr, + unsigned long long hii_tot_num_pixels, // HII_TOT_NUM_PIXELS + unsigned long long hii_kspace_num_pixels, // HII_KSPACE_NUM_PIXELS + unsigned int *threadsPerBlock, unsigned int *numBlocks); +void free_ionbox_gpu_data(fftwf_complex **d_deltax_filtered, // copies of pointers to pointers + fftwf_complex **d_xe_filtered, float **d_y_arr, float **d_Fcoll); #ifdef __cplusplus } diff --git a/src/py21cmfast/src/MapMass_cpu.c b/src/py21cmfast/src/MapMass_cpu.c index 56999ba3a..07a17b11e 100644 --- a/src/py21cmfast/src/MapMass_cpu.c +++ b/src/py21cmfast/src/MapMass_cpu.c @@ -1,67 +1,64 @@ // Re-write of perturb_field.c for being accessible within the MCMC -#include -#include -#include #include -#include #include +#include +#include +#include +#include -#include "cexcept.h" -#include "exceptions.h" -#include "logger.h" #include "Constants.h" -#include "indexing.h" #include "InputParameters.h" #include "OutputStructs.h" +#include "PerturbField.h" +#include "cexcept.h" #include "cosmology.h" -#include "dft.h" #include "debugging.h" +#include "dft.h" +#include 
"exceptions.h" #include "filtering.h" +#include "indexing.h" +#include "logger.h" -#include "PerturbField.h" - -double *MapMass_cpu( - UserParams *user_params, - CosmoParams *cosmo_params, - InitialConditions *boxes, - double *resampled_box, - int dimension, - float f_pixel_factor, - float init_growth_factor -){ +double *MapMass_cpu(InitialConditions *boxes, double *resampled_box, int dimension, + float f_pixel_factor, float init_growth_factor) { double xf, yf, zf; - unsigned long long int i,j,k; + unsigned long long int i, j, k; int xi, yi, zi; unsigned long long HII_i, HII_j, HII_k; // Variables to perform cloud in cell re-distribution of mass for the perturbed field - int xp1,yp1,zp1; - float d_x,d_y,d_z,t_x,t_y,t_z; + int xp1, yp1, zp1; + float d_x, d_y, d_z, t_x, t_y, t_z; - #pragma omp parallel \ - shared(init_growth_factor,boxes,f_pixel_factor,resampled_box,dimension) \ - private(i,j,k,xi,xf,yi,yf,zi,zf,HII_i,HII_j,HII_k,d_x,d_y,d_z,t_x,t_y,t_z,xp1,yp1,zp1) \ - num_threads(user_params->N_THREADS) - { - #pragma omp for - for (i=0; iDIM;i++){ - for (j=0; jDIM;j++){ - for (k=0; kPERTURB_ON_HIGH_RES + ? 
simulation_options_global->DIM + : simulation_options_global->HII_DIM; + int dimension_z = simulation_options_global->NON_CUBIC_FACTOR * dimension_pt; + int dimension_ic = simulation_options_global->DIM; +#pragma omp parallel shared(init_growth_factor, boxes, f_pixel_factor, resampled_box, \ + dimension) private(i, j, k, xi, xf, yi, yf, zi, zf, HII_i, HII_j, \ + HII_k, d_x, d_y, d_z, t_x, t_y, t_z, xp1, \ + yp1, zp1) \ + num_threads(simulation_options_global -> N_THREADS) + { +#pragma omp for + for (i = 0; i < dimension_ic; i++) { + for (j = 0; j < dimension_ic; j++) { + for (k = 0; k < D_PARA; k++) { // map indeces to locations in units of box size - xf = (i+0.5)/((user_params->DIM)+0.0); - yf = (j+0.5)/((user_params->DIM)+0.0); - zf = (k+0.5)/((D_PARA)+0.0); + xf = (i + 0.5) / ((dimension_ic) + 0.0); + yf = (j + 0.5) / ((dimension_ic) + 0.0); + zf = (k + 0.5) / ((D_PARA) + 0.0); // update locations - if(user_params->PERTURB_ON_HIGH_RES) { + if (matter_options_global->PERTURB_ON_HIGH_RES) { xf += (boxes->hires_vx)[R_INDEX(i, j, k)]; yf += (boxes->hires_vy)[R_INDEX(i, j, k)]; zf += (boxes->hires_vz)[R_INDEX(i, j, k)]; - } - else { - HII_i = (unsigned long long)(i/f_pixel_factor); - HII_j = (unsigned long long)(j/f_pixel_factor); - HII_k = (unsigned long long)(k/f_pixel_factor); + } else { + HII_i = (unsigned long long)(i / f_pixel_factor); + HII_j = (unsigned long long)(j / f_pixel_factor); + HII_k = (unsigned long long)(k / f_pixel_factor); xf += (boxes->lowres_vx)[HII_R_INDEX(HII_i, HII_j, HII_k)]; yf += (boxes->lowres_vy)[HII_R_INDEX(HII_i, HII_j, HII_k)]; zf += (boxes->lowres_vz)[HII_R_INDEX(HII_i, HII_j, HII_k)]; @@ -69,59 +66,90 @@ double *MapMass_cpu( // 2LPT PART // add second order corrections - if(user_params->USE_2LPT){ - if(user_params->PERTURB_ON_HIGH_RES) { - xf -= (boxes->hires_vx_2LPT)[R_INDEX(i,j,k)]; - yf -= (boxes->hires_vy_2LPT)[R_INDEX(i,j,k)]; - zf -= (boxes->hires_vz_2LPT)[R_INDEX(i,j,k)]; - } - else { - xf -= 
(boxes->lowres_vx_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; - yf -= (boxes->lowres_vy_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; - zf -= (boxes->lowres_vz_2LPT)[HII_R_INDEX(HII_i,HII_j,HII_k)]; + if (matter_options_global->PERTURB_ALGORITHM == 2) { + if (matter_options_global->PERTURB_ON_HIGH_RES) { + xf -= (boxes->hires_vx_2LPT)[R_INDEX(i, j, k)]; + yf -= (boxes->hires_vy_2LPT)[R_INDEX(i, j, k)]; + zf -= (boxes->hires_vz_2LPT)[R_INDEX(i, j, k)]; + } else { + xf -= (boxes->lowres_vx_2LPT)[HII_R_INDEX(HII_i, HII_j, HII_k)]; + yf -= (boxes->lowres_vy_2LPT)[HII_R_INDEX(HII_i, HII_j, HII_k)]; + zf -= (boxes->lowres_vz_2LPT)[HII_R_INDEX(HII_i, HII_j, HII_k)]; } } xf *= (double)(dimension); yf *= (double)(dimension); - zf *= (double)((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension)); - while (xf >= (double)(dimension)){ xf -= (dimension);} - while (xf < 0){ xf += (dimension);} - while (yf >= (double)(dimension)){ yf -= (dimension);} - while (yf < 0){ yf += (dimension);} - while (zf >= (double)(user_params->NON_CUBIC_FACTOR*dimension)){ zf -= (user_params->NON_CUBIC_FACTOR*dimension);} - while (zf < 0){ zf += (user_params->NON_CUBIC_FACTOR*dimension);} + zf *= (double)(dimension_z); + while (xf >= (double)(dimension)) { + xf -= (dimension); + } + while (xf < 0) { + xf += (dimension); + } + while (yf >= (double)(dimension)) { + yf -= (dimension); + } + while (yf < 0) { + yf += (dimension); + } + while (zf >= (double)(dimension_z)) { + zf -= (dimension_z); + } + while (zf < 0) { + zf += (dimension_z); + } xi = xf; yi = yf; zi = zf; - if (xi >= (dimension)){ xi -= (dimension);} - if (xi < 0) {xi += (dimension);} - if (yi >= (dimension)){ yi -= (dimension);} - if (yi < 0) {yi += (dimension);} - if (zi >= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension))){ zi -= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} - if (zi < 0) {zi += ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} + if (xi >= (dimension)) { + xi -= 
(dimension); + } + if (xi < 0) { + xi += (dimension); + } + if (yi >= (dimension)) { + yi -= (dimension); + } + if (yi < 0) { + yi += (dimension); + } + if (zi >= (dimension_z)) { + zi -= (dimension_z); + } + if (zi < 0) { + zi += (dimension_z); + } - // Determine the fraction of the perturbed cell which overlaps with the 8 nearest grid cells, - // based on the grid cell which contains the centre of the perturbed cell - d_x = fabs(xf - (double)(xi+0.5)); - d_y = fabs(yf - (double)(yi+0.5)); - d_z = fabs(zf - (double)(zi+0.5)); - if(xf < (double)(xi+0.5)) { + // Determine the fraction of the perturbed cell which overlaps with the 8 + // nearest grid cells, based on the grid cell which contains the centre of the + // perturbed cell + d_x = fabs(xf - (double)(xi + 0.5)); + d_y = fabs(yf - (double)(yi + 0.5)); + d_z = fabs(zf - (double)(zi + 0.5)); + if (xf < (double)(xi + 0.5)) { // If perturbed cell centre is less than the mid-point then update fraction // of mass in the cell and determine the cell centre of neighbour to be the // lowest grid point index d_x = 1. - d_x; xi -= 1; - if (xi < 0) {xi += (dimension);} // Only this critera is possible as iterate back by one (we cannot exceed DIM) + if (xi < 0) { + xi += (dimension); + } // Only this critera is possible as iterate back by one (we cannot exceed + // DIM) } - if(yf < (double)(yi+0.5)) { + if (yf < (double)(yi + 0.5)) { d_y = 1. - d_y; yi -= 1; - if (yi < 0) {yi += (dimension);} + if (yi < 0) { + yi += (dimension); + } } - if(zf < (double)(zi+0.5)) { + if (zf < (double)(zi + 0.5)) { d_z = 1. - d_z; zi -= 1; - if (zi < 0) {zi += ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} + if (zi < 0) { + zi += (dimension_z); + } } t_x = 1. - d_x; t_y = 1. 
- d_y; @@ -130,49 +158,104 @@ double *MapMass_cpu( // Determine the grid coordinates of the 8 neighbouring cells // Takes into account the offset based on cell centre determined above xp1 = xi + 1; - if(xp1 >= dimension) { xp1 -= (dimension);} + if (xp1 >= dimension) { + xp1 -= (dimension); + } yp1 = yi + 1; - if(yp1 >= dimension) { yp1 -= (dimension);} + if (yp1 >= dimension) { + yp1 -= (dimension); + } zp1 = zi + 1; - if(zp1 >= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension))) { zp1 -= ((unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension));} + if (zp1 >= (dimension_z)) { + zp1 -= (dimension_z); + } - if(user_params->PERTURB_ON_HIGH_RES) { - // Redistribute the mass over the 8 neighbouring cells according to cloud in cell + if (matter_options_global->PERTURB_ON_HIGH_RES) { + // Redistribute the mass over the 8 neighbouring cells according to cloud in + // cell #pragma omp atomic - resampled_box[R_INDEX(xi,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*t_z); + resampled_box[R_INDEX(xi, yi, zi)] += + (double)(1. + init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (t_x * t_y * t_z); #pragma omp atomic - resampled_box[R_INDEX(xp1,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*t_z); + resampled_box[R_INDEX(xp1, yi, zi)] += + (double)(1. + init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (d_x * t_y * t_z); #pragma omp atomic - resampled_box[R_INDEX(xi,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*t_z); + resampled_box[R_INDEX(xi, yp1, zi)] += + (double)(1. + init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (t_x * d_y * t_z); #pragma omp atomic - resampled_box[R_INDEX(xp1,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*t_z); + resampled_box[R_INDEX(xp1, yp1, zi)] += + (double)(1. 
+ init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (d_x * d_y * t_z); #pragma omp atomic - resampled_box[R_INDEX(xi,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*d_z); + resampled_box[R_INDEX(xi, yi, zp1)] += + (double)(1. + init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (t_x * t_y * d_z); #pragma omp atomic - resampled_box[R_INDEX(xp1,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*d_z); + resampled_box[R_INDEX(xp1, yi, zp1)] += + (double)(1. + init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (d_x * t_y * d_z); #pragma omp atomic - resampled_box[R_INDEX(xi,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*d_z); + resampled_box[R_INDEX(xi, yp1, zp1)] += + (double)(1. + init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (t_x * d_y * d_z); #pragma omp atomic - resampled_box[R_INDEX(xp1,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*d_z); - } - else { - // Redistribute the mass over the 8 neighbouring cells according to cloud in cell + resampled_box[R_INDEX(xp1, yp1, zp1)] += + (double)(1. + init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (d_x * d_y * d_z); + } else { + // Redistribute the mass over the 8 neighbouring cells according to cloud in + // cell #pragma omp atomic - resampled_box[HII_R_INDEX(xi,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*t_z); + resampled_box[HII_R_INDEX(xi, yi, zi)] += + (double)(1. + init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (t_x * t_y * t_z); #pragma omp atomic - resampled_box[HII_R_INDEX(xp1,yi,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*t_z); + resampled_box[HII_R_INDEX(xp1, yi, zi)] += + (double)(1. 
+ init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (d_x * t_y * t_z); #pragma omp atomic - resampled_box[HII_R_INDEX(xi,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*t_z); + resampled_box[HII_R_INDEX(xi, yp1, zi)] += + (double)(1. + init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (t_x * d_y * t_z); #pragma omp atomic - resampled_box[HII_R_INDEX(xp1,yp1,zi)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*t_z); + resampled_box[HII_R_INDEX(xp1, yp1, zi)] += + (double)(1. + init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (d_x * d_y * t_z); #pragma omp atomic - resampled_box[HII_R_INDEX(xi,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*t_y*d_z); + resampled_box[HII_R_INDEX(xi, yi, zp1)] += + (double)(1. + init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (t_x * t_y * d_z); #pragma omp atomic - resampled_box[HII_R_INDEX(xp1,yi,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*t_y*d_z); + resampled_box[HII_R_INDEX(xp1, yi, zp1)] += + (double)(1. + init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (d_x * t_y * d_z); #pragma omp atomic - resampled_box[HII_R_INDEX(xi,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(t_x*d_y*d_z); + resampled_box[HII_R_INDEX(xi, yp1, zp1)] += + (double)(1. + init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (t_x * d_y * d_z); #pragma omp atomic - resampled_box[HII_R_INDEX(xp1,yp1,zp1)] += (double)(1. + init_growth_factor*(boxes->hires_density)[R_INDEX(i,j,k)])*(d_x*d_y*d_z); + resampled_box[HII_R_INDEX(xp1, yp1, zp1)] += + (double)(1. 
+ init_growth_factor * + (boxes->hires_density)[R_INDEX(i, j, k)]) * + (d_x * d_y * d_z); } } } diff --git a/src/py21cmfast/src/OutputStructs.h b/src/py21cmfast/src/OutputStructs.h index 479e256c3..8230a77c0 100644 --- a/src/py21cmfast/src/OutputStructs.h +++ b/src/py21cmfast/src/OutputStructs.h @@ -6,7 +6,96 @@ #include "InputParameters.h" -// Since it is unguarded, make sure to ONLY include this file from here -#include "_outputstructs_wrapper.h" +typedef struct InitialConditions { + float *lowres_density, *lowres_vx, *lowres_vy, *lowres_vz; + float *lowres_vx_2LPT, *lowres_vy_2LPT, *lowres_vz_2LPT; + float *hires_density, *hires_vx, *hires_vy, *hires_vz; + float *hires_vx_2LPT, *hires_vy_2LPT, *hires_vz_2LPT; // cw addition + float *lowres_vcb; +} InitialConditions; + +typedef struct PerturbedField { + float *density, *velocity_x, *velocity_y, *velocity_z; +} PerturbedField; + +typedef struct HaloField { + long long unsigned int n_halos; + long long unsigned int buffer_size; + float *halo_masses; + int *halo_coords; + + // Halo properties for stochastic model + float *star_rng; + float *sfr_rng; + float *xray_rng; +} HaloField; + +typedef struct PerturbHaloField { + long long unsigned int n_halos; + long long unsigned int buffer_size; + float *halo_masses; + int *halo_coords; + + // Halo properties for stochastic model + float *star_rng; + float *sfr_rng; + float *xray_rng; +} PerturbHaloField; + +typedef struct HaloBox { + // Things that aren't used in radiation fields but useful outputs + float *halo_mass; + float *halo_stars; + float *halo_stars_mini; + int *count; + + // For IonisationBox.c and SpinTemperatureBox.c + float *n_ion; // weighted by F_ESC*PopN_ion + float *halo_sfr; // for x-rays and Ts stuff + float *halo_xray; + float *halo_sfr_mini; // for x-rays and Ts stuff + float *whalo_sfr; // SFR weighted by PopN_ion and F_ESC, used for Gamma12 + + // Average volume-weighted log10 Turnover masses are kept in order to compare with the expected + // MF 
integrals + double log10_Mcrit_ACG_ave; + double log10_Mcrit_MCG_ave; +} HaloBox; + +typedef struct XraySourceBox { + float *filtered_sfr; + float *filtered_xray; + float *filtered_sfr_mini; + + double *mean_log10_Mcrit_LW; + double *mean_sfr; + double *mean_sfr_mini; +} XraySourceBox; + +typedef struct TsBox { + float *spin_temperature; + float *xray_ionised_fraction; + float *kinetic_temp_neutral; + float *J_21_LW; +} TsBox; + +typedef struct IonizedBox { + double mean_f_coll; + double mean_f_coll_MINI; + double log10_Mturnover_ave; + double log10_Mturnover_MINI_ave; + float *neutral_fraction; + float *ionisation_rate_G12; + float *mean_free_path; + float *z_reion; + float *cumulative_recombinations; + float *kinetic_temperature; + float *unnormalised_nion; + float *unnormalised_nion_mini; +} IonizedBox; + +typedef struct BrightnessTemp { + float *brightness_temp; +} BrightnessTemp; #endif diff --git a/src/py21cmfast/src/PerturbField.c b/src/py21cmfast/src/PerturbField.c index 65e64a9f7..92be25f1a 100644 --- a/src/py21cmfast/src/PerturbField.c +++ b/src/py21cmfast/src/PerturbField.c @@ -179,7 +179,7 @@ int ComputePerturbField(float redshift, InitialConditions *boxes, PerturbedField init_displacement_factor_2LPT; float mass_factor, dDdt, f_pixel_factor, velocity_displacement_factor, velocity_displacement_factor_2LPT; - int i, j, k, xi, yi, zi, dimension, dimension_z, switch_mid; + int i, j, k, dimension, dimension_z, switch_mid; // Function for deciding the dimensions of loops when we could // use either the low or high resolution grids. 
diff --git a/src/py21cmfast/src/SpinTemperatureBox.c b/src/py21cmfast/src/SpinTemperatureBox.c index b93353e7a..572008f94 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.c +++ b/src/py21cmfast/src/SpinTemperatureBox.c @@ -957,8 +957,8 @@ void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, bool use_cuda = false; // pass this as a parameter later if (use_cuda && astro_options_global->USE_MASS_DEPENDENT_ZETA && matter_options_global->USE_INTERPOLATION_TABLES && !astro_options_global->USE_MINI_HALOS) { - RGTable1D_f *SFRD_conditional_table = get_SFRD_conditional_table(); #if CUDA_FOUND + RGTable1D_f *SFRD_conditional_table = get_SFRD_conditional_table(); ave_sfrd_buf = calculate_sfrd_from_grid_gpu(SFRD_conditional_table, dens_R_grid, zpp_growth, R_ct, sfrd_grid, HII_TOT_NUM_PIXELS, threadsPerBlock, diff --git a/src/py21cmfast/src/SpinTemperatureBox.h b/src/py21cmfast/src/SpinTemperatureBox.h index c9799615a..033ea2e99 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.h +++ b/src/py21cmfast/src/SpinTemperatureBox.h @@ -6,6 +6,7 @@ #include "InputParameters.h" #include "OutputStructs.h" #include "interpolation.h" +#include "scaling_relations.h" #ifdef __cplusplus extern "C" { diff --git a/src/py21cmfast/src/Stochasticity.c b/src/py21cmfast/src/Stochasticity.c index 9cce87aa8..f983c7e48 100644 --- a/src/py21cmfast/src/Stochasticity.c +++ b/src/py21cmfast/src/Stochasticity.c @@ -956,8 +956,7 @@ int sample_halo_grids(gsl_rng **rng_arr, double redshift, float *dens_field, // NOTE: there's a lot of repeated code here and in build_halo_cats, find a way to merge int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloField *halofield_in, - HaloField *halofield_out, struct HaloSamplingConstants *hs_constants, - RGTable1D_f *sigma_table) { + HaloField *halofield_out, struct HaloSamplingConstants *hs_constants) { if (z_in >= z_out) { LOG_ERROR("halo progenitors must go backwards in time!!! 
z_in = %.1f, z_out = %.1f", z_in, z_out); @@ -985,16 +984,17 @@ int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloFi // use cuda function if use_cuda is true bool use_cuda = false; // pass this as a parameter later if (use_cuda) { +#if CUDA_FOUND // get parameters needed for sigma calculation + + RGTable1D_f *sigma_table = GetSigmaInterpTable(); double x_min = sigma_table->x_min; double x_width = sigma_table->x_width; int sigma_bin = sigma_table->n_bin; float *sigma_y_arr = sigma_table->y_arr; - // Create a copy of hs_constants for passing to cuda struct HaloSamplingConstants d_hs_constants; d_hs_constants = *hs_constants; - // get in halo data float *halo_m = halofield_in->halo_masses; float *halo_star_rng = halofield_in->star_rng; @@ -1003,8 +1003,6 @@ int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloFi int *halo_c = halofield_in->halo_coords; printf("Start cuda calculation for progenitors. "); - -#if CUDA_FOUND updateHaloOut(halo_m, halo_star_rng, halo_sfr_rng, halo_xray_rng, halo_c, nhalo_in, sigma_y_arr, sigma_bin, x_min, x_width, d_hs_constants, arraysize_total, halofield_out); @@ -1012,9 +1010,10 @@ int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloFi #else LOG_ERROR("CUDA function updateHaloOut() called but code was not compiled for CUDA."); + Throw(ValueError); #endif } else { // CPU fallback -#pragma omp parallel num_threads(user_params_global->N_THREADS) +#pragma omp parallel num_threads(simulation_options_global->N_THREADS) { float prog_buf[MAX_HALO_CELL]; int n_prog; @@ -1070,7 +1069,7 @@ int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloFi config_settings.HALO_CATALOG_MEM_FACTOR); LOG_ERROR( "If you expected to have an above average halo number try raising " - "user_params_global->MAXHALO_FACTOR"); + "config_settings.HALO_CATALOG_MEM_FACTOR"); Throw(ValueError); } @@ -1102,12 +1101,12 @@ int sample_halo_progenitors(gsl_rng **rng_arr, double 
z_in, double z_out, HaloFi if (ii == 0) { LOG_ULTRA_DEBUG( " HMF %d delta %.3f delta_coll %.3f delta_desc %.3f adjusted %.3f", - user_params_global->HMF, hs_constants_priv.delta, - get_delta_crit(user_params_global->HMF, hs_constants_priv.sigma_cond, + simulation_options_global->HMF, hs_constants_priv.delta, + get_delta_crit(simulation_options_global->HMF, hs_constants_priv.sigma_cond, hs_constants->growth_out), - get_delta_crit(user_params_global->HMF, hs_constants_priv.sigma_cond, + get_delta_crit(simulation_options_global->HMF, hs_constants_priv.sigma_cond, hs_constants->growth_in), - get_delta_crit(user_params_global->HMF, hs_constants_priv.sigma_cond, + get_delta_crit(simulation_options_global->HMF, hs_constants_priv.sigma_cond, hs_constants->growth_in) * hs_constants->growth_out / hs_constants->growth_in); print_hs_consts(&hs_constants_priv); @@ -1143,21 +1142,19 @@ int stochastic_halofield(unsigned long long int seed, float redshift_desc, float struct HaloSamplingConstants hs_constants; stoc_set_consts_z(&hs_constants, redshift, redshift_desc); - // get interp tables needed for sampling progenitors - RGTable1D *nhalo_table = GetNhaloTable(); - RGTable1D *mcoll_table = GetMcollTable(); - RGTable2D *nhalo_inv_table = GetNhaloInvTable(); - RGTable1D_f *sigma_table = GetSigmaInterpTable(); - bool use_cuda = false; if (use_cuda) { #if CUDA_FOUND + // get interp tables needed for sampling progenitors + RGTable1D *nhalo_table = GetNhaloTable(); + RGTable1D *mcoll_table = GetMcollTable(); + RGTable2D *nhalo_inv_table = GetNhaloInvTable(); // copy the tables to the device copyTablesToDevice(*nhalo_table, *mcoll_table, *nhalo_inv_table); // copy global variables to the device // todo: move the following operation to InitialConditions.c - updateGlobalParams(user_params_global, cosmo_params_global, astro_params_global); + updateGlobalParams(simulation_options_global, cosmo_params_global, astro_params_global); #else LOG_ERROR("CUDA function copyTablesToDevice called but 
code was not compiled for CUDA."); #endif @@ -1172,12 +1169,11 @@ int stochastic_halofield(unsigned long long int seed, float redshift_desc, float if (use_cuda) { // initiate rand states on the device +#if CUDA_FOUND unsigned long long int nhalo_first = halos->n_halos; int buffer_scale = HALO_CUDA_THREAD_FACTOR + 1; unsigned long long int n_rstates = nhalo_first * buffer_scale; printf("initializing %llu random states on the device... \n", n_rstates); - -#if CUDA_FOUND init_rand_states(seed, n_rstates); printf("finish initializing \n"); @@ -1186,14 +1182,15 @@ int stochastic_halofield(unsigned long long int seed, float redshift_desc, float #else LOG_ERROR( "CUDA function init_rand_states() called but code was not compiled for CUDA."); + Throw(ValueError); #endif } } else { LOG_DEBUG("Calculating halo progenitors from z=%.1f to z=%.1f | %llu", redshift_desc, redshift, halos_desc->n_halos); - sample_halo_progenitors(rng_stoc, redshift_desc, redshift, halos_desc, halos, &hs_constants, - sigma_table); + sample_halo_progenitors(rng_stoc, redshift_desc, redshift, halos_desc, halos, + &hs_constants); } LOG_DEBUG("Found %llu Halos", halos->n_halos); diff --git a/src/py21cmfast/src/_inputparams_wrapper.h b/src/py21cmfast/src/_inputparams_wrapper.h deleted file mode 100644 index 9bc974364..000000000 --- a/src/py21cmfast/src/_inputparams_wrapper.h +++ /dev/null @@ -1,186 +0,0 @@ -/*We need to explicitly define the types used by the wrapper - However, that function does not take directives, so we separate the types here -*/ -// WARNING: DO NOT #include THIS FILE IN THE C CODE EXCEPT FOR IN InputParameters.h - -typedef struct CosmoParams { - float SIGMA_8; - float hlittle; - float OMm; - float OMl; - float OMb; - float POWER_INDEX; - - float OMn; - float OMk; - float OMr; - float OMtot; - float Y_He; - float wl; - -} CosmoParams; - -typedef struct SimulationOptions { - // Parameters taken from INIT_PARAMS.H - int HII_DIM; - int DIM; - float BOX_LEN; - float NON_CUBIC_FACTOR; - 
int N_THREADS; - double Z_HEAT_MAX; - double ZPRIME_STEP_FACTOR; - - // Halo Sampler Options - float SAMPLER_MIN_MASS; - double SAMPLER_BUFFER_FACTOR; - int N_COND_INTERP; - int N_PROB_INTERP; - double MIN_LOGPROB; - double HALOMASS_CORRECTION; - double PARKINSON_G0; - double PARKINSON_y1; - double PARKINSON_y2; - - float INITIAL_REDSHIFT; - double DELTA_R_FACTOR; - double DENSITY_SMOOTH_RADIUS; - - double DEXM_OPTIMIZE_MINMASS; - double DEXM_R_OVERLAP; - - double CORR_STAR; - double CORR_SFR; - double CORR_LX; -} SimulationOptions; - -typedef struct MatterOptions { - bool USE_FFTW_WISDOM; - int HMF; - int USE_RELATIVE_VELOCITIES; - int POWER_SPECTRUM; - int USE_INTERPOLATION_TABLES; - bool NO_RNG; - bool PERTURB_ON_HIGH_RES; - int PERTURB_ALGORITHM; - bool MINIMIZE_MEMORY; - bool KEEP_3D_VELOCITIES; - bool DEXM_OPTIMIZE; - int FILTER; - int HALO_FILTER; - bool SMOOTH_EVOLVED_DENSITY_FIELD; - - bool USE_HALO_FIELD; - bool HALO_STOCHASTICITY; - bool FIXED_HALO_GRIDS; - int SAMPLE_METHOD; -} MatterOptions; - -typedef struct AstroParams { - float HII_EFF_FACTOR; - - // SHMR - float F_STAR10; - float ALPHA_STAR; - float ALPHA_STAR_MINI; - float SIGMA_STAR; - double UPPER_STELLAR_TURNOVER_MASS; - double UPPER_STELLAR_TURNOVER_INDEX; - float F_STAR7_MINI; - - // SFMS - float t_STAR; - double SIGMA_SFR_INDEX; - double SIGMA_SFR_LIM; - - // L_X/SFR - double L_X; - double L_X_MINI; - double SIGMA_LX; - - // Escape Fraction - float F_ESC10; - float ALPHA_ESC; - float F_ESC7_MINI; - - float T_RE; - - float M_TURN; - float R_BUBBLE_MAX; - float ION_Tvir_MIN; - double F_H2_SHIELD; - float NU_X_THRESH; - float X_RAY_SPEC_INDEX; - float X_RAY_Tvir_MIN; - - double A_LW; - double BETA_LW; - double A_VCB; - double BETA_VCB; - - double FIXED_VAVG; - double POP2_ION; - double POP3_ION; - - int N_RSD_STEPS; - double PHOTONCONS_CALIBRATION_END; - double CLUMPING_FACTOR; - double ALPHA_UVB; - - float R_MAX_TS; - int N_STEP_TS; - double DELTA_R_HII_FACTOR; - float R_BUBBLE_MIN; - double 
MAX_DVDR; - double NU_X_MAX; - double NU_X_BAND_MAX; -} AstroParams; - -typedef struct AstroOptions { - bool USE_MINI_HALOS; - bool USE_CMB_HEATING; // CMB Heating Flag - bool USE_LYA_HEATING; // Lya Heating Flag - bool USE_MASS_DEPENDENT_ZETA; - bool SUBCELL_RSD; - bool APPLY_RSDS; - bool INHOMO_RECO; - bool USE_TS_FLUCT; - bool M_MIN_in_Mass; - bool FIX_VCB_AVG; - bool USE_EXP_FILTER; - bool CELL_RECOMB; - int PHOTON_CONS_TYPE; - bool USE_UPPER_STELLAR_TURNOVER; - bool HALO_SCALING_RELATIONS_MEDIAN; - int HII_FILTER; - int HEAT_FILTER; - bool IONISE_ENTIRE_SPHERE; - bool AVG_BELOW_SAMPLER; - int INTEGRATION_METHOD_ATOMIC; - int INTEGRATION_METHOD_MINI; -} AstroOptions; - -typedef struct ConfigSettings { - double HALO_CATALOG_MEM_FACTOR; - - char *external_table_path; - char *wisdoms_path; -} ConfigSettings; - -/* Previously, we had a few structures spread throughout the code e.g simulation_options_ufunc which - were all globally defined and separately broadcast at different times. Several of these were used - across different files and some inside #defines (e.g indexing.h), so for now I've combined - the parameter structures to avoid confusion (we shouldn't have the possibility of two files using - different parameters). 
- - In future we should have a parameter structure in each .c file containing ONLY parameters - relevant to it (look at HaloBox.c), and force the broadcast at each _compute() step (or even - decorate any library call) However this would require us to be very careful about initialising - the globals when ANY function from that file is called */ -// The structs declared here defined in InputParameters.c -extern SimulationOptions *simulation_options_global; -extern MatterOptions *matter_options_global; -extern CosmoParams *cosmo_params_global; -extern AstroParams *astro_params_global; -extern AstroOptions *astro_options_global; - -extern ConfigSettings config_settings; diff --git a/src/py21cmfast/src/_outputstructs_wrapper.h b/src/py21cmfast/src/_outputstructs_wrapper.h deleted file mode 100644 index b7c79ac92..000000000 --- a/src/py21cmfast/src/_outputstructs_wrapper.h +++ /dev/null @@ -1,96 +0,0 @@ -/*We need to explicitly define the types used by the warpper using ffi.cdef() - However, that function does not take directives, so we separate the types here -*/ -// WARNING: DO NOT #include THIS FILE IN THE C CODE EXCEPT FOR IN OutputStructs.h - -typedef struct InitialConditions { - float *lowres_density, *lowres_vx, *lowres_vy, *lowres_vz; - float *lowres_vx_2LPT, *lowres_vy_2LPT, *lowres_vz_2LPT; - float *hires_density, *hires_vx, *hires_vy, *hires_vz; - float *hires_vx_2LPT, *hires_vy_2LPT, *hires_vz_2LPT; // cw addition - float *lowres_vcb; -} InitialConditions; - -typedef struct PerturbedField { - float *density, *velocity_x, *velocity_y, *velocity_z; -} PerturbedField; - -typedef struct HaloField { - long long unsigned int n_halos; - long long unsigned int buffer_size; - float *halo_masses; - int *halo_coords; - - // Halo properties for stochastic model - float *star_rng; - float *sfr_rng; - float *xray_rng; -} HaloField; - -typedef struct PerturbHaloField { - long long unsigned int n_halos; - long long unsigned int buffer_size; - float *halo_masses; - int 
*halo_coords; - - // Halo properties for stochastic model - float *star_rng; - float *sfr_rng; - float *xray_rng; -} PerturbHaloField; - -typedef struct HaloBox { - // Things that aren't used in radiation fields but useful outputs - float *halo_mass; - float *halo_stars; - float *halo_stars_mini; - int *count; - - // For IonisationBox.c and SpinTemperatureBox.c - float *n_ion; // weighted by F_ESC*PopN_ion - float *halo_sfr; // for x-rays and Ts stuff - float *halo_xray; - float *halo_sfr_mini; // for x-rays and Ts stuff - float *whalo_sfr; // SFR weighted by PopN_ion and F_ESC, used for Gamma12 - - // Average volume-weighted log10 Turnover masses are kept in order to compare with the expected - // MF integrals - double log10_Mcrit_ACG_ave; - double log10_Mcrit_MCG_ave; -} HaloBox; - -typedef struct XraySourceBox { - float *filtered_sfr; - float *filtered_xray; - float *filtered_sfr_mini; - - double *mean_log10_Mcrit_LW; - double *mean_sfr; - double *mean_sfr_mini; -} XraySourceBox; - -typedef struct TsBox { - float *spin_temperature; - float *xray_ionised_fraction; - float *kinetic_temp_neutral; - float *J_21_LW; -} TsBox; - -typedef struct IonizedBox { - double mean_f_coll; - double mean_f_coll_MINI; - double log10_Mturnover_ave; - double log10_Mturnover_MINI_ave; - float *neutral_fraction; - float *ionisation_rate_G12; - float *mean_free_path; - float *z_reion; - float *cumulative_recombinations; - float *kinetic_temperature; - float *unnormalised_nion; - float *unnormalised_nion_mini; -} IonizedBox; - -typedef struct BrightnessTemp { - float *brightness_temp; -} BrightnessTemp; diff --git a/src/py21cmfast/src/_wrapper.cpp b/src/py21cmfast/src/_wrapper.cpp index b76ad6095..087882813 100644 --- a/src/py21cmfast/src/_wrapper.cpp +++ b/src/py21cmfast/src/_wrapper.cpp @@ -1,15 +1,11 @@ #include #include - -#include "InputParameters.h" -// #include +#include namespace nb = nanobind; extern "C" { #include "21cmFAST.h" -#include "Constants.h" -#include "indexing.h" } 
NB_MODULE(c_21cmfast, m) { @@ -25,163 +21,145 @@ NB_MODULE(c_21cmfast, m) { .def_rw("OMm", &CosmoParams::OMm) .def_rw("OMl", &CosmoParams::OMl) .def_rw("OMb", &CosmoParams::OMb) + .def_rw("OMn", &CosmoParams::OMn) + .def_rw("OMk", &CosmoParams::OMk) + .def_rw("OMr", &CosmoParams::OMr) + .def_rw("OMtot", &CosmoParams::OMtot) + .def_rw("Y_He", &CosmoParams::Y_He) + .def_rw("wl", &CosmoParams::wl) .def_rw("POWER_INDEX", &CosmoParams::POWER_INDEX); - // Bind UserParams - nb::class_(m, "UserParams") + // Bind SimulationOptions + nb::class_(m, "SimulationOptions") + .def(nb::init<>()) + .def_rw("HII_DIM", &SimulationOptions::HII_DIM) + .def_rw("DIM", &SimulationOptions::DIM) + .def_rw("BOX_LEN", &SimulationOptions::BOX_LEN) + .def_rw("NON_CUBIC_FACTOR", &SimulationOptions::NON_CUBIC_FACTOR) + .def_rw("N_THREADS", &SimulationOptions::N_THREADS) + .def_rw("Z_HEAT_MAX", &SimulationOptions::Z_HEAT_MAX) + .def_rw("ZPRIME_STEP_FACTOR", &SimulationOptions::ZPRIME_STEP_FACTOR) + .def_rw("SAMPLER_MIN_MASS", &SimulationOptions::SAMPLER_MIN_MASS) + .def_rw("SAMPLER_BUFFER_FACTOR", &SimulationOptions::SAMPLER_BUFFER_FACTOR) + .def_rw("N_COND_INTERP", &SimulationOptions::N_COND_INTERP) + .def_rw("N_PROB_INTERP", &SimulationOptions::N_PROB_INTERP) + .def_rw("MIN_LOGPROB", &SimulationOptions::MIN_LOGPROB) + .def_rw("HALOMASS_CORRECTION", &SimulationOptions::HALOMASS_CORRECTION) + .def_rw("PARKINSON_G0", &SimulationOptions::PARKINSON_G0) + .def_rw("PARKINSON_y1", &SimulationOptions::PARKINSON_y1) + .def_rw("PARKINSON_y2", &SimulationOptions::PARKINSON_y2) + .def_rw("INITIAL_REDSHIFT", &SimulationOptions::INITIAL_REDSHIFT) + .def_rw("DELTA_R_FACTOR", &SimulationOptions::DELTA_R_FACTOR) + .def_rw("DENSITY_SMOOTH_RADIUS", &SimulationOptions::DENSITY_SMOOTH_RADIUS) + .def_rw("DEXM_OPTIMIZE_MINMASS", &SimulationOptions::DEXM_OPTIMIZE_MINMASS) + .def_rw("DEXM_R_OVERLAP", &SimulationOptions::DEXM_R_OVERLAP) + .def_rw("CORR_STAR", &SimulationOptions::CORR_STAR) + .def_rw("CORR_SFR", 
&SimulationOptions::CORR_SFR) + .def_rw("CORR_LX", &SimulationOptions::CORR_LX); + + nb::class_(m, "MatterOptions") .def(nb::init<>()) - .def_rw("HII_DIM", &UserParams::HII_DIM) - .def_rw("DIM", &UserParams::DIM) - .def_rw("BOX_LEN", &UserParams::BOX_LEN) - .def_rw("NON_CUBIC_FACTOR", &UserParams::NON_CUBIC_FACTOR) - .def_rw("USE_FFTW_WISDOM", &UserParams::USE_FFTW_WISDOM) - .def_rw("HMF", &UserParams::HMF) - .def_rw("USE_RELATIVE_VELOCITIES", &UserParams::USE_RELATIVE_VELOCITIES) - .def_rw("POWER_SPECTRUM", &UserParams::POWER_SPECTRUM) - .def_rw("N_THREADS", &UserParams::N_THREADS) - .def_rw("PERTURB_ON_HIGH_RES", &UserParams::PERTURB_ON_HIGH_RES) - .def_rw("NO_RNG", &UserParams::NO_RNG) - .def_rw("USE_INTERPOLATION_TABLES", &UserParams::USE_INTERPOLATION_TABLES) - .def_rw("INTEGRATION_METHOD_ATOMIC", &UserParams::INTEGRATION_METHOD_ATOMIC) - .def_rw("INTEGRATION_METHOD_MINI", &UserParams::INTEGRATION_METHOD_MINI) - .def_rw("USE_2LPT", &UserParams::USE_2LPT) - .def_rw("MINIMIZE_MEMORY", &UserParams::MINIMIZE_MEMORY) - .def_rw("KEEP_3D_VELOCITIES", &UserParams::KEEP_3D_VELOCITIES) - .def_rw("SAMPLER_MIN_MASS", &UserParams::SAMPLER_MIN_MASS) - .def_rw("SAMPLER_BUFFER_FACTOR", &UserParams::SAMPLER_BUFFER_FACTOR) - .def_rw("MAXHALO_FACTOR", &UserParams::MAXHALO_FACTOR) - .def_rw("N_COND_INTERP", &UserParams::N_COND_INTERP) - .def_rw("N_PROB_INTERP", &UserParams::N_PROB_INTERP) - .def_rw("MIN_LOGPROB", &UserParams::MIN_LOGPROB) - .def_rw("SAMPLE_METHOD", &UserParams::SAMPLE_METHOD) - .def_rw("AVG_BELOW_SAMPLER", &UserParams::AVG_BELOW_SAMPLER) - .def_rw("HALOMASS_CORRECTION", &UserParams::HALOMASS_CORRECTION) - .def_rw("PARKINSON_G0", &UserParams::PARKINSON_G0) - .def_rw("PARKINSON_y1", &UserParams::PARKINSON_y1) - .def_rw("PARKINSON_y2", &UserParams::PARKINSON_y2); + .def_rw("USE_FFTW_WISDOM", &MatterOptions::USE_FFTW_WISDOM) + .def_rw("HMF", &MatterOptions::HMF) + .def_rw("USE_RELATIVE_VELOCITIES", &MatterOptions::USE_RELATIVE_VELOCITIES) + .def_rw("POWER_SPECTRUM", 
&MatterOptions::POWER_SPECTRUM) + .def_rw("USE_INTERPOLATION_TABLES", &MatterOptions::USE_INTERPOLATION_TABLES) + .def_rw("NO_RNG", &MatterOptions::NO_RNG) + .def_rw("PERTURB_ON_HIGH_RES", &MatterOptions::PERTURB_ON_HIGH_RES) + .def_rw("PERTURB_ALGORITHM", &MatterOptions::PERTURB_ALGORITHM) + .def_rw("MINIMIZE_MEMORY", &MatterOptions::MINIMIZE_MEMORY) + .def_rw("KEEP_3D_VELOCITIES", &MatterOptions::KEEP_3D_VELOCITIES) + .def_rw("DEXM_OPTIMIZE", &MatterOptions::DEXM_OPTIMIZE) + .def_rw("FILTER", &MatterOptions::FILTER) + .def_rw("HALO_FILTER", &MatterOptions::HALO_FILTER) + .def_rw("SMOOTH_EVOLVED_DENSITY_FIELD", &MatterOptions::SMOOTH_EVOLVED_DENSITY_FIELD) + .def_rw("USE_HALO_FIELD", &MatterOptions::USE_HALO_FIELD) + .def_rw("HALO_STOCHASTICITY", &MatterOptions::HALO_STOCHASTICITY) + .def_rw("FIXED_HALO_GRIDS", &MatterOptions::FIXED_HALO_GRIDS) + .def_rw("SAMPLE_METHOD", &MatterOptions::SAMPLE_METHOD); // Bind AstroParams nb::class_(m, "AstroParams") - .def(nb::init<>()) .def_rw("HII_EFF_FACTOR", &AstroParams::HII_EFF_FACTOR) .def_rw("F_STAR10", &AstroParams::F_STAR10) .def_rw("ALPHA_STAR", &AstroParams::ALPHA_STAR) .def_rw("ALPHA_STAR_MINI", &AstroParams::ALPHA_STAR_MINI) .def_rw("SIGMA_STAR", &AstroParams::SIGMA_STAR) - .def_rw("CORR_STAR", &AstroParams::CORR_STAR) .def_rw("UPPER_STELLAR_TURNOVER_MASS", &AstroParams::UPPER_STELLAR_TURNOVER_MASS) .def_rw("UPPER_STELLAR_TURNOVER_INDEX", &AstroParams::UPPER_STELLAR_TURNOVER_INDEX) .def_rw("F_STAR7_MINI", &AstroParams::F_STAR7_MINI) .def_rw("t_STAR", &AstroParams::t_STAR) - .def_rw("CORR_SFR", &AstroParams::CORR_SFR) .def_rw("SIGMA_SFR_INDEX", &AstroParams::SIGMA_SFR_INDEX) .def_rw("SIGMA_SFR_LIM", &AstroParams::SIGMA_SFR_LIM) .def_rw("L_X", &AstroParams::L_X) .def_rw("L_X_MINI", &AstroParams::L_X_MINI) .def_rw("SIGMA_LX", &AstroParams::SIGMA_LX) - .def_rw("CORR_LX", &AstroParams::CORR_LX) .def_rw("F_ESC10", &AstroParams::F_ESC10) .def_rw("ALPHA_ESC", &AstroParams::ALPHA_ESC) .def_rw("F_ESC7_MINI", 
&AstroParams::F_ESC7_MINI) + .def_rw("T_RE", &AstroParams::T_RE) .def_rw("M_TURN", &AstroParams::M_TURN) .def_rw("R_BUBBLE_MAX", &AstroParams::R_BUBBLE_MAX) - .def_rw("ION_Tvir_MIN", &AstroParams::ION_Tvir_MIN); + .def_rw("ION_Tvir_MIN", &AstroParams::ION_Tvir_MIN) + .def_rw("F_H2_SHIELD", &AstroParams::F_H2_SHIELD) + .def_rw("NU_X_THRESH", &AstroParams::NU_X_THRESH) + .def_rw("X_RAY_SPEC_INDEX", &AstroParams::X_RAY_SPEC_INDEX) + .def_rw("X_RAY_Tvir_MIN", &AstroParams::X_RAY_Tvir_MIN) + .def_rw("A_LW", &AstroParams::A_LW) + .def_rw("BETA_LW", &AstroParams::BETA_LW) + .def_rw("A_VCB", &AstroParams::A_VCB) + .def_rw("BETA_VCB", &AstroParams::BETA_VCB) + .def_rw("FIXED_VAVG", &AstroParams::FIXED_VAVG) + .def_rw("POP2_ION", &AstroParams::POP2_ION) + .def_rw("POP3_ION", &AstroParams::POP3_ION) + .def_rw("N_RSD_STEPS", &AstroParams::N_RSD_STEPS) + .def_rw("PHOTONCONS_CALIBRATION_END", &AstroParams::PHOTONCONS_CALIBRATION_END) + .def_rw("CLUMPING_FACTOR", &AstroParams::CLUMPING_FACTOR) + .def_rw("ALPHA_UVB", &AstroParams::ALPHA_UVB) + .def_rw("R_MAX_TS", &AstroParams::R_MAX_TS) + .def_rw("N_STEP_TS", &AstroParams::N_STEP_TS) + .def_rw("DELTA_R_HII_FACTOR", &AstroParams::DELTA_R_HII_FACTOR) + .def_rw("R_BUBBLE_MIN", &AstroParams::R_BUBBLE_MIN) + .def_rw("MAX_DVDR", &AstroParams::MAX_DVDR) + .def_rw("NU_X_MAX", &AstroParams::NU_X_MAX) + .def_rw("NU_X_BAND_MAX", &AstroParams::NU_X_BAND_MAX); - // Bind FlagOptions - nb::class_(m, "FlagOptions") - .def(nb::init<>()) - .def_rw("USE_HALO_FIELD", &FlagOptions::USE_HALO_FIELD) - .def_rw("USE_MINI_HALOS", &FlagOptions::USE_MINI_HALOS) - .def_rw("USE_CMB_HEATING", &FlagOptions::USE_CMB_HEATING) - .def_rw("USE_LYA_HEATING", &FlagOptions::USE_LYA_HEATING) - .def_rw("USE_MASS_DEPENDENT_ZETA", &FlagOptions::USE_MASS_DEPENDENT_ZETA) - .def_rw("SUBCELL_RSD", &FlagOptions::SUBCELL_RSD) - .def_rw("APPLY_RSDS", &FlagOptions::APPLY_RSDS) - .def_rw("INHOMO_RECO", &FlagOptions::INHOMO_RECO) - .def_rw("USE_TS_FLUCT", &FlagOptions::USE_TS_FLUCT) 
- .def_rw("M_MIN_in_Mass", &FlagOptions::M_MIN_in_Mass) - .def_rw("FIX_VCB_AVG", &FlagOptions::FIX_VCB_AVG) - .def_rw("HALO_STOCHASTICITY", &FlagOptions::HALO_STOCHASTICITY) - .def_rw("USE_EXP_FILTER", &FlagOptions::USE_EXP_FILTER) - .def_rw("FIXED_HALO_GRIDS", &FlagOptions::FIXED_HALO_GRIDS) - .def_rw("CELL_RECOMB", &FlagOptions::CELL_RECOMB) - .def_rw("PHOTON_CONS_TYPE", &FlagOptions::PHOTON_CONS_TYPE) - .def_rw("USE_UPPER_STELLAR_TURNOVER", &FlagOptions::USE_UPPER_STELLAR_TURNOVER) - .def_rw("HALO_SCALING_RELATIONS_MEDIAN", &FlagOptions::HALO_SCALING_RELATIONS_MEDIAN); - - nb::class_(m, "GlobalParams") - .def(nb::init<>()) - .def_rw("ALPHA_UVB", &GlobalParams::ALPHA_UVB) - .def_rw("EVOLVE_DENSITY_LINEARLY", &GlobalParams::EVOLVE_DENSITY_LINEARLY) - .def_rw("SMOOTH_EVOLVED_DENSITY_FIELD", &GlobalParams::SMOOTH_EVOLVED_DENSITY_FIELD) - .def_rw("R_smooth_density", &GlobalParams::R_smooth_density) - .def_rw("HII_ROUND_ERR", &GlobalParams::HII_ROUND_ERR) - .def_rw("FIND_BUBBLE_ALGORITHM", &GlobalParams::FIND_BUBBLE_ALGORITHM) - .def_rw("N_POISSON", &GlobalParams::N_POISSON) - .def_rw("T_USE_VELOCITIES", &GlobalParams::T_USE_VELOCITIES) - .def_rw("MAX_DVDR", &GlobalParams::MAX_DVDR) - .def_rw("DELTA_R_HII_FACTOR", &GlobalParams::DELTA_R_HII_FACTOR) - .def_rw("DELTA_R_FACTOR", &GlobalParams::DELTA_R_FACTOR) - .def_rw("HII_FILTER", &GlobalParams::HII_FILTER) - .def_rw("INITIAL_REDSHIFT", &GlobalParams::INITIAL_REDSHIFT) - .def_rw("R_OVERLAP_FACTOR", &GlobalParams::R_OVERLAP_FACTOR) - .def_rw("DELTA_CRIT_MODE", &GlobalParams::DELTA_CRIT_MODE) - .def_rw("HALO_FILTER", &GlobalParams::HALO_FILTER) - .def_rw("OPTIMIZE", &GlobalParams::OPTIMIZE) - .def_rw("OPTIMIZE_MIN_MASS", &GlobalParams::OPTIMIZE_MIN_MASS) - .def_rw("CRIT_DENS_TRANSITION", &GlobalParams::CRIT_DENS_TRANSITION) - .def_rw("MIN_DENSITY_LOW_LIMIT", &GlobalParams::MIN_DENSITY_LOW_LIMIT) - .def_rw("RecombPhotonCons", &GlobalParams::RecombPhotonCons) - .def_rw("PhotonConsStart", &GlobalParams::PhotonConsStart) - 
.def_rw("PhotonConsEnd", &GlobalParams::PhotonConsEnd) - .def_rw("PhotonConsAsymptoteTo", &GlobalParams::PhotonConsAsymptoteTo) - .def_rw("PhotonConsEndCalibz", &GlobalParams::PhotonConsEndCalibz) - .def_rw("PhotonConsSmoothing", &GlobalParams::PhotonConsSmoothing) - .def_rw("HEAT_FILTER", &GlobalParams::HEAT_FILTER) - .def_rw("CLUMPING_FACTOR", &GlobalParams::CLUMPING_FACTOR) - .def_rw("Z_HEAT_MAX", &GlobalParams::Z_HEAT_MAX) - .def_rw("R_XLy_MAX", &GlobalParams::R_XLy_MAX) - .def_rw("NUM_FILTER_STEPS_FOR_Ts", &GlobalParams::NUM_FILTER_STEPS_FOR_Ts) - .def_rw("ZPRIME_STEP_FACTOR", &GlobalParams::ZPRIME_STEP_FACTOR) - .def_rw("TK_at_Z_HEAT_MAX", &GlobalParams::TK_at_Z_HEAT_MAX) - .def_rw("XION_at_Z_HEAT_MAX", &GlobalParams::XION_at_Z_HEAT_MAX) - .def_rw("Pop", &GlobalParams::Pop) - .def_rw("Pop2_ion", &GlobalParams::Pop2_ion) - .def_rw("Pop3_ion", &GlobalParams::Pop3_ion) - .def_rw("NU_X_BAND_MAX", &GlobalParams::NU_X_BAND_MAX) - .def_rw("NU_X_MAX", &GlobalParams::NU_X_MAX) - .def_rw("NBINS_LF", &GlobalParams::NBINS_LF) - .def_rw("P_CUTOFF", &GlobalParams::P_CUTOFF) - .def_rw("M_WDM", &GlobalParams::M_WDM) - .def_rw("g_x", &GlobalParams::g_x) - .def_rw("OMn", &GlobalParams::OMn) - .def_rw("OMk", &GlobalParams::OMk) - .def_rw("OMr", &GlobalParams::OMr) - .def_rw("OMtot", &GlobalParams::OMtot) - .def_rw("Y_He", &GlobalParams::Y_He) - .def_rw("wl", &GlobalParams::wl) - .def_rw("SHETH_b", &GlobalParams::SHETH_b) - .def_rw("SHETH_c", &GlobalParams::SHETH_c) - .def_rw("Zreion_HeII", &GlobalParams::Zreion_HeII) - .def_rw("FILTER", &GlobalParams::FILTER) - .def_ro("external_table_path", &GlobalParams::external_table_path) - .def_ro("wisdoms_path", &GlobalParams::wisdoms_path) - .def_rw("R_BUBBLE_MIN", &GlobalParams::R_BUBBLE_MIN) - .def_rw("M_MIN_INTEGRAL", &GlobalParams::M_MIN_INTEGRAL) - .def_rw("M_MAX_INTEGRAL", &GlobalParams::M_MAX_INTEGRAL) - .def_rw("T_RE", &GlobalParams::T_RE) - .def_rw("VAVG", &GlobalParams::VAVG) - .def_rw("USE_ADIABATIC_FLUCTUATIONS", 
&GlobalParams::USE_ADIABATIC_FLUCTUATIONS) + // TODO: the getter/setter workaround is clunky, we can go via a C++ std:string + // or try something else. + nb::class_(m, "ConfigSettings") + .def_rw("HALO_CATALOG_MEM_FACTOR", &ConfigSettings::HALO_CATALOG_MEM_FACTOR) + .def_ro("external_table_path", &ConfigSettings::external_table_path) + .def_ro("wisdoms_path", &ConfigSettings::wisdoms_path) .def("set_external_table_path", &set_external_table_path) .def("get_external_table_path", &get_external_table_path) .def("set_wisdoms_path", &set_wisdoms_path) .def("get_wisdoms_path", &get_wisdoms_path); - m.def( - "get_global_params", []() -> GlobalParams& { return global_params; }, - nb::rv_policy::reference); + nb::class_(m, "AstroOptions") + .def_rw("USE_MINI_HALOS", &AstroOptions::USE_MINI_HALOS) + .def_rw("USE_CMB_HEATING", &AstroOptions::USE_CMB_HEATING) + .def_rw("USE_LYA_HEATING", &AstroOptions::USE_LYA_HEATING) + .def_rw("USE_MASS_DEPENDENT_ZETA", &AstroOptions::USE_MASS_DEPENDENT_ZETA) + .def_rw("SUBCELL_RSD", &AstroOptions::SUBCELL_RSD) + .def_rw("APPLY_RSDS", &AstroOptions::APPLY_RSDS) + .def_rw("INHOMO_RECO", &AstroOptions::INHOMO_RECO) + .def_rw("USE_TS_FLUCT", &AstroOptions::USE_TS_FLUCT) + .def_rw("M_MIN_in_Mass", &AstroOptions::M_MIN_in_Mass) + .def_rw("FIX_VCB_AVG", &AstroOptions::FIX_VCB_AVG) + .def_rw("USE_EXP_FILTER", &AstroOptions::USE_EXP_FILTER) + .def_rw("CELL_RECOMB", &AstroOptions::CELL_RECOMB) + .def_rw("PHOTON_CONS_TYPE", &AstroOptions::PHOTON_CONS_TYPE) + .def_rw("USE_UPPER_STELLAR_TURNOVER", &AstroOptions::USE_UPPER_STELLAR_TURNOVER) + .def_rw("HALO_SCALING_RELATIONS_MEDIAN", &AstroOptions::HALO_SCALING_RELATIONS_MEDIAN) + .def_rw("HII_FILTER", &AstroOptions::HII_FILTER) + .def_rw("HEAT_FILTER", &AstroOptions::HEAT_FILTER) + .def_rw("IONISE_ENTIRE_SPHERE", &AstroOptions::IONISE_ENTIRE_SPHERE) + .def_rw("AVG_BELOW_SAMPLER", &AstroOptions::AVG_BELOW_SAMPLER) + .def_rw("INTEGRATION_METHOD_ATOMIC", &AstroOptions::INTEGRATION_METHOD_ATOMIC) + 
.def_rw("INTEGRATION_METHOD_MINI", &AstroOptions::INTEGRATION_METHOD_MINI); - // Bind output parameters + // Output Struct Bindings + // Bind InitialConditions nb::class_(m, "InitialConditions") .def(nb::init<>()) .def_rw("lowres_density", &InitialConditions::lowres_density) @@ -200,6 +178,7 @@ NB_MODULE(c_21cmfast, m) { .def_rw("hires_vz_2LPT", &InitialConditions::hires_vz_2LPT) .def_rw("lowres_vcb", &InitialConditions::lowres_vcb); + // Bind PerturbedField nb::class_(m, "PerturbedField") .def(nb::init<>()) .def_rw("density", &PerturbedField::density) @@ -207,6 +186,7 @@ NB_MODULE(c_21cmfast, m) { .def_rw("velocity_y", &PerturbedField::velocity_y) .def_rw("velocity_z", &PerturbedField::velocity_z); + // Bind HaloField nb::class_(m, "HaloField") .def(nb::init<>()) .def_rw("n_halos", &HaloField::n_halos) @@ -217,6 +197,7 @@ NB_MODULE(c_21cmfast, m) { .def_rw("sfr_rng", &HaloField::sfr_rng) .def_rw("xray_rng", &HaloField::xray_rng); + // Bind PerturbHaloField nb::class_(m, "PerturbHaloField") .def(nb::init<>()) .def_rw("n_halos", &PerturbHaloField::n_halos) @@ -227,6 +208,7 @@ NB_MODULE(c_21cmfast, m) { .def_rw("sfr_rng", &PerturbHaloField::sfr_rng) .def_rw("xray_rng", &PerturbHaloField::xray_rng); + // Bind HaloBox nb::class_(m, "HaloBox") .def(nb::init<>()) .def_rw("halo_mass", &HaloBox::halo_mass) @@ -241,6 +223,7 @@ NB_MODULE(c_21cmfast, m) { .def_rw("log10_Mcrit_ACG_ave", &HaloBox::log10_Mcrit_ACG_ave) .def_rw("log10_Mcrit_MCG_ave", &HaloBox::log10_Mcrit_MCG_ave); + // Bind XraySourceBox nb::class_(m, "XraySourceBox") .def(nb::init<>()) .def_rw("filtered_sfr", &XraySourceBox::filtered_sfr) @@ -250,33 +233,36 @@ NB_MODULE(c_21cmfast, m) { .def_rw("mean_sfr", &XraySourceBox::mean_sfr) .def_rw("mean_sfr_mini", &XraySourceBox::mean_sfr_mini); + // Bind TsBox nb::class_(m, "TsBox") .def(nb::init<>()) - .def_rw("Ts_box", &TsBox::Ts_box) - .def_rw("x_e_box", &TsBox::x_e_box) - .def_rw("Tk_box", &TsBox::Tk_box) - .def_rw("J_21_LW_box", &TsBox::J_21_LW_box); + 
.def_rw("spin_temperature", &TsBox::spin_temperature) + .def_rw("xray_ionised_fraction", &TsBox::xray_ionised_fraction) + .def_rw("kinetic_temp_neutral", &TsBox::kinetic_temp_neutral) + .def_rw("J_21_LW", &TsBox::J_21_LW); + // Bind IonizedBox nb::class_(m, "IonizedBox") .def(nb::init<>()) .def_rw("mean_f_coll", &IonizedBox::mean_f_coll) .def_rw("mean_f_coll_MINI", &IonizedBox::mean_f_coll_MINI) .def_rw("log10_Mturnover_ave", &IonizedBox::log10_Mturnover_ave) .def_rw("log10_Mturnover_MINI_ave", &IonizedBox::log10_Mturnover_MINI_ave) - .def_rw("xH_box", &IonizedBox::xH_box) - .def_rw("Gamma12_box", &IonizedBox::Gamma12_box) - .def_rw("MFP_box", &IonizedBox::MFP_box) - .def_rw("z_re_box", &IonizedBox::z_re_box) - .def_rw("dNrec_box", &IonizedBox::dNrec_box) - .def_rw("temp_kinetic_all_gas", &IonizedBox::temp_kinetic_all_gas) - .def_rw("Fcoll", &IonizedBox::Fcoll) - .def_rw("Fcoll_MINI", &IonizedBox::Fcoll_MINI); + .def_rw("neutral_fraction", &IonizedBox::neutral_fraction) + .def_rw("ionisation_rate_G12", &IonizedBox::ionisation_rate_G12) + .def_rw("mean_free_path", &IonizedBox::mean_free_path) + .def_rw("z_reion", &IonizedBox::z_reion) + .def_rw("cumulative_recombinations", &IonizedBox::cumulative_recombinations) + .def_rw("kinetic_temperature", &IonizedBox::kinetic_temperature) + .def_rw("unnormalised_nion", &IonizedBox::unnormalised_nion) + .def_rw("unnormalised_nion_mini", &IonizedBox::unnormalised_nion_mini); - nb::class_(m, "BrightnessTemp ") + // Bind BrightnessTemp + nb::class_(m, "BrightnessTemp") .def(nb::init<>()) .def_rw("brightness_temp", &BrightnessTemp::brightness_temp); - // Bind functions + // OutputStruct COMPUTE FUNCTIONS m.def("ComputeInitialConditions", &ComputeInitialConditions); m.def("ComputePerturbField", &ComputePerturbField); m.def("ComputeHaloField", &ComputeHaloField); @@ -286,6 +272,8 @@ NB_MODULE(c_21cmfast, m) { m.def("ComputeBrightnessTemp", &ComputeBrightnessTemp); m.def("ComputeHaloBox", &ComputeHaloBox); m.def("UpdateXraySourceBox", 
&UpdateXraySourceBox); + + // PHOTON CONSERVATION MODEL FUNCTIONS m.def("InitialisePhotonCons", &InitialisePhotonCons); m.def("PhotonCons_Calibration", &PhotonCons_Calibration); m.def("ComputeZstart_PhotonCons", &ComputeZstart_PhotonCons); @@ -293,28 +281,51 @@ NB_MODULE(c_21cmfast, m) { m.def("determine_deltaz_for_photoncons", &determine_deltaz_for_photoncons); m.def("ObtainPhotonConsData", &ObtainPhotonConsData); m.def("FreePhotonConsMemory", &FreePhotonConsMemory); - m.def( - "photon_cons_allocated", []() -> bool { return photon_cons_allocated; }, - "Returns whether photon conservation memory is allocated"); m.def("set_alphacons_params", &set_alphacons_params); + + // Non-OutputStruct data products m.def("ComputeLF", &ComputeLF); m.def("ComputeTau", &ComputeTau); + + // Initialisation functions needed in the wrapper m.def("init_ps", &init_ps); m.def("init_heat", &init_heat); m.def("CreateFFTWWisdoms", &CreateFFTWWisdoms); m.def("Broadcast_struct_global_noastro", &Broadcast_struct_global_noastro); m.def("Broadcast_struct_global_all", &Broadcast_struct_global_all); m.def("initialiseSigmaMInterpTable", &initialiseSigmaMInterpTable); + m.def("initialise_GL", &initialise_GL); + + // Integration routines + m.def("get_sigma", &get_sigma); + m.def("get_condition_integrals", &get_condition_integrals); + m.def("get_halo_chmf_interval", &get_halo_chmf_interval); + m.def("get_halomass_at_probability", &get_halomass_at_probability); + m.def("get_global_SFRD_z", &get_global_SFRD_z); + m.def("get_global_Nion_z", &get_global_Nion_z); + m.def("get_conditional_FgtrM", &get_conditional_FgtrM); + m.def("get_conditional_SFRD", &get_conditional_SFRD); + m.def("get_conditional_Nion", &get_conditional_Nion); + m.def("get_conditional_Xray", &get_conditional_Xray); + + // Error framework testing m.def("SomethingThatCatches", &SomethingThatCatches); m.def("FunctionThatCatches", &FunctionThatCatches); m.def("FunctionThatThrows", &FunctionThatThrows); + + // Test Outputs For Specific Models 
m.def("single_test_sample", &single_test_sample); m.def("test_halo_props", &test_halo_props); m.def("test_filter", &test_filter); + + // Functions required to access cosmology & mass functions directly m.def("dicke", &dicke); m.def("sigma_z0", &sigma_z0); m.def("dsigmasqdm_z0", &dsigmasqdm_z0); + m.def("power_in_k", &power_in_k); m.def("get_delta_crit", &get_delta_crit); m.def("atomic_cooling_threshold", &atomic_cooling_threshold); + m.def("unconditional_hmf", &unconditional_hmf); + m.def("conditional_hmf", &conditional_hmf); m.def("expected_nhalo", &expected_nhalo); } diff --git a/src/py21cmfast/src/filtering.h b/src/py21cmfast/src/filtering.h index 0bc689623..0b5fa8808 100644 --- a/src/py21cmfast/src/filtering.h +++ b/src/py21cmfast/src/filtering.h @@ -1,6 +1,7 @@ #ifndef _FILTERING_H #define _FILTERING_H +#include #include #ifdef __cplusplus diff --git a/src/py21cmfast/src/meson.build b/src/py21cmfast/src/meson.build index 09700ecda..b7d2f5ce8 100644 --- a/src/py21cmfast/src/meson.build +++ b/src/py21cmfast/src/meson.build @@ -1,5 +1,5 @@ # Define the source files that contribute to the 21cmFast build -source_files = [ +source_files = files([ 'BrightnessTemperatureBox.c', 'HaloBox.c', 'HaloField.c', @@ -19,19 +19,18 @@ source_files = [ 'filtering.c', 'heating_helper_progs.c', 'hmf.c', + 'integral_wrappers.c', 'interp_tables.c', 'interpolation.c', 'photoncons.c', 'recombinations.c', + 'rng.c', + 'scaling_relations.c', 'subcell_rsds.c', 'thermochem.c', 'MapMass_cpu.c', - 'rng.c', - '_functionprototypes_wrapper.h', - '_inputparams_wrapper.h', - '_outputstructs_wrapper.h', '_wrapper.cpp', -] +]) # Define the 21cmFast dependencies omp = dependency('openmp') @@ -53,5 +52,5 @@ py.extension_module( source_files, dependencies: deps, install: true, - subdir:'py21cmfast', + subdir:'py21cmfast', ) From aa75563ef56a4332634b01f9748c86284446bf77 Mon Sep 17 00:00:00 2001 From: daviesje Date: Fri, 9 May 2025 19:43:44 +0200 Subject: [PATCH 115/145] imports --- 
src/py21cmfast/_cfg.py | 13 ++++--------- src/py21cmfast/drivers/_param_config.py | 3 ++- src/py21cmfast/meson.build | 1 + src/py21cmfast/src/_wrapper.cpp | 7 +++++++ src/py21cmfast/wrapper/cfuncs.py | 6 +++--- src/py21cmfast/wrapper/inputs.py | 10 ---------- src/py21cmfast/wrapper/structs.py | 8 +------- 7 files changed, 18 insertions(+), 30 deletions(-) diff --git a/src/py21cmfast/_cfg.py b/src/py21cmfast/_cfg.py index 058d7ef78..511f8a145 100644 --- a/src/py21cmfast/_cfg.py +++ b/src/py21cmfast/_cfg.py @@ -8,9 +8,10 @@ from pathlib import Path from typing import ClassVar +import py21cmfast.c_21cmfast as lib + from . import yaml from ._data import DATA_PATH -from .c_21cmfast import ffi, lib from .wrapper.structs import StructInstanceWrapper @@ -33,7 +34,7 @@ class Config(dict): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # keep the config settings from the C library here - self._c_config_settings = StructInstanceWrapper(lib.config_settings, ffi) + self._c_config_settings = StructInstanceWrapper(lib.get_config_settings) for k, v in self._defaults.items(): if k not in self: @@ -60,13 +61,7 @@ def __setitem__(self, key, value): def _pass_to_backend(self, key, value): """Set the value in the backend.""" - # we should possibly do a typemap for the ffi - if isinstance(value, Path | str): - setattr( - self._c_config_settings, key, ffi.new("char[]", str(value).encode()) - ) - else: - setattr(self._c_config_settings, key, value) + setattr(self._c_config_settings, key, value) @contextlib.contextmanager def use(self, **kwargs): diff --git a/src/py21cmfast/drivers/_param_config.py b/src/py21cmfast/drivers/_param_config.py index 862a9c457..39cf19a7c 100644 --- a/src/py21cmfast/drivers/_param_config.py +++ b/src/py21cmfast/drivers/_param_config.py @@ -11,7 +11,8 @@ import attrs import numpy as np -from ..c_21cmfast import ffi, lib +import py21cmfast.c_21cmfast as lib + from ..io import h5 from ..io.caching import OutputCache from ..wrapper.cfuncs 
import broadcast_input_struct, construct_fftw_wisdoms diff --git a/src/py21cmfast/meson.build b/src/py21cmfast/meson.build index 961020972..7ad598eca 100644 --- a/src/py21cmfast/meson.build +++ b/src/py21cmfast/meson.build @@ -17,6 +17,7 @@ py.install_sources( pure_subdirs = [ 'drivers', + 'io', 'wrapper', '_data', ] diff --git a/src/py21cmfast/src/_wrapper.cpp b/src/py21cmfast/src/_wrapper.cpp index 087882813..a86ed896f 100644 --- a/src/py21cmfast/src/_wrapper.cpp +++ b/src/py21cmfast/src/_wrapper.cpp @@ -80,6 +80,7 @@ NB_MODULE(c_21cmfast, m) { // Bind AstroParams nb::class_(m, "AstroParams") + .def(nb::init<>()) .def_rw("HII_EFF_FACTOR", &AstroParams::HII_EFF_FACTOR) .def_rw("F_STAR10", &AstroParams::F_STAR10) .def_rw("ALPHA_STAR", &AstroParams::ALPHA_STAR) @@ -127,6 +128,7 @@ NB_MODULE(c_21cmfast, m) { // TODO: the getter/setter workaround is clunky, we can go via a C++ std:string // or try something else. nb::class_(m, "ConfigSettings") + .def(nb::init<>()) .def_rw("HALO_CATALOG_MEM_FACTOR", &ConfigSettings::HALO_CATALOG_MEM_FACTOR) .def_ro("external_table_path", &ConfigSettings::external_table_path) .def_ro("wisdoms_path", &ConfigSettings::wisdoms_path) @@ -136,6 +138,7 @@ NB_MODULE(c_21cmfast, m) { .def("get_wisdoms_path", &get_wisdoms_path); nb::class_(m, "AstroOptions") + .def(nb::init<>()) .def_rw("USE_MINI_HALOS", &AstroOptions::USE_MINI_HALOS) .def_rw("USE_CMB_HEATING", &AstroOptions::USE_CMB_HEATING) .def_rw("USE_LYA_HEATING", &AstroOptions::USE_LYA_HEATING) @@ -328,4 +331,8 @@ NB_MODULE(c_21cmfast, m) { m.def("unconditional_hmf", &unconditional_hmf); m.def("conditional_hmf", &conditional_hmf); m.def("expected_nhalo", &expected_nhalo); + + m.def( + "get_config_settings", []() -> ConfigSettings& { return config_settings; }, + nb::rv_policy::reference); } diff --git a/src/py21cmfast/wrapper/cfuncs.py b/src/py21cmfast/wrapper/cfuncs.py index 2a1000011..608a35ead 100644 --- a/src/py21cmfast/wrapper/cfuncs.py +++ b/src/py21cmfast/wrapper/cfuncs.py @@ -442,9 
+442,9 @@ def evaluate_sigma( lib.get_sigma( masses.size, - masses, - sigma, - dsigmasq, + masses.ctypes.data, + sigma.ctypes.data, + dsigmasq.ctypes.data, ) return sigma, dsigmasq diff --git a/src/py21cmfast/wrapper/inputs.py b/src/py21cmfast/wrapper/inputs.py index 2b026d2de..878dfaf1d 100644 --- a/src/py21cmfast/wrapper/inputs.py +++ b/src/py21cmfast/wrapper/inputs.py @@ -101,11 +101,6 @@ class InputStruct: .. warning:: This class will *not* deal well with parameters of the struct which are pointers. All parameters should be primitive types, except for strings, which are dealt with specially. - - Parameters - ---------- - ffi : cffi object - The ffi object from any cffi-wrapped library. """ _subclasses: ClassVar = {} @@ -170,11 +165,6 @@ def cstruct(self) -> StructWrapper: cdict = self.cdict for k in self.struct.fieldnames: val = cdict[k] - - if isinstance(val, str): - # If it is a string, need to convert it to C string ourselves. - val = self.ffi.new("char[]", val.encode()) - setattr(self.struct.cstruct, k, val) return self.struct.cstruct diff --git a/src/py21cmfast/wrapper/structs.py b/src/py21cmfast/wrapper/structs.py index d13b048b8..de3fd5897 100644 --- a/src/py21cmfast/wrapper/structs.py +++ b/src/py21cmfast/wrapper/structs.py @@ -12,7 +12,6 @@ import py21cmfast.c_21cmfast as lib from .. import __version__ -from .._cfg import config from ._utils import ( asarray, float_to_string_precision, @@ -53,12 +52,7 @@ def __init__(self, *args): This instantiates the memory associated with the C struct, attached to this inst. """ self.__attrs_init__(*args) - if args[0] == "InitialConditions": - self._cobj = lib.InitialConditions - else: - raise NotImplementedError( - f"Wrapped class {args[0]} not listed as an option in StructWrapper." 
- ) + self._cobj = getattr(lib, self._name) self.cstruct = self._new() def _new(self): From 5f41f42ea83023a6c9006d168315e786bf48c601 Mon Sep 17 00:00:00 2001 From: daviesje Date: Sat, 10 May 2025 22:10:44 +0200 Subject: [PATCH 116/145] ics computed --- src/py21cmfast/_cfg.py | 4 +- src/py21cmfast/meson.build | 1 + src/py21cmfast/src/InputParameters.c | 21 --- src/py21cmfast/src/InputParameters.h | 4 +- src/py21cmfast/src/_wrapper.cpp | 248 +++++++++++++++++++-------- src/py21cmfast/wrapper/cfuncs.py | 23 ++- src/py21cmfast/wrapper/structs.py | 70 +++++--- 7 files changed, 247 insertions(+), 124 deletions(-) diff --git a/src/py21cmfast/_cfg.py b/src/py21cmfast/_cfg.py index 511f8a145..bc42b0b84 100644 --- a/src/py21cmfast/_cfg.py +++ b/src/py21cmfast/_cfg.py @@ -34,7 +34,7 @@ class Config(dict): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # keep the config settings from the C library here - self._c_config_settings = StructInstanceWrapper(lib.get_config_settings) + self._c_config_settings = StructInstanceWrapper(lib.get_config_settings()) for k, v in self._defaults.items(): if k not in self: @@ -61,6 +61,8 @@ def __setitem__(self, key, value): def _pass_to_backend(self, key, value): """Set the value in the backend.""" + if isinstance(value, Path): + value = str(value) setattr(self._c_config_settings, key, value) @contextlib.contextmanager diff --git a/src/py21cmfast/meson.build b/src/py21cmfast/meson.build index 7ad598eca..cf82292e1 100644 --- a/src/py21cmfast/meson.build +++ b/src/py21cmfast/meson.build @@ -18,6 +18,7 @@ py.install_sources( pure_subdirs = [ 'drivers', 'io', + 'templates', 'wrapper', '_data', ] diff --git a/src/py21cmfast/src/InputParameters.c b/src/py21cmfast/src/InputParameters.c index 37417f44c..6abd76fb5 100644 --- a/src/py21cmfast/src/InputParameters.c +++ b/src/py21cmfast/src/InputParameters.c @@ -27,26 +27,5 @@ CosmoParams *cosmo_params_global; AstroParams *astro_params_global; AstroOptions *astro_options_global; 
-void set_external_table_path(ConfigSettings *params, const char *value) { - if (params->external_table_path != 0) { - free(params->external_table_path); - } - params->external_table_path = (char *)malloc(strlen(value) + 1); - strcpy(params->external_table_path, value); -} -char *get_external_table_path(ConfigSettings *params) { - return params->external_table_path ? params->external_table_path : ""; -} -void set_wisdoms_path(ConfigSettings *params, const char *value) { - if (params->wisdoms_path != 0) { - free(params->wisdoms_path); - } - params->wisdoms_path = (char *)malloc(strlen(value) + 1); - strcpy(params->wisdoms_path, value); -} -char *get_wisdoms_path(ConfigSettings *params) { - return params->wisdoms_path ? params->wisdoms_path : ""; -} - // data paths, wisdoms, etc ConfigSettings config_settings; diff --git a/src/py21cmfast/src/InputParameters.h b/src/py21cmfast/src/InputParameters.h index 6c9502c6e..71d293b32 100644 --- a/src/py21cmfast/src/InputParameters.h +++ b/src/py21cmfast/src/InputParameters.h @@ -165,8 +165,8 @@ typedef struct AstroOptions { typedef struct ConfigSettings { double HALO_CATALOG_MEM_FACTOR; - char *external_table_path; - char *wisdoms_path; + char external_table_path[200]; + char wisdoms_path[200]; } ConfigSettings; void Broadcast_struct_global_all(SimulationOptions *simulation_options, diff --git a/src/py21cmfast/src/_wrapper.cpp b/src/py21cmfast/src/_wrapper.cpp index a86ed896f..a55d4526e 100644 --- a/src/py21cmfast/src/_wrapper.cpp +++ b/src/py21cmfast/src/_wrapper.cpp @@ -57,6 +57,7 @@ NB_MODULE(c_21cmfast, m) { .def_rw("CORR_SFR", &SimulationOptions::CORR_SFR) .def_rw("CORR_LX", &SimulationOptions::CORR_LX); + // Bind MatterOptions nb::class_(m, "MatterOptions") .def(nb::init<>()) .def_rw("USE_FFTW_WISDOM", &MatterOptions::USE_FFTW_WISDOM) @@ -125,18 +126,7 @@ NB_MODULE(c_21cmfast, m) { .def_rw("NU_X_MAX", &AstroParams::NU_X_MAX) .def_rw("NU_X_BAND_MAX", &AstroParams::NU_X_BAND_MAX); - // TODO: the getter/setter workaround is 
clunky, we can go via a C++ std:string - // or try something else. - nb::class_(m, "ConfigSettings") - .def(nb::init<>()) - .def_rw("HALO_CATALOG_MEM_FACTOR", &ConfigSettings::HALO_CATALOG_MEM_FACTOR) - .def_ro("external_table_path", &ConfigSettings::external_table_path) - .def_ro("wisdoms_path", &ConfigSettings::wisdoms_path) - .def("set_external_table_path", &set_external_table_path) - .def("get_external_table_path", &get_external_table_path) - .def("set_wisdoms_path", &set_wisdoms_path) - .def("get_wisdoms_path", &get_wisdoms_path); - + // Bind AstroOptions nb::class_(m, "AstroOptions") .def(nb::init<>()) .def_rw("USE_MINI_HALOS", &AstroOptions::USE_MINI_HALOS) @@ -161,88 +151,181 @@ NB_MODULE(c_21cmfast, m) { .def_rw("INTEGRATION_METHOD_ATOMIC", &AstroOptions::INTEGRATION_METHOD_ATOMIC) .def_rw("INTEGRATION_METHOD_MINI", &AstroOptions::INTEGRATION_METHOD_MINI); + // Bind ConfigSettings + nb::class_(m, "ConfigSettings") + .def(nb::init<>()) + .def_rw("HALO_CATALOG_MEM_FACTOR", &ConfigSettings::HALO_CATALOG_MEM_FACTOR) + .def("set_external_table_path", + [](ConfigSettings& self, const std::string& path) { + strcpy(self.external_table_path, path.c_str()); + }) + .def("get_external_table_path", + [](ConfigSettings& self) { return std::string(self.external_table_path); }) + .def("set_wisdoms_path", + [](ConfigSettings& self, const std::string& path) { + strcpy(self.wisdoms_path, path.c_str()); + }) + .def("get_wisdoms_path", + [](ConfigSettings& self) { return std::string(self.wisdoms_path); }); + // Output Struct Bindings // Bind InitialConditions nb::class_(m, "InitialConditions") .def(nb::init<>()) - .def_rw("lowres_density", &InitialConditions::lowres_density) - .def_rw("lowres_vx", &InitialConditions::lowres_vx) - .def_rw("lowres_vy", &InitialConditions::lowres_vy) - .def_rw("lowres_vz", &InitialConditions::lowres_vz) - .def_rw("lowres_vx_2LPT", &InitialConditions::lowres_vx_2LPT) - .def_rw("lowres_vy_2LPT", &InitialConditions::lowres_vy_2LPT) - 
.def_rw("lowres_vz_2LPT", &InitialConditions::lowres_vz_2LPT) - .def_rw("hires_density", &InitialConditions::hires_density) - .def_rw("hires_vx", &InitialConditions::hires_vx) - .def_rw("hires_vy", &InitialConditions::hires_vy) - .def_rw("hires_vz", &InitialConditions::hires_vz) - .def_rw("hires_vx_2LPT", &InitialConditions::hires_vx_2LPT) - .def_rw("hires_vy_2LPT", &InitialConditions::hires_vy_2LPT) - .def_rw("hires_vz_2LPT", &InitialConditions::hires_vz_2LPT) - .def_rw("lowres_vcb", &InitialConditions::lowres_vcb); + .def("set_lowres_density", + [](InitialConditions& self, nb::ndarray array) { + self.lowres_density = array.data(); + }) + .def("set_lowres_vx", [](InitialConditions& self, + nb::ndarray array) { self.lowres_vx = array.data(); }) + .def("set_lowres_vy", [](InitialConditions& self, + nb::ndarray array) { self.lowres_vy = array.data(); }) + .def("set_lowres_vz", [](InitialConditions& self, + nb::ndarray array) { self.lowres_vz = array.data(); }) + .def("set_lowres_vx_2LPT", + [](InitialConditions& self, nb::ndarray array) { + self.lowres_vx_2LPT = array.data(); + }) + .def("set_lowres_vy_2LPT", + [](InitialConditions& self, nb::ndarray array) { + self.lowres_vy_2LPT = array.data(); + }) + .def("set_lowres_vz_2LPT", + [](InitialConditions& self, nb::ndarray array) { + self.lowres_vz_2LPT = array.data(); + }) + .def("set_hires_density", + [](InitialConditions& self, nb::ndarray array) { + self.hires_density = array.data(); + }) + .def("set_hires_vx", [](InitialConditions& self, + nb::ndarray array) { self.hires_vx = array.data(); }) + .def("set_hires_vy", [](InitialConditions& self, + nb::ndarray array) { self.hires_vy = array.data(); }) + .def("set_hires_vz", [](InitialConditions& self, + nb::ndarray array) { self.hires_vz = array.data(); }) + .def("set_hires_vx_2LPT", + [](InitialConditions& self, nb::ndarray array) { + self.hires_vx_2LPT = array.data(); + }) + .def("set_hires_vy_2LPT", + [](InitialConditions& self, nb::ndarray array) { + 
self.hires_vy_2LPT = array.data(); + }) + .def("set_hires_vz_2LPT", + [](InitialConditions& self, nb::ndarray array) { + self.hires_vz_2LPT = array.data(); + }) + .def("set_lowres_vcb", [](InitialConditions& self, nb::ndarray array) { + self.lowres_vcb = array.data(); + }); // Bind PerturbedField nb::class_(m, "PerturbedField") .def(nb::init<>()) - .def_rw("density", &PerturbedField::density) - .def_rw("velocity_x", &PerturbedField::velocity_x) - .def_rw("velocity_y", &PerturbedField::velocity_y) - .def_rw("velocity_z", &PerturbedField::velocity_z); + .def("set_density", + [](PerturbedField& self, nb::ndarray array) { self.density = array.data(); }) + .def("set_velocity_x", + [](PerturbedField& self, nb::ndarray array) { self.velocity_x = array.data(); }) + .def("set_velocity_y", + [](PerturbedField& self, nb::ndarray array) { self.velocity_y = array.data(); }) + .def("set_velocity_z", [](PerturbedField& self, nb::ndarray array) { + self.velocity_z = array.data(); + }); // Bind HaloField nb::class_(m, "HaloField") .def(nb::init<>()) .def_rw("n_halos", &HaloField::n_halos) .def_rw("buffer_size", &HaloField::buffer_size) - .def_rw("halo_masses", &HaloField::halo_masses) - .def_rw("halo_coords", &HaloField::halo_coords) - .def_rw("star_rng", &HaloField::star_rng) - .def_rw("sfr_rng", &HaloField::sfr_rng) - .def_rw("xray_rng", &HaloField::xray_rng); + .def("set_halo_masses", + [](HaloField& self, nb::ndarray array) { self.halo_masses = array.data(); }) + .def("set_halo_coords", + [](HaloField& self, nb::ndarray array) { self.halo_coords = array.data(); }) + .def("set_star_rng", + [](HaloField& self, nb::ndarray array) { self.star_rng = array.data(); }) + .def("set_sfr_rng", + [](HaloField& self, nb::ndarray array) { self.sfr_rng = array.data(); }) + .def("set_xray_rng", + [](HaloField& self, nb::ndarray array) { self.xray_rng = array.data(); }); // Bind PerturbHaloField nb::class_(m, "PerturbHaloField") .def(nb::init<>()) .def_rw("n_halos", &PerturbHaloField::n_halos) 
.def_rw("buffer_size", &PerturbHaloField::buffer_size) - .def_rw("halo_masses", &PerturbHaloField::halo_masses) - .def_rw("halo_coords", &PerturbHaloField::halo_coords) - .def_rw("star_rng", &PerturbHaloField::star_rng) - .def_rw("sfr_rng", &PerturbHaloField::sfr_rng) - .def_rw("xray_rng", &PerturbHaloField::xray_rng); + .def("set_halo_masses", [](PerturbHaloField& self, + nb::ndarray array) { self.halo_masses = array.data(); }) + .def("set_halo_coords", [](PerturbHaloField& self, + nb::ndarray array) { self.halo_coords = array.data(); }) + .def("set_star_rng", + [](PerturbHaloField& self, nb::ndarray array) { self.star_rng = array.data(); }) + .def("set_sfr_rng", + [](PerturbHaloField& self, nb::ndarray array) { self.sfr_rng = array.data(); }) + .def("set_xray_rng", [](PerturbHaloField& self, nb::ndarray array) { + self.xray_rng = array.data(); + }); // Bind HaloBox nb::class_(m, "HaloBox") .def(nb::init<>()) - .def_rw("halo_mass", &HaloBox::halo_mass) - .def_rw("halo_stars", &HaloBox::halo_stars) - .def_rw("halo_stars_mini", &HaloBox::halo_stars_mini) - .def_rw("count", &HaloBox::count) - .def_rw("n_ion", &HaloBox::n_ion) - .def_rw("halo_sfr", &HaloBox::halo_sfr) - .def_rw("halo_xray", &HaloBox::halo_xray) - .def_rw("halo_sfr_mini", &HaloBox::halo_sfr_mini) - .def_rw("whalo_sfr", &HaloBox::whalo_sfr) + .def("set_halo_mass", + [](HaloBox& self, nb::ndarray array) { self.halo_mass = array.data(); }) + .def("set_halo_stars", + [](HaloBox& self, nb::ndarray array) { self.halo_stars = array.data(); }) + .def("set_halo_stars_mini", + [](HaloBox& self, nb::ndarray array) { self.halo_stars_mini = array.data(); }) + .def("set_count", [](HaloBox& self, nb::ndarray array) { self.count = array.data(); }) + .def("set_n_ion", + [](HaloBox& self, nb::ndarray array) { self.n_ion = array.data(); }) + .def("set_halo_sfr", + [](HaloBox& self, nb::ndarray array) { self.halo_sfr = array.data(); }) + .def("set_halo_xray", + [](HaloBox& self, nb::ndarray array) { self.halo_xray = 
array.data(); }) + .def("set_halo_sfr_mini", + [](HaloBox& self, nb::ndarray array) { self.halo_sfr_mini = array.data(); }) + .def("set_whalo_sfr", + [](HaloBox& self, nb::ndarray array) { self.whalo_sfr = array.data(); }) .def_rw("log10_Mcrit_ACG_ave", &HaloBox::log10_Mcrit_ACG_ave) .def_rw("log10_Mcrit_MCG_ave", &HaloBox::log10_Mcrit_MCG_ave); // Bind XraySourceBox nb::class_(m, "XraySourceBox") .def(nb::init<>()) - .def_rw("filtered_sfr", &XraySourceBox::filtered_sfr) - .def_rw("filtered_xray", &XraySourceBox::filtered_xray) - .def_rw("filtered_sfr_mini", &XraySourceBox::filtered_sfr_mini) - .def_rw("mean_log10_Mcrit_LW", &XraySourceBox::mean_log10_Mcrit_LW) - .def_rw("mean_sfr", &XraySourceBox::mean_sfr) - .def_rw("mean_sfr_mini", &XraySourceBox::mean_sfr_mini); + .def("set_filtered_sfr", [](XraySourceBox& self, + nb::ndarray array) { self.filtered_sfr = array.data(); }) + .def("set_filtered_xray", + [](XraySourceBox& self, nb::ndarray array) { + self.filtered_xray = array.data(); + }) + .def("set_filtered_sfr_mini", + [](XraySourceBox& self, nb::ndarray array) { + self.filtered_sfr_mini = array.data(); + }) + .def("set_mean_log10_Mcrit_LW", + [](XraySourceBox& self, nb::ndarray array) { + self.mean_log10_Mcrit_LW = array.data(); + }) + .def("set_mean_sfr", + [](XraySourceBox& self, nb::ndarray array) { self.mean_sfr = array.data(); }) + .def("set_mean_sfr_mini", [](XraySourceBox& self, nb::ndarray array) { + self.mean_sfr_mini = array.data(); + }); // Bind TsBox nb::class_(m, "TsBox") .def(nb::init<>()) - .def_rw("spin_temperature", &TsBox::spin_temperature) - .def_rw("xray_ionised_fraction", &TsBox::xray_ionised_fraction) - .def_rw("kinetic_temp_neutral", &TsBox::kinetic_temp_neutral) - .def_rw("J_21_LW", &TsBox::J_21_LW); + .def("set_spin_temperature", + [](TsBox& self, nb::ndarray array) { self.spin_temperature = array.data(); }) + .def("set_xray_ionised_fraction", + [](TsBox& self, nb::ndarray array) { + self.xray_ionised_fraction = array.data(); + }) + 
.def( + "set_kinetic_temp_neutral", + [](TsBox& self, nb::ndarray array) { self.kinetic_temp_neutral = array.data(); }) + .def("set_J_21_LW", + [](TsBox& self, nb::ndarray array) { self.J_21_LW = array.data(); }); // Bind IonizedBox nb::class_(m, "IonizedBox") @@ -251,20 +334,42 @@ NB_MODULE(c_21cmfast, m) { .def_rw("mean_f_coll_MINI", &IonizedBox::mean_f_coll_MINI) .def_rw("log10_Mturnover_ave", &IonizedBox::log10_Mturnover_ave) .def_rw("log10_Mturnover_MINI_ave", &IonizedBox::log10_Mturnover_MINI_ave) - .def_rw("neutral_fraction", &IonizedBox::neutral_fraction) - .def_rw("ionisation_rate_G12", &IonizedBox::ionisation_rate_G12) - .def_rw("mean_free_path", &IonizedBox::mean_free_path) - .def_rw("z_reion", &IonizedBox::z_reion) - .def_rw("cumulative_recombinations", &IonizedBox::cumulative_recombinations) - .def_rw("kinetic_temperature", &IonizedBox::kinetic_temperature) - .def_rw("unnormalised_nion", &IonizedBox::unnormalised_nion) - .def_rw("unnormalised_nion_mini", &IonizedBox::unnormalised_nion_mini); + .def("set_neutral_fraction", + [](IonizedBox& self, nb::ndarray array) { + self.neutral_fraction = array.data(); + }) + .def("set_ionisation_rate_G12", + [](IonizedBox& self, nb::ndarray array) { + self.ionisation_rate_G12 = array.data(); + }) + .def("set_mean_free_path", + [](IonizedBox& self, nb::ndarray array) { self.mean_free_path = array.data(); }) + .def("set_z_reion", + [](IonizedBox& self, nb::ndarray array) { self.z_reion = array.data(); }) + .def("set_cumulative_recombinations", + [](IonizedBox& self, nb::ndarray array) { + self.cumulative_recombinations = array.data(); + }) + .def("set_kinetic_temperature", + [](IonizedBox& self, nb::ndarray array) { + self.kinetic_temperature = array.data(); + }) + .def("set_unnormalised_nion", + [](IonizedBox& self, nb::ndarray array) { + self.unnormalised_nion = array.data(); + }) + .def("set_unnormalised_nion_mini", [](IonizedBox& self, nb::ndarray array) { + self.unnormalised_nion_mini = array.data(); + }); // 
Bind BrightnessTemp nb::class_(m, "BrightnessTemp") .def(nb::init<>()) - .def_rw("brightness_temp", &BrightnessTemp::brightness_temp); + .def("set_brightness_temp", [](BrightnessTemp& self, nb::ndarray array) { + self.brightness_temp = array.data(); + }); + // Function Bindings // OutputStruct COMPUTE FUNCTIONS m.def("ComputeInitialConditions", &ComputeInitialConditions); m.def("ComputePerturbField", &ComputePerturbField); @@ -300,7 +405,10 @@ NB_MODULE(c_21cmfast, m) { m.def("initialise_GL", &initialise_GL); // Integration routines - m.def("get_sigma", &get_sigma); + m.def("get_sigma", [](int size, nb::ndarray masses, nb::ndarray sigma, + nb::ndarray dsigmasq) { + return get_sigma(size, masses.data(), sigma.data(), dsigmasq.data()); + }); m.def("get_condition_integrals", &get_condition_integrals); m.def("get_halo_chmf_interval", &get_halo_chmf_interval); m.def("get_halomass_at_probability", &get_halomass_at_probability); diff --git a/src/py21cmfast/wrapper/cfuncs.py b/src/py21cmfast/wrapper/cfuncs.py index 608a35ead..eeb8efb5e 100644 --- a/src/py21cmfast/wrapper/cfuncs.py +++ b/src/py21cmfast/wrapper/cfuncs.py @@ -1,7 +1,6 @@ """Low-level python wrappers of C functions.""" import logging -import warnings from collections.abc import Callable, Sequence from functools import cache from typing import Literal @@ -17,14 +16,22 @@ from .inputs import ( InputParameters, ) -from .outputs import InitialConditions, PerturbHaloField logger = logging.getLogger(__name__) # Ideally, backend functions that we access here should do all the broadcasting/initialisation themselves # These decorators are for lower functions which are called directly in one or two lines, like delta_crit -# TODO: a lot of these assume input as numpy arrays via use of .shape, explicitly require this +# NOTE: On casting to C pointers: +# ------------------------------- +# Currently our wrapper functions directly take C type pointers, which +# requires us to cast the data to the correct type before passing 
it to the C. +# This is made annoying by the fact that CAMB (which is indirectly imported somewhere) +# appears to have overwritten the ctypes library pointer types which cause errors. +# We will use the nanobind ndarray casters, which allow us to pass +# numpy arrays directly to C++ functions, with size and type information. +# We will have to translate the `integral_wrapper.c` functions to C++ and (maybe?) define +# some wrapper layer functions in C++ for the output struct functions to parse the array data. def broadcast_input_struct(inputs: InputParameters): @@ -437,14 +444,14 @@ def evaluate_sigma( Uses the 21cmfast backend """ masses = masses.astype("f8") - sigma = np.zeros_like(masses) - dsigmasq = np.zeros_like(masses) + sigma = np.zeros_like(masses, dtype="f8") + dsigmasq = np.zeros_like(masses, dtype="f8") lib.get_sigma( masses.size, - masses.ctypes.data, - sigma.ctypes.data, - dsigmasq.ctypes.data, + masses, + sigma, + dsigmasq, ) return sigma, dsigmasq diff --git a/src/py21cmfast/wrapper/structs.py b/src/py21cmfast/wrapper/structs.py index de3fd5897..28e6c6b77 100644 --- a/src/py21cmfast/wrapper/structs.py +++ b/src/py21cmfast/wrapper/structs.py @@ -40,7 +40,7 @@ class StructWrapper: _name: str = attrs.field(converter=str) cstruct = attrs.field(default=None) - _TYPEMAP = bidict({"float32": "float *", "float64": "double *", "int32": "int *"}) + primitive_types = (bool, str, int, float) @_name.default def _name_default(self): @@ -52,8 +52,8 @@ def __init__(self, *args): This instantiates the memory associated with the C struct, attached to this inst. 
""" self.__attrs_init__(*args) - self._cobj = getattr(lib, self._name) - self.cstruct = self._new() + self._cobj = getattr(lib, self._name) # The wrapped class + self.cstruct = self._new() # The instance of the wrapped class def _new(self): """Return a new empty C structure corresponding to this class.""" @@ -63,9 +63,9 @@ def _new(self): def fields(self) -> dict[str, Any]: """A list of fields of the underlying C struct (a list of tuples of "name, type").""" result = {} - for attr in dir(self._cobj): - if not attr.startswith("__") and not callable(getattr(self._cobj, attr)): - result[attr] = type(getattr(self._cobj, attr)) + for attr in dir(self.cstruct): + if not attr.startswith("__") and not callable(getattr(self.cstruct, attr)): + result[attr] = type(getattr(self.cstruct, attr)) return result @property @@ -76,12 +76,12 @@ def fieldnames(self) -> list[str]: @property def pointer_fields(self) -> list[str]: """A list of names of fields which have pointer type in the C struct.""" - return [f for f, t in self.fields.items() if t.type.kind == "pointer"] + return [f.split("set_")[0] for f in self.fields if f.startswith("set_")] @property def primitive_fields(self) -> list[str]: """The list of names of fields which have primitive type in the C struct.""" - return [f for f, t in self.fields.items() if t.type.kind == "primitive"] + return [f for f, t in self.fields.items() if t in self.primitive_types] def __getstate__(self): """Return the current state of the class without pointers.""" @@ -94,11 +94,13 @@ def expose_to_c(self, array: Array, name: str): if not array.state.initialized: raise ValueError("Array must be initialized before exposing to C") - # TODO: check if we need to cast or anything like that try: - setattr(self.cstruct, name, array.value) - except TypeError as e: - raise TypeError(f"Error setting {name}") from e + setter = getattr(self.cstruct, "set_" + name) + setter(array.value) + except AttributeError as e: + raise TypeError( + f"Error setting {name} on 
{self.__class__.__name__}, no setter found" + ) from e class StructInstanceWrapper: @@ -112,36 +114,60 @@ class StructInstanceWrapper: The reference to the C object to wrap. """ + # NOTE: currently assumes that the C object is not internally changed + # We get all the values from C on initialization, and pass changes back to C + # The StructInstanceWrapper holds the attributes as they appear in python, + # whereas ._cobj holds primitives and getters/setters for pointers. + # TODO: we should ditch the object attributes and just use the C object + # with a custom __getattr__ def __init__(self, wrapped): self._cobj = wrapped - - # nanobind does not supply a list of fileds like CFFI does, so we do + # nanobind does not supply a list of fields like CFFI does, so we do # this instead to return a list of members for attr in dir(self._cobj): - if not attr.startswith("__") and not callable(getattr(self._cobj, attr)): - setattr(self, attr, getattr(self._cobj, attr)) + # ignore dunders + if not attr.startswith("__"): + if attr.startswith("get_"): + # If the attribute is a setter, we need to set the value + # to the value of the attribute without the "set_" prefix + setattr(self, attr[4:], getattr(self._cobj, attr)()) + elif not callable(getattr(self._cobj, attr)): + # Otherwise, we just set the attribute to the value + setattr(self, attr, getattr(self._cobj, attr)) # Get the name of the structure - # WIP: CFFI Refactor self._ctype = type(self._cobj).__name__ def __setattr__(self, name, value): """Set an attribute of the instance, attempting to change it in the C struct as well.""" - with contextlib.suppress(AttributeError): - setattr(self._cobj, name, value) + # use the non-overridden __setattr__ to set the attribute in Python object.__setattr__(self, name, value) + # Set the attribute in the C struct + if not name.startswith("_"): + if "set_" + name in dir(self._cobj): + getattr(self._cobj, "set_" + name)(value) + elif name in dir(self._cobj): + setattr(self._cobj, name, 
value) + else: + raise AttributeError( + f"Attribute {name} not found in {self.__class__.__name__}" + ) + def items(self): """Yield (name, value) pairs for each element of the struct.""" # nanobind does not supply a list of fileds like CFFI does, so we do # this instead to return a list of members for attr in dir(self._cobj): - if not attr.startswith("__") and not callable(getattr(self._cobj, attr)): - yield attr, getattr(self, attr) + if not attr.startswith("__"): + if attr.startswith("get_"): + yield attr[4:], getattr(self._cobj, attr)() + elif not attr.startswith("set_"): + yield attr, getattr(self._cobj, attr) def keys(self): """Return a list of names of elements in the struct.""" - return [nm for nm, tp in self.items()] + return [nm for nm, _ in self.items()] def __iter__(self): """Iterate over the object like a dict.""" From 89d09498215e161be621ba67c05ccd08727e26b3 Mon Sep 17 00:00:00 2001 From: daviesje Date: Sun, 11 May 2025 17:36:35 +0200 Subject: [PATCH 117/145] change the rest of the wrapper to use nb::ndarray --- src/py21cmfast/src/_wrapper.cpp | 189 ++++++++++++++++++++++++++++--- src/py21cmfast/wrapper/cfuncs.py | 13 --- 2 files changed, 173 insertions(+), 29 deletions(-) diff --git a/src/py21cmfast/src/_wrapper.cpp b/src/py21cmfast/src/_wrapper.cpp index a55d4526e..94e4bd926 100644 --- a/src/py21cmfast/src/_wrapper.cpp +++ b/src/py21cmfast/src/_wrapper.cpp @@ -405,29 +405,186 @@ NB_MODULE(c_21cmfast, m) { m.def("initialise_GL", &initialise_GL); // Integration routines - m.def("get_sigma", [](int size, nb::ndarray masses, nb::ndarray sigma, - nb::ndarray dsigmasq) { - return get_sigma(size, masses.data(), sigma.data(), dsigmasq.data()); + // TODO: it may be a better choice to rewrite integral_wrappers in C++ directly + m.def("get_sigma", [](nb::ndarray mass_values, nb::ndarray sigma_out, + nb::ndarray dsigmasqdm_out) { + size_t n_masses = mass_values.shape(0); + if (sigma_out.shape(0) != n_masses || dsigmasqdm_out.shape(0) != n_masses) { + throw 
std::runtime_error("Array sizes do not match the number of masses."); + } + get_sigma(n_masses, mass_values.data(), sigma_out.data(), dsigmasqdm_out.data()); }); - m.def("get_condition_integrals", &get_condition_integrals); - m.def("get_halo_chmf_interval", &get_halo_chmf_interval); - m.def("get_halomass_at_probability", &get_halomass_at_probability); - m.def("get_global_SFRD_z", &get_global_SFRD_z); - m.def("get_global_Nion_z", &get_global_Nion_z); - m.def("get_conditional_FgtrM", &get_conditional_FgtrM); - m.def("get_conditional_SFRD", &get_conditional_SFRD); - m.def("get_conditional_Nion", &get_conditional_Nion); - m.def("get_conditional_Xray", &get_conditional_Xray); + + m.def("get_condition_integrals", + [](double redshift, double z_prev, nb::ndarray cond_values, + nb::ndarray out_n_exp, nb::ndarray out_m_exp) { + size_t n_conditions = cond_values.shape(0); + if (out_n_exp.shape(0) != n_conditions || out_m_exp.shape(0) != n_conditions) { + throw std::runtime_error("Array sizes do not match the number of conditions."); + } + get_condition_integrals(redshift, z_prev, n_conditions, cond_values.data(), + out_n_exp.data(), out_m_exp.data()); + }); + + m.def("get_halo_chmf_interval", + [](double redshift, double z_prev, nb::ndarray cond_values, + nb::ndarray lnM_lo, nb::ndarray lnM_hi, nb::ndarray out_n) { + size_t n_conditions = cond_values.shape(0); + size_t n_masslim = lnM_lo.shape(0); + if (lnM_hi.shape(0) != n_masslim || out_n.shape(0) != n_conditions || + out_n.shape(1) != n_masslim) { + throw std::runtime_error("Array sizes do not match the specified dimensions."); + } + get_halo_chmf_interval(redshift, z_prev, n_conditions, cond_values.data(), n_masslim, + lnM_lo.data(), lnM_hi.data(), out_n.data()); + }); + + m.def("get_halomass_at_probability", + [](double redshift, double z_prev, nb::ndarray cond_values, + nb::ndarray probabilities, nb::ndarray out_mass) { + size_t n_conditions = cond_values.shape(0); + if (probabilities.shape(0) != n_conditions || 
out_mass.shape(0) != n_conditions) { + throw std::runtime_error("Array sizes do not match the number of conditions."); + } + get_halomass_at_probability(redshift, z_prev, n_conditions, cond_values.data(), + probabilities.data(), out_mass.data()); + }); + + m.def("get_global_SFRD_z", + [](nb::ndarray redshifts, nb::ndarray log10_turnovers_mcg, + nb::ndarray out_sfrd, nb::ndarray out_sfrd_mini) { + size_t n_redshift = redshifts.shape(0); + if (log10_turnovers_mcg.shape(0) != n_redshift || out_sfrd.shape(0) != n_redshift || + out_sfrd_mini.shape(0) != n_redshift) { + throw std::runtime_error("Array sizes do not match the number of redshifts."); + } + get_global_SFRD_z(n_redshift, redshifts.data(), log10_turnovers_mcg.data(), + out_sfrd.data(), out_sfrd_mini.data()); + }); + + m.def("get_global_Nion_z", + [](nb::ndarray redshifts, nb::ndarray log10_turnovers_mcg, + nb::ndarray out_nion, nb::ndarray out_nion_mini) { + size_t n_redshift = redshifts.shape(0); + if (log10_turnovers_mcg.shape(0) != n_redshift || out_nion.shape(0) != n_redshift || + out_nion_mini.shape(0) != n_redshift) { + throw std::runtime_error("Array sizes do not match the number of redshifts."); + } + get_global_Nion_z(n_redshift, redshifts.data(), log10_turnovers_mcg.data(), + out_nion.data(), out_nion_mini.data()); + }); + + m.def("get_conditional_FgtrM", + [](double redshift, double R, nb::ndarray densities, + nb::ndarray out_fcoll, nb::ndarray out_dfcoll) { + size_t n_densities = densities.shape(0); + if (out_fcoll.shape(0) != n_densities || out_dfcoll.shape(0) != n_densities) { + throw std::runtime_error("Array sizes do not match the number of densities."); + } + get_conditional_FgtrM(redshift, R, n_densities, densities.data(), out_fcoll.data(), + out_dfcoll.data()); + }); + + m.def("get_conditional_SFRD", [](double redshift, double R, nb::ndarray densities, + nb::ndarray log10_mturns, nb::ndarray out_sfrd, + nb::ndarray out_sfrd_mini) { + size_t n_densities = densities.shape(0); + if 
(log10_mturns.shape(0) != n_densities || out_sfrd.shape(0) != n_densities || + out_sfrd_mini.shape(0) != n_densities) { + throw std::runtime_error("Array sizes do not match the number of densities."); + } + get_conditional_SFRD(redshift, R, n_densities, densities.data(), log10_mturns.data(), + out_sfrd.data(), out_sfrd_mini.data()); + }); + + m.def("get_conditional_Nion", [](double redshift, double R, nb::ndarray densities, + nb::ndarray log10_mturns_acg, + nb::ndarray log10_mturns_mcg, + nb::ndarray out_nion, + nb::ndarray out_nion_mini) { + size_t n_densities = densities.shape(0); + if (log10_mturns_acg.shape(0) != n_densities || log10_mturns_mcg.shape(0) != n_densities || + out_nion.shape(0) != n_densities || out_nion_mini.shape(0) != n_densities) { + throw std::runtime_error("Array sizes do not match the number of densities."); + } + get_conditional_Nion(redshift, R, n_densities, densities.data(), log10_mturns_acg.data(), + log10_mturns_mcg.data(), out_nion.data(), out_nion_mini.data()); + }); + + m.def("get_conditional_Xray", + [](double redshift, double R, nb::ndarray densities, + nb::ndarray log10_mturns, nb::ndarray out_xray) { + size_t n_densities = densities.shape(0); + if (log10_mturns.shape(0) != n_densities || out_xray.shape(0) != n_densities) { + throw std::runtime_error("Array sizes do not match the number of densities."); + } + get_conditional_Xray(redshift, R, n_densities, densities.data(), log10_mturns.data(), + out_xray.data()); + }); // Error framework testing m.def("SomethingThatCatches", &SomethingThatCatches); m.def("FunctionThatCatches", &FunctionThatCatches); m.def("FunctionThatThrows", &FunctionThatThrows); - // Test Outputs For Specific Models - m.def("single_test_sample", &single_test_sample); - m.def("test_halo_props", &test_halo_props); - m.def("test_filter", &test_filter); + m.def("single_test_sample", + [](unsigned long long int seed, nb::ndarray conditions, nb::ndarray cond_crd, + double z_out, double z_in, nb::ndarray out_n_tot, 
nb::ndarray out_n_cell, + nb::ndarray out_n_exp, nb::ndarray out_m_cell, + nb::ndarray out_m_exp, nb::ndarray out_halo_masses, + nb::ndarray out_halo_coords) { + size_t n_condition = conditions.shape(0); + if (cond_crd.shape(0) != n_condition || cond_crd.shape(1) != 3) { + throw std::runtime_error("cond_crd must have shape (n_condition, 3)."); + } + if (out_n_cell.shape(0) != n_condition || out_n_exp.shape(0) != n_condition || + out_m_cell.shape(0) != n_condition || out_m_exp.shape(0) != n_condition) { + throw std::runtime_error("Output arrays must match the number of conditions."); + } + int status = single_test_sample(seed, n_condition, conditions.data(), cond_crd.data(), + z_out, z_in, out_n_tot.data(), out_n_cell.data(), + out_n_exp.data(), out_m_cell.data(), out_m_exp.data(), + out_halo_masses.data(), out_halo_coords.data()); + if (status != 0) { + throw std::runtime_error("single_test_sample failed with status: " + + std::to_string(status)); + } + }); + + m.def("test_halo_props", + [](double redshift, nb::ndarray vcb_grid, nb::ndarray J21_LW_grid, + nb::ndarray z_re_grid, nb::ndarray Gamma12_ion_grid, + nb::ndarray halo_masses, nb::ndarray halo_coords, + nb::ndarray star_rng, nb::ndarray sfr_rng, nb::ndarray xray_rng, + nb::ndarray halo_props_out) { + size_t n_halos = halo_masses.shape(0); + if (halo_coords.shape(0) != n_halos || halo_coords.shape(1) != 3 || + star_rng.shape(0) != n_halos || sfr_rng.shape(0) != n_halos || + xray_rng.shape(0) != n_halos || halo_props_out.shape(0) != n_halos || + halo_props_out.shape(1) != 12) { + throw std::runtime_error("Input/output arrays must match the number of halos."); + } + int status = test_halo_props(redshift, vcb_grid.data(), J21_LW_grid.data(), + z_re_grid.data(), Gamma12_ion_grid.data(), n_halos, + halo_masses.data(), halo_coords.data(), star_rng.data(), + sfr_rng.data(), xray_rng.data(), halo_props_out.data()); + if (status != 0) { + throw std::runtime_error("test_halo_props failed with status: " + + 
std::to_string(status)); + } + }); + + m.def("test_filter", [](nb::ndarray input_box, double R, double R_param, int filter_flag, + nb::ndarray result) { + size_t n_elements = input_box.size(); + if (result.size() != n_elements) { + throw std::runtime_error("result array must have the same size as input_box."); + } + int status = test_filter(input_box.data(), R, R_param, filter_flag, result.data()); + if (status != 0) { + throw std::runtime_error("test_filter failed with status: " + std::to_string(status)); + } + }); // Functions required to access cosmology & mass functions directly m.def("dicke", &dicke); diff --git a/src/py21cmfast/wrapper/cfuncs.py b/src/py21cmfast/wrapper/cfuncs.py index eeb8efb5e..116611469 100644 --- a/src/py21cmfast/wrapper/cfuncs.py +++ b/src/py21cmfast/wrapper/cfuncs.py @@ -448,7 +448,6 @@ def evaluate_sigma( dsigmasq = np.zeros_like(masses, dtype="f8") lib.get_sigma( - masses.size, masses, sigma, dsigmasq, @@ -521,7 +520,6 @@ def evaluate_condition_integrals( lib.get_condition_integrals( redshift, redshift_prev if redshift_prev is not None else -1, - cond_array.size, cond_array, n_halo, m_coll, @@ -552,9 +550,7 @@ def integrate_chmf_interval( lib.get_halo_chmf_interval( redshift, redshift_prev if redshift_prev is not None else -1, - len(cond_values), cond_values, - len(lnm_lower), lnm_lower, lnm_upper, out_prob, @@ -588,7 +584,6 @@ def evaluate_inverse_table( lib.get_halomass_at_probability( redshift, redshift_prev, - cond_array.size, cond_array, probabilities, masses, @@ -612,7 +607,6 @@ def evaluate_FgtrM_cond( lib.get_conditional_FgtrM( redshift, R, - densities.size, densities, fcoll, dfcoll, @@ -640,7 +634,6 @@ def evaluate_SFRD_z( sfrd_mini = np.zeros_like(redshifts) lib.get_global_SFRD_z( - redshifts.size, redshifts, log10mturns, sfrd, @@ -670,7 +663,6 @@ def evaluate_Nion_z( nion_mini = np.zeros_like(redshifts) lib.get_global_Nion_z( - redshifts.size, redshifts, log10mturns, nion, @@ -703,7 +695,6 @@ def evaluate_SFRD_cond( 
lib.get_conditional_SFRD( redshift, radius, - densities.size, densities, log10mturns, sfrd, @@ -738,7 +729,6 @@ def evaluate_Nion_cond( lib.get_conditional_Nion( redshift, radius, - densities.size, densities, l10mturns_acg, l10mturns_mcg, @@ -772,7 +762,6 @@ def evaluate_Xray_cond( lib.get_conditional_Xray( redshift, radius, - densities.size, densities, log10mturns, xray, @@ -810,7 +799,6 @@ def sample_halos_from_conditions( lib.single_test_sample( inputs.random_seed, - n_cond, cond_array, crd_in, redshift, @@ -904,7 +892,6 @@ def convert_halo_properties( J_21_LW_grid, z_re_grid, Gamma12_grid, - n_halos, halo_masses, halo_coords, star_rng, From 9b8d9ff2d3fce18c8fc2ae0508b4f5d28ca822bf Mon Sep 17 00:00:00 2001 From: James Davies Date: Mon, 12 May 2025 16:51:32 +0200 Subject: [PATCH 118/145] fix up wrapper function arguments --- pyproject.toml | 3 + src/py21cmfast/drivers/coeval.py | 8 +- src/py21cmfast/drivers/lightcone.py | 8 +- src/py21cmfast/src/_wrapper.cpp | 126 ++++++++++++++++++--------- src/py21cmfast/wrapper/cfuncs.py | 86 ++++++++---------- src/py21cmfast/wrapper/exceptions.py | 4 +- tests/test_exceptions.py | 8 +- tests/test_filtering.py | 10 +-- 8 files changed, 146 insertions(+), 107 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5d6aa0bfb..314b7588a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,6 +110,9 @@ Changelog="https://github.com/21cmFAST/21cmFAST" build-backend = 'mesonpy' requires = ['meson-python','nanobind>=2.4.0'] +[tool.pytest.ini_options] +testpaths = ["tests"] + [tool.ruff] line-length = 88 target-version = "py311" diff --git a/src/py21cmfast/drivers/coeval.py b/src/py21cmfast/drivers/coeval.py index 3a5d27888..bfd5535ee 100644 --- a/src/py21cmfast/drivers/coeval.py +++ b/src/py21cmfast/drivers/coeval.py @@ -31,7 +31,11 @@ PerturbHaloField, TsBox, ) -from ..wrapper.photoncons import _get_photon_nonconservation_data, setup_photon_cons +from ..wrapper.photoncons import ( + _get_photon_nonconservation_data, + 
_photoncons_state, + setup_photon_cons, +) from . import single_field as sf from ._param_config import high_level_func @@ -481,7 +485,7 @@ def generate_coeval( ): yield coeval, coeval.redshift in out_redshifts - if lib.photon_cons_allocated: + if _photoncons_state.c_memory_allocated: lib.FreePhotonConsMemory() diff --git a/src/py21cmfast/drivers/lightcone.py b/src/py21cmfast/drivers/lightcone.py index 52537a4cd..03751a8cb 100644 --- a/src/py21cmfast/drivers/lightcone.py +++ b/src/py21cmfast/drivers/lightcone.py @@ -31,7 +31,11 @@ PerturbHaloField, TsBox, ) -from ..wrapper.photoncons import _get_photon_nonconservation_data, setup_photon_cons +from ..wrapper.photoncons import ( + _get_photon_nonconservation_data, + _photoncons_state, + setup_photon_cons, +) from . import exhaust from . import single_field as sf from ._param_config import high_level_func @@ -532,7 +536,7 @@ def _run_lightcone_from_perturbed_fields( # last redshift things if iz == len(scrollz) - 1: - if lib.photon_cons_allocated: + if _photoncons_state.c_memory_allocated: lib.FreePhotonConsMemory() if isinstance(lightcone, AngularLightcone) and lightconer.get_los_velocity: diff --git a/src/py21cmfast/src/_wrapper.cpp b/src/py21cmfast/src/_wrapper.cpp index 94e4bd926..5dc7cca48 100644 --- a/src/py21cmfast/src/_wrapper.cpp +++ b/src/py21cmfast/src/_wrapper.cpp @@ -392,8 +392,35 @@ NB_MODULE(c_21cmfast, m) { m.def("set_alphacons_params", &set_alphacons_params); // Non-OutputStruct data products - m.def("ComputeLF", &ComputeLF); - m.def("ComputeTau", &ComputeTau); + m.def("ComputeLF", + [](int component, size_t n_bins_mass, nb::ndarray z_LF, nb::ndarray M_TURNs, + nb::ndarray M_uv_z, nb::ndarray M_h_z, nb::ndarray log10phi) { + size_t n_redshifts = z_LF.shape(0); + if (M_h_z.shape(0) != n_redshifts || M_h_z.shape(1) != n_bins_mass || + M_uv_z.shape(0) != n_redshifts || M_uv_z.shape(1) != n_bins_mass || + log10phi.shape(0) != n_redshifts || log10phi.shape(1) != n_bins_mass || + M_TURNs.shape(0) != 
n_redshifts) { + throw std::runtime_error( + "Array size mismatch: M_h_z shape: " + std::to_string(M_h_z.shape(0)) + "x" + + std::to_string(M_h_z.shape(1)) + ", M_uv_z shape: " + + std::to_string(M_uv_z.shape(0)) + "x" + std::to_string(M_uv_z.shape(1)) + + ", log10phi shape: " + std::to_string(log10phi.shape(0)) + "x" + + std::to_string(log10phi.shape(1)) + + ", M_TURNs shape: " + std::to_string(M_TURNs.shape(0))); + } + ComputeLF(n_bins_mass, component, n_redshifts, z_LF.data(), M_TURNs.data(), + M_h_z.data(), M_uv_z.data(), log10phi.data()); + }); + m.def("ComputeTau", + [](nb::ndarray redshifts, nb::ndarray global_xHI, float z_re_HeII) { + size_t n_redshifts = redshifts.shape(0); + if (global_xHI.shape(0) != n_redshifts) { + throw std::runtime_error("XHI array size" + std::to_string(global_xHI.shape(0)) + + "does not match the number of redshifts." + + std::to_string(n_redshifts)); + } + return ComputeTau(n_redshifts, redshifts.data(), global_xHI.data(), z_re_HeII); + }); // Initialisation functions needed in the wrapper m.def("init_ps", &init_ps); @@ -442,8 +469,9 @@ NB_MODULE(c_21cmfast, m) { m.def("get_halomass_at_probability", [](double redshift, double z_prev, nb::ndarray cond_values, nb::ndarray probabilities, nb::ndarray out_mass) { - size_t n_conditions = cond_values.shape(0); - if (probabilities.shape(0) != n_conditions || out_mass.shape(0) != n_conditions) { + size_t n_conditions = cond_values.shape(0) * cond_values.shape(1); + if (probabilities.shape(0) * probabilities.shape(1) != n_conditions || + out_mass.shape(0) * out_mass.shape(1) != n_conditions) { throw std::runtime_error("Array sizes do not match the number of conditions."); } get_halomass_at_probability(redshift, z_prev, n_conditions, cond_values.data(), @@ -453,9 +481,9 @@ NB_MODULE(c_21cmfast, m) { m.def("get_global_SFRD_z", [](nb::ndarray redshifts, nb::ndarray log10_turnovers_mcg, nb::ndarray out_sfrd, nb::ndarray out_sfrd_mini) { - size_t n_redshift = redshifts.shape(0); - if 
(log10_turnovers_mcg.shape(0) != n_redshift || out_sfrd.shape(0) != n_redshift || - out_sfrd_mini.shape(0) != n_redshift) { + size_t n_redshift = redshifts.size(); + if (log10_turnovers_mcg.size() != n_redshift || out_sfrd.size() != n_redshift || + out_sfrd_mini.size() != n_redshift) { throw std::runtime_error("Array sizes do not match the number of redshifts."); } get_global_SFRD_z(n_redshift, redshifts.data(), log10_turnovers_mcg.data(), @@ -465,9 +493,9 @@ NB_MODULE(c_21cmfast, m) { m.def("get_global_Nion_z", [](nb::ndarray redshifts, nb::ndarray log10_turnovers_mcg, nb::ndarray out_nion, nb::ndarray out_nion_mini) { - size_t n_redshift = redshifts.shape(0); - if (log10_turnovers_mcg.shape(0) != n_redshift || out_nion.shape(0) != n_redshift || - out_nion_mini.shape(0) != n_redshift) { + size_t n_redshift = redshifts.size(); + if (log10_turnovers_mcg.size() != n_redshift || out_nion.size() != n_redshift || + out_nion_mini.size() != n_redshift) { throw std::runtime_error("Array sizes do not match the number of redshifts."); } get_global_Nion_z(n_redshift, redshifts.data(), log10_turnovers_mcg.data(), @@ -477,8 +505,8 @@ NB_MODULE(c_21cmfast, m) { m.def("get_conditional_FgtrM", [](double redshift, double R, nb::ndarray densities, nb::ndarray out_fcoll, nb::ndarray out_dfcoll) { - size_t n_densities = densities.shape(0); - if (out_fcoll.shape(0) != n_densities || out_dfcoll.shape(0) != n_densities) { + size_t n_densities = densities.size(); + if (out_fcoll.size() != n_densities || out_dfcoll.size() != n_densities) { throw std::runtime_error("Array sizes do not match the number of densities."); } get_conditional_FgtrM(redshift, R, n_densities, densities.data(), out_fcoll.data(), @@ -488,9 +516,9 @@ NB_MODULE(c_21cmfast, m) { m.def("get_conditional_SFRD", [](double redshift, double R, nb::ndarray densities, nb::ndarray log10_mturns, nb::ndarray out_sfrd, nb::ndarray out_sfrd_mini) { - size_t n_densities = densities.shape(0); - if (log10_mturns.shape(0) != n_densities 
|| out_sfrd.shape(0) != n_densities || - out_sfrd_mini.shape(0) != n_densities) { + size_t n_densities = densities.size(); + if (log10_mturns.size() != n_densities || out_sfrd.size() != n_densities || + out_sfrd_mini.size() != n_densities) { throw std::runtime_error("Array sizes do not match the number of densities."); } get_conditional_SFRD(redshift, R, n_densities, densities.data(), log10_mturns.data(), @@ -502,9 +530,9 @@ NB_MODULE(c_21cmfast, m) { nb::ndarray log10_mturns_mcg, nb::ndarray out_nion, nb::ndarray out_nion_mini) { - size_t n_densities = densities.shape(0); - if (log10_mturns_acg.shape(0) != n_densities || log10_mturns_mcg.shape(0) != n_densities || - out_nion.shape(0) != n_densities || out_nion_mini.shape(0) != n_densities) { + size_t n_densities = densities.size(); + if (log10_mturns_acg.size() != n_densities || log10_mturns_mcg.size() != n_densities || + out_nion.size() != n_densities || out_nion_mini.size() != n_densities) { throw std::runtime_error("Array sizes do not match the number of densities."); } get_conditional_Nion(redshift, R, n_densities, densities.data(), log10_mturns_acg.data(), @@ -514,8 +542,8 @@ NB_MODULE(c_21cmfast, m) { m.def("get_conditional_Xray", [](double redshift, double R, nb::ndarray densities, nb::ndarray log10_mturns, nb::ndarray out_xray) { - size_t n_densities = densities.shape(0); - if (log10_mturns.shape(0) != n_densities || out_xray.shape(0) != n_densities) { + size_t n_densities = densities.size(); + if (log10_mturns.size() != n_densities || out_xray.size() != n_densities) { throw std::runtime_error("Array sizes do not match the number of densities."); } get_conditional_Xray(redshift, R, n_densities, densities.data(), log10_mturns.data(), @@ -524,7 +552,9 @@ NB_MODULE(c_21cmfast, m) { // Error framework testing m.def("SomethingThatCatches", &SomethingThatCatches); - m.def("FunctionThatCatches", &FunctionThatCatches); + m.def("FunctionThatCatches", [](bool sub_func, bool pass, nb::ndarray answer) { + return 
FunctionThatCatches(sub_func, pass, answer.data()); + }); m.def("FunctionThatThrows", &FunctionThatThrows); m.def("single_test_sample", @@ -551,28 +581,36 @@ NB_MODULE(c_21cmfast, m) { } }); - m.def("test_halo_props", - [](double redshift, nb::ndarray vcb_grid, nb::ndarray J21_LW_grid, - nb::ndarray z_re_grid, nb::ndarray Gamma12_ion_grid, - nb::ndarray halo_masses, nb::ndarray halo_coords, - nb::ndarray star_rng, nb::ndarray sfr_rng, nb::ndarray xray_rng, - nb::ndarray halo_props_out) { - size_t n_halos = halo_masses.shape(0); - if (halo_coords.shape(0) != n_halos || halo_coords.shape(1) != 3 || - star_rng.shape(0) != n_halos || sfr_rng.shape(0) != n_halos || - xray_rng.shape(0) != n_halos || halo_props_out.shape(0) != n_halos || - halo_props_out.shape(1) != 12) { - throw std::runtime_error("Input/output arrays must match the number of halos."); - } - int status = test_halo_props(redshift, vcb_grid.data(), J21_LW_grid.data(), - z_re_grid.data(), Gamma12_ion_grid.data(), n_halos, - halo_masses.data(), halo_coords.data(), star_rng.data(), - sfr_rng.data(), xray_rng.data(), halo_props_out.data()); - if (status != 0) { - throw std::runtime_error("test_halo_props failed with status: " + - std::to_string(status)); - } - }); + m.def("test_halo_props", [](double redshift, nb::ndarray vcb_grid, + nb::ndarray J21_LW_grid, nb::ndarray z_re_grid, + nb::ndarray Gamma12_ion_grid, nb::ndarray halo_masses, + nb::ndarray halo_coords, nb::ndarray star_rng, + nb::ndarray sfr_rng, nb::ndarray xray_rng, + nb::ndarray halo_props_out) { + size_t n_halos = halo_masses.shape(0); + if (halo_coords.shape(0) != n_halos || halo_coords.shape(1) != 3 || + star_rng.shape(0) != n_halos || sfr_rng.shape(0) != n_halos || + xray_rng.shape(0) != n_halos || halo_props_out.shape(0) != n_halos || + halo_props_out.shape(1) != 12) { + throw std::runtime_error( + "Input/output arrays must have the same shape as the number of halos. 
halo_coords " + "shape: " + + std::to_string(halo_coords.shape(0)) + "x" + std::to_string(halo_coords.shape(1)) + + ", " + "halo_masses shape: " + std::to_string(halo_masses.shape(0)) + ", " + + "star_rng shape: " + std::to_string(star_rng.shape(0)) + ", " + + "sfr_rng shape: " + std::to_string(sfr_rng.shape(0)) + ", " + + "halo_props_out shape: " + std::to_string(halo_props_out.shape(0)) + "x" + + std::to_string(halo_props_out.shape(1))); + } + int status = test_halo_props(redshift, vcb_grid.data(), J21_LW_grid.data(), + z_re_grid.data(), Gamma12_ion_grid.data(), n_halos, + halo_masses.data(), halo_coords.data(), star_rng.data(), + sfr_rng.data(), xray_rng.data(), halo_props_out.data()); + if (status != 0) { + throw std::runtime_error("test_halo_props failed with status: " + + std::to_string(status)); + } + }); m.def("test_filter", [](nb::ndarray input_box, double R, double R_param, int filter_flag, nb::ndarray result) { @@ -600,4 +638,6 @@ NB_MODULE(c_21cmfast, m) { m.def( "get_config_settings", []() -> ConfigSettings& { return config_settings; }, nb::rv_policy::reference); + + m.attr("photon_cons_allocated") = nb::cast(&photon_cons_allocated); } diff --git a/src/py21cmfast/wrapper/cfuncs.py b/src/py21cmfast/wrapper/cfuncs.py index 116611469..1f9186d9c 100644 --- a/src/py21cmfast/wrapper/cfuncs.py +++ b/src/py21cmfast/wrapper/cfuncs.py @@ -201,14 +201,10 @@ def compute_tau( redshifts = np.array(redshifts, dtype="float32") global_xHI = np.array(global_xHI, dtype="float32") - z = redshifts - xHI = global_xHI - # Run the C code return lib.ComputeTau( - len(redshifts), - z, - xHI, + redshifts, + global_xHI, z_re_HeII, ) @@ -290,23 +286,19 @@ def compute_luminosity_function( ) component = "acg" - # NOTE from v4 GPU build update: There was a multi-step process here - # which I'm sure was there for a reason, so when it breaks re-do: - # lfunc = np.zeros(x*y); lfunc.shape = (x,y), c_lfunc = lfunc.... 
- lfunc = np.zeros(len(redshifts), nbins) - Muvfunc = np.zeros(len(redshifts), nbins) - Mhfunc = np.zeros(len(redshifts), nbins) + lfunc = np.zeros((len(redshifts), nbins)) + Muvfunc = np.zeros((len(redshifts), nbins)) + Mhfunc = np.zeros((len(redshifts), nbins)) - lfunc_MINI = np.zeros(len(redshifts), nbins) - Muvfunc_MINI = np.zeros(len(redshifts), nbins) - Mhfunc_MINI = np.zeros(len(redshifts), nbins) + lfunc_MINI = np.zeros((len(redshifts), nbins)) + Muvfunc_MINI = np.zeros((len(redshifts), nbins)) + Mhfunc_MINI = np.zeros((len(redshifts), nbins)) if component in ("both", "acg"): # Run the C code errcode = lib.ComputeLF( - nbins, 1, - len(redshifts), + nbins, redshifts, mturnovers, Muvfunc, @@ -318,18 +310,16 @@ def compute_luminosity_function( errcode, lib.ComputeLF, ( - nbins, 1, - len(redshifts), + nbins, ), ) if component in ("both", "mcg"): # Run the C code errcode = lib.ComputeLF( - nbins, 2, - len(redshifts), + nbins, redshifts, mturnovers_mini, Muvfunc_MINI, @@ -341,21 +331,16 @@ def compute_luminosity_function( errcode, lib.ComputeLF, ( - nbins, 2, - len(redshifts), + nbins, ), ) if component == "both": # redo the Muv range using the faintest (most likely MINI) and the brightest (most likely massive) - lfunc_all = np.zeros(len(redshifts) * nbins) - Muvfunc_all = np.zeros(len(redshifts) * nbins) - Mhfunc_all = np.zeros(len(redshifts) * nbins * 2) - - lfunc_all.shape = (len(redshifts), nbins) - Muvfunc_all.shape = (len(redshifts), nbins) - Mhfunc_all.shape = (len(redshifts), nbins, 2) + lfunc_all = np.zeros((len(redshifts), nbins)) + Muvfunc_all = np.zeros((len(redshifts), nbins)) + Mhfunc_all = np.zeros((len(redshifts), nbins, 2)) for iz in range(len(redshifts)): Muvfunc_all[iz] = np.linspace( @@ -786,7 +771,7 @@ def sample_halos_from_conditions( n_cond = cond_array.size # all coordinates zero - crd_in = np.zeros(3 * n_cond).astype("i4") + crd_in = np.zeros((n_cond, 3)).astype("i4") cond_array = cond_array.astype("f4") nhalo_out = 
np.zeros(1).astype("i4") @@ -795,7 +780,7 @@ def sample_halos_from_conditions( exp_M = np.zeros(n_cond).astype("f8") exp_N = np.zeros(n_cond).astype("f8") halomass_out = np.zeros(buffer_size).astype("f4") - halocrd_out = np.zeros(int(3 * buffer_size)).astype("i4") + halocrd_out = np.zeros((buffer_size, 3)).astype("i4") lib.single_test_sample( inputs.random_seed, @@ -861,11 +846,12 @@ def convert_halo_properties( raise ValueError("Halo masses and rng shapes must be identical.") n_halos = halo_masses.size + orig_shape = halo_masses.shape out_buffer = np.zeros((n_halos, 12), dtype="f4") lo_dim = (inputs.simulation_options.HII_DIM,) * 3 if halo_coords is None: - halo_coords = np.zeros(3 * n_halos) + halo_coords = np.zeros((n_halos, 3)) if vcb_grid is None: vcb_grid = np.zeros(lo_dim) if J_21_LW_grid is None: @@ -880,11 +866,11 @@ def convert_halo_properties( z_re_grid = z_re_grid.astype("f4") Gamma12_grid = Gamma12_grid.astype("f4") - halo_masses = halo_masses.astype("f4") - halo_coords = halo_coords.astype("f4") - star_rng = star_rng.astype("f4") - sfr_rng = sfr_rng.astype("f4") - xray_rng = xray_rng.astype("f4") + halo_masses = halo_masses.reshape(n_halos).astype("f4") + halo_coords = halo_coords.reshape(n_halos, 3).astype("f4") + star_rng = star_rng.reshape(n_halos).astype("f4") + sfr_rng = sfr_rng.reshape(n_halos).astype("f4") + xray_rng = xray_rng.reshape(n_halos).astype("f4") lib.test_halo_props( redshift, @@ -903,18 +889,18 @@ def convert_halo_properties( out_buffer = out_buffer.reshape(n_halos, 12) return { - "halo_mass": out_buffer[:, 0].reshape(halo_masses.shape), - "halo_stars": out_buffer[:, 1].reshape(halo_masses.shape), - "halo_sfr": out_buffer[:, 2].reshape(halo_masses.shape), - "halo_xray": out_buffer[:, 3].reshape(halo_masses.shape), - "n_ion": out_buffer[:, 4].reshape(halo_masses.shape), - "halo_wsfr": out_buffer[:, 5].reshape(halo_masses.shape), - "halo_stars_mini": out_buffer[:, 6].reshape(halo_masses.shape), - "halo_sfr_mini": out_buffer[:, 
7].reshape(halo_masses.shape), - "mturn_a": out_buffer[:, 8].reshape(halo_masses.shape), - "mturn_m": out_buffer[:, 9].reshape(halo_masses.shape), - "mturn_r": out_buffer[:, 10].reshape(halo_masses.shape), - "metallicity": out_buffer[:, 11].reshape(halo_masses.shape), + "halo_mass": out_buffer[:, 0].reshape(orig_shape), + "halo_stars": out_buffer[:, 1].reshape(orig_shape), + "halo_sfr": out_buffer[:, 2].reshape(orig_shape), + "halo_xray": out_buffer[:, 3].reshape(orig_shape), + "n_ion": out_buffer[:, 4].reshape(orig_shape), + "halo_wsfr": out_buffer[:, 5].reshape(orig_shape), + "halo_stars_mini": out_buffer[:, 6].reshape(orig_shape), + "halo_sfr_mini": out_buffer[:, 7].reshape(orig_shape), + "mturn_a": out_buffer[:, 8].reshape(orig_shape), + "mturn_m": out_buffer[:, 9].reshape(orig_shape), + "mturn_r": out_buffer[:, 10].reshape(orig_shape), + "metallicity": out_buffer[:, 11].reshape(orig_shape), } diff --git a/src/py21cmfast/wrapper/exceptions.py b/src/py21cmfast/wrapper/exceptions.py index b2c37494c..e64982551 100644 --- a/src/py21cmfast/wrapper/exceptions.py +++ b/src/py21cmfast/wrapper/exceptions.py @@ -103,7 +103,9 @@ class CUDAError(FatalCError): def _process_exitcode(exitcode, fnc, args): """Determine what happens for different values of the (integer) exit code from a C function.""" if exitcode != SUCCESS: - logger.error(f"In function: {fnc.__name__}. Arguments: {args}") + logger.error( + f"Error code {exitcode} in function: {fnc.__name__}. 
Arguments: {args}" + ) if exitcode: try: diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index bdba08f46..1b5a968c0 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -6,7 +6,7 @@ import py21cmfast.c_21cmfast as lib from py21cmfast.wrapper.exceptions import ( PHOTONCONSERROR, - ParameterError, + PhotonConsError, _process_exitcode, ) @@ -21,12 +21,12 @@ def test_basic(subfunc): def test_simple(subfunc): answer = np.array([0], dtype="f8") - status = lib.FunctionThatCatches(subfunc, False) - with pytest.raises(ParameterError): + status = lib.FunctionThatCatches(subfunc, False, answer) + with pytest.raises(PhotonConsError): _process_exitcode( status, lib.FunctionThatCatches, - (False, answer), + (subfunc, False, answer), ) diff --git a/tests/test_filtering.py b/tests/test_filtering.py index c67ffba8c..a0753cc23 100644 --- a/tests/test_filtering.py +++ b/tests/test_filtering.py @@ -105,19 +105,19 @@ def test_filters(filter_flag, R, plt): output_box_centre = np.zeros((up.HII_DIM,) * 3, dtype="f8") # use MFP=20 for the exp filter, use a 4 cell shell for the annular filter if filter_flag == 3: - R_param = 20 + R_param = 20.0 elif filter_flag == 4: - R_param = max(R - 4 * (up.BOX_LEN / up.HII_DIM), 0) + R_param = max(R - 4 * (up.BOX_LEN / up.HII_DIM), 0.0) else: - R_param = 0 + R_param = 0.0 broadcast_input_struct(inputs) lib.test_filter( - input_box_centre.ctypes.data, + input_box_centre, R, R_param, filter_flag, - output_box_centre.ctypes.data, + output_box_centre, ) # expected outputs given in cell units From 28e5723d7b5eac8bbaa1cebf505950379ce37b5c Mon Sep 17 00:00:00 2001 From: James Davies Date: Mon, 12 May 2025 17:28:22 +0200 Subject: [PATCH 119/145] compact integration test warnings --- tests/produce_integration_test_data.py | 33 +++++++------------------- 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/tests/produce_integration_test_data.py b/tests/produce_integration_test_data.py index 120731fd0..211a834e1 100644 
--- a/tests/produce_integration_test_data.py +++ b/tests/produce_integration_test_data.py @@ -574,42 +574,27 @@ def print_failure_stats(test, truth, inputs, abs_tol, rel_tol, name): return False failed_idx = np.where(sel_failed) - warnings.warn( - f"{name}: atol {abs_tol} rtol {rel_tol} failed {sel_failed.sum()} of {sel_failed.size} {sel_failed.sum() / sel_failed.size * 100:.4f}%", - stacklevel=2, - ) - warnings.warn( - f"subcube of failures [min] [max] {[f.min() for f in failed_idx]} {[f.max() for f in failed_idx]}", - stacklevel=2, - ) - warnings.warn( - f"failure range truth ({truth[sel_failed].min():.3e},{truth[sel_failed].max():.3e}) test ({test[sel_failed].min():.3e},{test[sel_failed].max():.3e})", - stacklevel=2, - ) - warnings.warn( - f"max abs diff of failures {np.fabs(truth - test)[sel_failed].max():.4e} relative {(np.fabs(truth - test) / truth)[sel_failed].max():.4e}", - stacklevel=2, - ) + message = f"{name}: atol {abs_tol} rtol {rel_tol} failed {sel_failed.sum()} of {sel_failed.size} {sel_failed.sum() / sel_failed.size * 100:.4f} %\n" + message += f"subcube of failures [min] [max] {[f.min() for f in failed_idx]} {[f.max() for f in failed_idx]}\n" + message += f"failure range truth ({truth[sel_failed].min():.3e},{truth[sel_failed].max():.3e}) test ({test[sel_failed].min():.3e},{test[sel_failed].max():.3e})\n" + message += f"max abs diff of failures {np.fabs(truth - test)[sel_failed].max():.4e} relative {(np.fabs(truth - test) / truth)[sel_failed].max():.4e}\n" failed_inp = [ inp[sel_failed if inp.shape == test.shape else failed_idx[i]] for i, inp in enumerate(inputs) ] for i, _inp in enumerate(inputs): - warnings.warn( - f"failure range of inputs axis {i} {failed_inp[i].min():.2e} {failed_inp[i].max():.2e}", - stacklevel=2, - ) + message += f"failure range of inputs axis {i} {failed_inp[i].min():.2e} {failed_inp[i].max():.2e}\n" - warnings.warn("----- First 10 -----", stacklevel=2) + message += "----- First 10 -----\n" for j in range(min(10, 
sel_failed.sum())): input_arr = [f"{failed_inp[i][j]:.2e}" for i, finp in enumerate(failed_inp)] - warnings.warn( + message += ( f"CRD {input_arr}" - + f" {truth[sel_failed].flatten()[j]:.4e} {test[sel_failed].flatten()[j]:.4e}", - stacklevel=2, + + f" {truth[sel_failed].flatten()[j]:.4e} {test[sel_failed].flatten()[j]:.4e}\n", ) + warnings.warn(message, stacklevel=2) return True From b8a1fb9e4866b78206b651c3b4a80f4f12582cec Mon Sep 17 00:00:00 2001 From: daviesje Date: Mon, 12 May 2025 22:58:43 +0200 Subject: [PATCH 120/145] get photoncons working --- src/py21cmfast/src/_wrapper.cpp | 52 ++++++++++++++++++++++++-- src/py21cmfast/wrapper/_utils.py | 42 ++++++++++++++++++--- src/py21cmfast/wrapper/photoncons.py | 12 +++--- tests/produce_integration_test_data.py | 2 +- tests/test_integration_features.py | 2 +- 5 files changed, 91 insertions(+), 19 deletions(-) diff --git a/src/py21cmfast/src/_wrapper.cpp b/src/py21cmfast/src/_wrapper.cpp index 5dc7cca48..81bc85421 100644 --- a/src/py21cmfast/src/_wrapper.cpp +++ b/src/py21cmfast/src/_wrapper.cpp @@ -383,11 +383,55 @@ NB_MODULE(c_21cmfast, m) { // PHOTON CONSERVATION MODEL FUNCTIONS m.def("InitialisePhotonCons", &InitialisePhotonCons); - m.def("PhotonCons_Calibration", &PhotonCons_Calibration); - m.def("ComputeZstart_PhotonCons", &ComputeZstart_PhotonCons); - m.def("adjust_redshifts_for_photoncons", &adjust_redshifts_for_photoncons); + m.def("PhotonCons_Calibration", + [](nb::ndarray z_estimate, nb::ndarray xH_estimate) { + int n_spline = z_estimate.size(); + if (xH_estimate.size() != n_spline) { + throw std::runtime_error("Array sizes do not match the specified NSpline."); + } + int status = PhotonCons_Calibration(z_estimate.data(), xH_estimate.data(), n_spline); + if (status != 0) { + throw std::runtime_error("PhotonCons_Calibration failed with status: " + + std::to_string(status)); + } + }); + m.def("ComputeZstart_PhotonCons", [](nb::ndarray zstart) { + if (zstart.size() != 1) { + throw std::runtime_error("zstart 
array must have size 1."); + } + int status = ComputeZstart_PhotonCons(zstart.data()); + if (status != 0) { + throw std::runtime_error("ComputeZstart_PhotonCons failed with status: " + + std::to_string(status)); + } + }); + m.def("adjust_redshifts_for_photoncons", + [](double z_step_factor, nb::ndarray redshift, nb::ndarray stored_redshift, + nb::ndarray absolute_delta_z) { + adjust_redshifts_for_photoncons(z_step_factor, redshift.data(), + stored_redshift.data(), absolute_delta_z.data()); + }); m.def("determine_deltaz_for_photoncons", &determine_deltaz_for_photoncons); - m.def("ObtainPhotonConsData", &ObtainPhotonConsData); + m.def("ObtainPhotonConsData", + [](nb::ndarray z_at_Q_data, nb::ndarray Q_data, + nb::ndarray Ndata_analytic, nb::ndarray z_cal_data, + nb::ndarray nf_cal_data, nb::ndarray Ndata_calibration, + nb::ndarray PhotonCons_NFdata, nb::ndarray PhotonCons_deltaz, + nb::ndarray Ndata_PhotonCons) { + if (Ndata_analytic.size() != 1 || Ndata_calibration.size() != 1 || + Ndata_PhotonCons.size() != 1) { + throw std::runtime_error( + "Ndata_analytic, Ndata_calibration, and Ndata_PhotonCons must have size 1."); + } + int status = ObtainPhotonConsData( + z_at_Q_data.data(), Q_data.data(), Ndata_analytic.data(), z_cal_data.data(), + nf_cal_data.data(), Ndata_calibration.data(), PhotonCons_NFdata.data(), + PhotonCons_deltaz.data(), Ndata_PhotonCons.data()); + if (status != 0) { + throw std::runtime_error("ObtainPhotonConsData failed with status: " + + std::to_string(status)); + } + }); m.def("FreePhotonConsMemory", &FreePhotonConsMemory); m.def("set_alphacons_params", &set_alphacons_params); diff --git a/src/py21cmfast/wrapper/_utils.py b/src/py21cmfast/wrapper/_utils.py index 352fb2a95..e04c40f82 100644 --- a/src/py21cmfast/wrapper/_utils.py +++ b/src/py21cmfast/wrapper/_utils.py @@ -41,19 +41,49 @@ def asarray(ptr, shape): return array +def _nb_initialise_return_value(arg_string): + """Return a zero-initialised object of the correct type given a nanobind 
signature.""" + # If it's a wrapped class, return the class + if "py21cmfast.c_21cmfast" in arg_string: + return getattr(lib, arg_string.split("py21cmfast.c_21cmfast")[-1])() + + # Mapping of nanobind types to Python types + nb_to_py_types = { + "float": float, + "double": float, + "int": int, + "bool": bool, + "str": str, + "void": type(None), + } + + if "*" in arg_string or "ndarray" in arg_string: + base_type = arg_string.split("dtype=")[1].split("]")[0] + # TODO: pass a size argument? + return np.zeros(1, dtype=nb_to_py_types[base_type]) + + raise ValueError( + f"Cannot create a zero-initialised object of type {arg_string}." + "As it is not a pointer, array or class. Please check the function signature." + ) + + def _call_c_simple(fnc, *args): """Call a simple C function that just returns an object. - Any such function should be defined such that the last argument is an int pointer generating - the status. + Assumes that the last argument is a pointer to an object that will be filled in by the C function. + This argument is initialised here and returned. 
""" # Parse the function to get the type of the last argument - cdata = getattr(lib, fnc.__name__) # TODO: finish - kind = cdata.split("(")[-1].split(")")[0].split(",")[-1] - result = getattr(lib, kind)() # TODO:finish + cdata = fnc.__nb_signature__[0][0] + # Nanobind signature is 'def fnc.__name__(arg0: type0, arg1: type1, ..., argN: typeN, /) -> returntype' + signature_string = ( + cdata.split("(")[-1].split(")")[0].split(",")[-2].replace("arg: ", "").strip() + ) + result = _nb_initialise_return_value(signature_string) status = fnc(*args, result) _process_exitcode(status, fnc, args) - return result[0] + return result def camel_to_snake(word: str, depublicize: bool = False): diff --git a/src/py21cmfast/wrapper/photoncons.py b/src/py21cmfast/wrapper/photoncons.py index 077811d67..19febdba5 100644 --- a/src/py21cmfast/wrapper/photoncons.py +++ b/src/py21cmfast/wrapper/photoncons.py @@ -94,9 +94,7 @@ def _init_photon_conservation_correction(*, inputs): return lib.InitialisePhotonCons() -def _calibrate_photon_conservation_correction( - *, redshifts_estimate, nf_estimate, NSpline -): +def _calibrate_photon_conservation_correction(*, redshifts_estimate, nf_estimate): # This function passes the calibration simulation results to C, # Storing a clipped version in global arrays nf_vals and z_vals, # and constructing the GSL interpolator z_NFHistory_spline @@ -107,7 +105,7 @@ def _calibrate_photon_conservation_correction( xHI = nf_estimate logger.debug(f"PhotonCons nf estimates: {nf_estimate}") - return lib.PhotonCons_Calibration(z, xHI, NSpline) + return lib.PhotonCons_Calibration(z, xHI) def _calc_zstart_photon_cons(): @@ -115,7 +113,7 @@ def _calc_zstart_photon_cons(): # Set by neutral fraction astro_params.PHOTONCONS_ZSTART from ._utils import _call_c_simple - return _call_c_simple(lib.ComputeZstart_PhotonCons) + return _call_c_simple(lib.ComputeZstart_PhotonCons)[0] def _get_photon_nonconservation_data() -> dict: @@ -300,7 +298,8 @@ def calibrate_photon_cons( 
prev_perturb = None # Arrays for redshift and neutral fraction for the calibration curve - neutral_fraction_photon_cons = [] + # TODO: double check, this was empty before, was that a bug? + neutral_fraction_photon_cons = [1.0] # Initialise the analytic expression for the reionisation history logger.info("About to start photon conservation correction") @@ -362,7 +361,6 @@ def calibrate_photon_cons( _calibrate_photon_conservation_correction( redshifts_estimate=fast_node_redshifts, nf_estimate=neutral_fraction_photon_cons, - NSpline=len(fast_node_redshifts), ) diff --git a/tests/produce_integration_test_data.py b/tests/produce_integration_test_data.py index 211a834e1..169b97188 100644 --- a/tests/produce_integration_test_data.py +++ b/tests/produce_integration_test_data.py @@ -591,7 +591,7 @@ def print_failure_stats(test, truth, inputs, abs_tol, rel_tol, name): input_arr = [f"{failed_inp[i][j]:.2e}" for i, finp in enumerate(failed_inp)] message += ( f"CRD {input_arr}" - + f" {truth[sel_failed].flatten()[j]:.4e} {test[sel_failed].flatten()[j]:.4e}\n", + + f" {truth[sel_failed].flatten()[j]:.4e} {test[sel_failed].flatten()[j]:.4e}\n" ) warnings.warn(message, stacklevel=2) diff --git a/tests/test_integration_features.py b/tests/test_integration_features.py index 2ce4ee975..68f12bf5d 100644 --- a/tests/test_integration_features.py +++ b/tests/test_integration_features.py @@ -75,7 +75,7 @@ def test_power_spectra_coeval(name, module_direc, plt): [test_k], abs_tol=0, rel_tol=1e-4, - name=key, + name=f"{name} - {key}", ) any_failed = True # TODO:remove this testing line From 53e9e00bac8939e32301d934bb3074f408776628 Mon Sep 17 00:00:00 2001 From: James Davies Date: Tue, 13 May 2025 14:38:50 +0200 Subject: [PATCH 121/145] add compiler options for log level and debug builds --- src/py21cmfast/src/meson.build | 22 ++++++++++++++++++++++ src/py21cmfast/wrapper/_utils.py | 6 +++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/py21cmfast/src/meson.build 
b/src/py21cmfast/src/meson.build index b7d2f5ce8..acb642225 100644 --- a/src/py21cmfast/src/meson.build +++ b/src/py21cmfast/src/meson.build @@ -45,6 +45,28 @@ search_paths = [ '/usr/lib', '/usr/local/lib', '/opt/homebrew/lib' ] fftw = cc.find_library ('fftw3f', required: true, dirs: search_paths) fftw_threads = cc.find_library ('fftw3f_threads', required: true, dirs: search_paths) +# Define a mapping of string values to integers (enum-like structure) +log_level_map = { + 'NO_LOG': 0, + 'ERROR': 1, + 'WARNING': 2, + 'INFO': 3, + 'DEBUG': 4, + 'SUPER_DEBUG': 5, + 'ULTRA_DEBUG': 6, +} + +# Get the LOG_LEVEL environment variable (default to 'WARNING' if not set) +log_level_str = get_option('log_level') + +# Convert the string to an integer using the map, defaulting to 2 (warnings) if the key is invalid +log_level = log_level_map.get(log_level_str, 2) + +# Print the selected log level for debugging purposes +message('Selected log level: ' + log_level.to_string()) + +add_project_arguments('-DLOG_LEVEL=' + log_level.to_string(), language: 'c') + # Define the Python extension module deps = [omp,gsl,fftw,fftw_threads,nanobind] py.extension_module( diff --git a/src/py21cmfast/wrapper/_utils.py b/src/py21cmfast/wrapper/_utils.py index e04c40f82..67cca90a1 100644 --- a/src/py21cmfast/wrapper/_utils.py +++ b/src/py21cmfast/wrapper/_utils.py @@ -42,7 +42,10 @@ def asarray(ptr, shape): def _nb_initialise_return_value(arg_string): - """Return a zero-initialised object of the correct type given a nanobind signature.""" + """Return a zero-initialised object of the correct type given a nanobind signature. + + Currently only works with wrapped structures or numpy arrays of size 1. 
+ """ # If it's a wrapped class, return the class if "py21cmfast.c_21cmfast" in arg_string: return getattr(lib, arg_string.split("py21cmfast.c_21cmfast")[-1])() @@ -77,6 +80,7 @@ def _call_c_simple(fnc, *args): # Parse the function to get the type of the last argument cdata = fnc.__nb_signature__[0][0] # Nanobind signature is 'def fnc.__name__(arg0: type0, arg1: type1, ..., argN: typeN, /) -> returntype' + # We wish to extract the type of the last argument only. signature_string = ( cdata.split("(")[-1].split(")")[0].split(",")[-2].replace("arg: ", "").strip() ) From fb6a0275b27a3bc5edaa43a26e6444538c63617c Mon Sep 17 00:00:00 2001 From: James Davies Date: Tue, 13 May 2025 15:27:53 +0200 Subject: [PATCH 122/145] add files, fix call_c_simple --- install_custom.py | 70 ++++++++++++++++++++++++++++++++ meson.options | 2 + src/py21cmfast/wrapper/_utils.py | 18 ++------ 3 files changed, 76 insertions(+), 14 deletions(-) create mode 100755 install_custom.py create mode 100644 meson.options diff --git a/install_custom.py b/install_custom.py new file mode 100755 index 000000000..9e5744f10 --- /dev/null +++ b/install_custom.py @@ -0,0 +1,70 @@ +#!/bin/python +""" +install_custom.py provides a custom installation process for the 21cmFAST package. + +Allows users to specify various build and configuration options via command-line arguments. + +Features: +- Allows setting the log level for the installation process. +- Provides an option to enable debug symbols for the build. +- Enables customization of the optimization level for the build process. + +Command-line Arguments: +- --log-level: Specifies the log level for the build process. Options include: + NO_LOG, ERROR, WARNING, INFO, DEBUG, SUPER_DEBUG, ULTRA_DEBUG. Defaults to WARNING. +- --debug: Enables debug symbols for the build, which can be useful for debugging. +- -o, --optimization: Sets the optimization level for the build (e.g., -O0, -O1, -O2, -O3). + Defaults to 3. 
+ +Usage: +Run the script from the command line to install 21cmFAST with the desired options: + python install_custom.py [options] + +Example: + python install_custom.py --log-level DEBUG --debug -o 2 + +Dependencies: +- Python 3.x +- pip (Python package installer) + +Note: +This script uses the `subprocess` module to invoke the `pip install` command with +custom configuration settings. + +""" + +import argparse +import subprocess + +# Define the command-line arguments +parser = argparse.ArgumentParser(description="Install 21cmFAST with custom options.") +parser.add_argument( + "--log-level", + type=str, + default="WARNING", + help="Set the log level (NO_LOG, ERROR, WARNING, INFO, DEBUG, SUPER_DEBUG, ULTRA_DEBUG)", +) +parser.add_argument("--debug", action="store_true", help="Enable debug symbols") +parser.add_argument( + "-o", + "--optimization", + help="optimisation level (i,e -O0, -O1, -O2, -O3)", + default="3", +) + +args = parser.parse_args() + +# Get the LOG_LEVEL environment variable (default to 'WARNING' if not set) +log_level_str = args.log_level +setup_args = [ + f"--config-setting=setup-args=-Dlog_level={log_level_str}", +] + +if args.debug: + setup_args += ["--config-setting=setup-args=-Dbuildtype=debugoptimized"] # -O2 + +setup_args += [f"--config-setting=setup-args=-Doptimization={args.optimization}"] + + +# Run pip install with the specified options +subprocess.run(["pip", "install", ".", *setup_args]) diff --git a/meson.options b/meson.options new file mode 100644 index 000000000..9550b27a3 --- /dev/null +++ b/meson.options @@ -0,0 +1,2 @@ +# Define the log level option +option('log_level', type: 'string', value: 'WARNING', description: 'Set the log level (e.g., NO_LOG, ERROR, WARNING, INFO, DEBUG, SUPER_DEBUG, ULTRA_DEBUG)') diff --git a/src/py21cmfast/wrapper/_utils.py b/src/py21cmfast/wrapper/_utils.py index 67cca90a1..11a339b74 100644 --- a/src/py21cmfast/wrapper/_utils.py +++ b/src/py21cmfast/wrapper/_utils.py @@ -41,29 +41,18 @@ def 
asarray(ptr, shape): return array -def _nb_initialise_return_value(arg_string): +def _nb_initialise_return_value(arg_string, out_shape=(1,)): """Return a zero-initialised object of the correct type given a nanobind signature. - Currently only works with wrapped structures or numpy arrays of size 1. + Currently only works with wrapped structures or numpy arrays. """ # If it's a wrapped class, return the class if "py21cmfast.c_21cmfast" in arg_string: return getattr(lib, arg_string.split("py21cmfast.c_21cmfast")[-1])() - # Mapping of nanobind types to Python types - nb_to_py_types = { - "float": float, - "double": float, - "int": int, - "bool": bool, - "str": str, - "void": type(None), - } - if "*" in arg_string or "ndarray" in arg_string: base_type = arg_string.split("dtype=")[1].split("]")[0] - # TODO: pass a size argument? - return np.zeros(1, dtype=nb_to_py_types[base_type]) + return np.zeros(out_shape, dtype=getattr(np, base_type)) raise ValueError( f"Cannot create a zero-initialised object of type {arg_string}." 
@@ -84,6 +73,7 @@ def _call_c_simple(fnc, *args): signature_string = ( cdata.split("(")[-1].split(")")[0].split(",")[-2].replace("arg: ", "").strip() ) + # NOTE: This uses the default return size == 1 for arrays result = _nb_initialise_return_value(signature_string) status = fnc(*args, result) _process_exitcode(status, fnc, args) From c53cace4cc75a481a20dbb60e6168ca0c41a81d3 Mon Sep 17 00:00:00 2001 From: James Davies Date: Tue, 13 May 2025 16:41:32 +0200 Subject: [PATCH 123/145] non-integration tests pass --- src/py21cmfast/plotting.py | 2 +- src/py21cmfast/wrapper/structs.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/py21cmfast/plotting.py b/src/py21cmfast/plotting.py index 17dff789a..b1eab47d0 100644 --- a/src/py21cmfast/plotting.py +++ b/src/py21cmfast/plotting.py @@ -169,7 +169,7 @@ def coeval_sliceplot( """ if kind is None: if isinstance(struct, outputs.OutputStruct): - kind = struct.struct.fieldnames[0] + kind = next(iter(struct.arrays.keys())) elif isinstance(struct, Coeval): kind = "brightness_temp" diff --git a/src/py21cmfast/wrapper/structs.py b/src/py21cmfast/wrapper/structs.py index 28e6c6b77..3949c74fd 100644 --- a/src/py21cmfast/wrapper/structs.py +++ b/src/py21cmfast/wrapper/structs.py @@ -64,7 +64,7 @@ def fields(self) -> dict[str, Any]: """A list of fields of the underlying C struct (a list of tuples of "name, type").""" result = {} for attr in dir(self.cstruct): - if not attr.startswith("__") and not callable(getattr(self.cstruct, attr)): + if not attr.startswith("__"): result[attr] = type(getattr(self.cstruct, attr)) return result @@ -76,7 +76,7 @@ def fieldnames(self) -> list[str]: @property def pointer_fields(self) -> list[str]: """A list of names of fields which have pointer type in the C struct.""" - return [f.split("set_")[0] for f in self.fields if f.startswith("set_")] + return [f.split("set_")[1] for f in self.fields if f.startswith("set_")] @property def primitive_fields(self) -> list[str]: @@ -128,8 
+128,8 @@ def __init__(self, wrapped): # ignore dunders if not attr.startswith("__"): if attr.startswith("get_"): - # If the attribute is a setter, we need to set the value - # to the value of the attribute without the "set_" prefix + # If the attribute is a getter, we need to set the value in python + # to the value of the C++ attribute without the "get_" prefix setattr(self, attr[4:], getattr(self._cobj, attr)()) elif not callable(getattr(self._cobj, attr)): # Otherwise, we just set the attribute to the value From 53c344438a0b7127200dd43024a964e4ed733515 Mon Sep 17 00:00:00 2001 From: James Davies Date: Fri, 16 May 2025 13:33:40 +0200 Subject: [PATCH 124/145] tweak installation parameters --- install_custom.py | 6 ++++-- pyproject.toml | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/install_custom.py b/install_custom.py index 9e5744f10..a2a7624d4 100755 --- a/install_custom.py +++ b/install_custom.py @@ -53,6 +53,8 @@ ) args = parser.parse_args() +if args.optimization not in ["0", "g", "1", "2", "3", "s"]: + raise ValueError("Invalid optimization level. 
Choose from 0, g, 1, 2, 3 or s") # Get the LOG_LEVEL environment variable (default to 'WARNING' if not set) log_level_str = args.log_level @@ -60,9 +62,9 @@ f"--config-setting=setup-args=-Dlog_level={log_level_str}", ] -if args.debug: - setup_args += ["--config-setting=setup-args=-Dbuildtype=debugoptimized"] # -O2 +debug = str(args.debug).lower() +setup_args += [f"--config-setting=setup-args=-Ddebug={args.debug}"] setup_args += [f"--config-setting=setup-args=-Doptimization={args.optimization}"] diff --git a/pyproject.toml b/pyproject.toml index 314b7588a..f5d4271fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,6 +110,9 @@ Changelog="https://github.com/21cmFAST/21cmFAST" build-backend = 'mesonpy' requires = ['meson-python','nanobind>=2.4.0'] +[tool.meson-python.args] +setup = ["-Dbuildtype=release"] + [tool.pytest.ini_options] testpaths = ["tests"] From 937557126f44b70553f552400826d694e0b67492 Mon Sep 17 00:00:00 2001 From: James Davies Date: Tue, 22 Jul 2025 09:32:16 +1000 Subject: [PATCH 125/145] rename/remove old files --- src/py21cmfast/meson.build | 2 +- src/py21cmfast/src/meson.build | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/py21cmfast/meson.build b/src/py21cmfast/meson.build index cf82292e1..491b3d209 100644 --- a/src/py21cmfast/meson.build +++ b/src/py21cmfast/meson.build @@ -3,7 +3,7 @@ source_files = [ '_cfg.py', '_logging.py', 'cli.py', - 'lightcones.py', + 'lightconers.py', 'plotting.py', 'run_templates.py', 'utils.py', diff --git a/src/py21cmfast/src/meson.build b/src/py21cmfast/src/meson.build index acb642225..cf069f82f 100644 --- a/src/py21cmfast/src/meson.build +++ b/src/py21cmfast/src/meson.build @@ -26,7 +26,6 @@ source_files = files([ 'recombinations.c', 'rng.c', 'scaling_relations.c', - 'subcell_rsds.c', 'thermochem.c', 'MapMass_cpu.c', '_wrapper.cpp', From 9d3d4d9405e785fb1e1b5d35a1a2b6b51f4c1dd0 Mon Sep 17 00:00:00 2001 From: James Davies Date: Wed, 23 Jul 2025 10:38:41 +1000 Subject: [PATCH 126/145] fix 
paramaeter and field names --- pyproject.toml | 3 ++- src/py21cmfast/meson.build | 1 + src/py21cmfast/src/InputParameters.h | 1 - src/py21cmfast/src/OutputStructs.h | 5 +++-- src/py21cmfast/src/_wrapper.cpp | 26 ++++++++++++++------------ src/py21cmfast/src/meson.build | 1 + 6 files changed, 21 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cd59a26b4..df7f0f0ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,7 +89,8 @@ dev = [ "powerbox", "pytest-plt", "pytest-benchmark", - "questionary", + "tyro", + "rich", "pytest-xdist", "pytest-mock", "nbsphinx", diff --git a/src/py21cmfast/meson.build b/src/py21cmfast/meson.build index 491b3d209..08087425d 100644 --- a/src/py21cmfast/meson.build +++ b/src/py21cmfast/meson.build @@ -5,6 +5,7 @@ source_files = [ 'cli.py', 'lightconers.py', 'plotting.py', + 'rsds.py', 'run_templates.py', 'utils.py', 'yaml.py', diff --git a/src/py21cmfast/src/InputParameters.h b/src/py21cmfast/src/InputParameters.h index 71d293b32..7d3ee808d 100644 --- a/src/py21cmfast/src/InputParameters.h +++ b/src/py21cmfast/src/InputParameters.h @@ -63,7 +63,6 @@ typedef struct MatterOptions { int USE_RELATIVE_VELOCITIES; int POWER_SPECTRUM; int USE_INTERPOLATION_TABLES; - bool NO_RNG; bool PERTURB_ON_HIGH_RES; int PERTURB_ALGORITHM; bool MINIMIZE_MEMORY; diff --git a/src/py21cmfast/src/OutputStructs.h b/src/py21cmfast/src/OutputStructs.h index 8230a77c0..3f9f183cb 100644 --- a/src/py21cmfast/src/OutputStructs.h +++ b/src/py21cmfast/src/OutputStructs.h @@ -22,7 +22,7 @@ typedef struct HaloField { long long unsigned int n_halos; long long unsigned int buffer_size; float *halo_masses; - int *halo_coords; + float *halo_coords; // Halo properties for stochastic model float *star_rng; @@ -34,7 +34,7 @@ typedef struct PerturbHaloField { long long unsigned int n_halos; long long unsigned int buffer_size; float *halo_masses; - int *halo_coords; + float *halo_coords; // Halo properties for stochastic model float *star_rng; @@ 
-96,6 +96,7 @@ typedef struct IonizedBox { typedef struct BrightnessTemp { float *brightness_temp; + float *tau_21; } BrightnessTemp; #endif diff --git a/src/py21cmfast/src/_wrapper.cpp b/src/py21cmfast/src/_wrapper.cpp index 81bc85421..d4e479637 100644 --- a/src/py21cmfast/src/_wrapper.cpp +++ b/src/py21cmfast/src/_wrapper.cpp @@ -65,7 +65,6 @@ NB_MODULE(c_21cmfast, m) { .def_rw("USE_RELATIVE_VELOCITIES", &MatterOptions::USE_RELATIVE_VELOCITIES) .def_rw("POWER_SPECTRUM", &MatterOptions::POWER_SPECTRUM) .def_rw("USE_INTERPOLATION_TABLES", &MatterOptions::USE_INTERPOLATION_TABLES) - .def_rw("NO_RNG", &MatterOptions::NO_RNG) .def_rw("PERTURB_ON_HIGH_RES", &MatterOptions::PERTURB_ON_HIGH_RES) .def_rw("PERTURB_ALGORITHM", &MatterOptions::PERTURB_ALGORITHM) .def_rw("MINIMIZE_MEMORY", &MatterOptions::MINIMIZE_MEMORY) @@ -241,7 +240,7 @@ NB_MODULE(c_21cmfast, m) { .def("set_halo_masses", [](HaloField& self, nb::ndarray array) { self.halo_masses = array.data(); }) .def("set_halo_coords", - [](HaloField& self, nb::ndarray array) { self.halo_coords = array.data(); }) + [](HaloField& self, nb::ndarray array) { self.halo_coords = array.data(); }) .def("set_star_rng", [](HaloField& self, nb::ndarray array) { self.star_rng = array.data(); }) .def("set_sfr_rng", @@ -257,7 +256,7 @@ NB_MODULE(c_21cmfast, m) { .def("set_halo_masses", [](PerturbHaloField& self, nb::ndarray array) { self.halo_masses = array.data(); }) .def("set_halo_coords", [](PerturbHaloField& self, - nb::ndarray array) { self.halo_coords = array.data(); }) + nb::ndarray array) { self.halo_coords = array.data(); }) .def("set_star_rng", [](PerturbHaloField& self, nb::ndarray array) { self.star_rng = array.data(); }) .def("set_sfr_rng", @@ -365,9 +364,12 @@ NB_MODULE(c_21cmfast, m) { // Bind BrightnessTemp nb::class_(m, "BrightnessTemp") .def(nb::init<>()) - .def("set_brightness_temp", [](BrightnessTemp& self, nb::ndarray array) { - self.brightness_temp = array.data(); - }); + .def("set_brightness_temp", + 
[](BrightnessTemp& self, nb::ndarray array) { + self.brightness_temp = array.data(); + }) + .def("set_tau_21", + [](BrightnessTemp& self, nb::ndarray array) { self.tau_21 = array.data(); }); // Function Bindings // OutputStruct COMPUTE FUNCTIONS @@ -602,11 +604,11 @@ NB_MODULE(c_21cmfast, m) { m.def("FunctionThatThrows", &FunctionThatThrows); m.def("single_test_sample", - [](unsigned long long int seed, nb::ndarray conditions, nb::ndarray cond_crd, - double z_out, double z_in, nb::ndarray out_n_tot, nb::ndarray out_n_cell, - nb::ndarray out_n_exp, nb::ndarray out_m_cell, - nb::ndarray out_m_exp, nb::ndarray out_halo_masses, - nb::ndarray out_halo_coords) { + [](unsigned long long int seed, nb::ndarray conditions, + nb::ndarray cond_crd, double z_out, double z_in, nb::ndarray out_n_tot, + nb::ndarray out_n_cell, nb::ndarray out_n_exp, + nb::ndarray out_m_cell, nb::ndarray out_m_exp, + nb::ndarray out_halo_masses, nb::ndarray out_halo_coords) { size_t n_condition = conditions.shape(0); if (cond_crd.shape(0) != n_condition || cond_crd.shape(1) != 3) { throw std::runtime_error("cond_crd must have shape (n_condition, 3)."); @@ -628,7 +630,7 @@ NB_MODULE(c_21cmfast, m) { m.def("test_halo_props", [](double redshift, nb::ndarray vcb_grid, nb::ndarray J21_LW_grid, nb::ndarray z_re_grid, nb::ndarray Gamma12_ion_grid, nb::ndarray halo_masses, - nb::ndarray halo_coords, nb::ndarray star_rng, + nb::ndarray halo_coords, nb::ndarray star_rng, nb::ndarray sfr_rng, nb::ndarray xray_rng, nb::ndarray halo_props_out) { size_t n_halos = halo_masses.shape(0); diff --git a/src/py21cmfast/src/meson.build b/src/py21cmfast/src/meson.build index cf069f82f..2c843f88b 100644 --- a/src/py21cmfast/src/meson.build +++ b/src/py21cmfast/src/meson.build @@ -19,6 +19,7 @@ source_files = files([ 'filtering.c', 'heating_helper_progs.c', 'hmf.c', + 'indexing.c', 'integral_wrappers.c', 'interp_tables.c', 'interpolation.c', From dde85998d62a40030cfd60c29849a175f3166c6d Mon Sep 17 00:00:00 2001 From: James 
Davies Date: Wed, 23 Jul 2025 13:30:22 +1000 Subject: [PATCH 127/145] fix log level, move halo overflow error outside of parallel loop --- src/py21cmfast/src/Stochasticity.c | 25 +++++++++++++++---------- src/py21cmfast/src/cosmology.c | 8 ++++---- src/py21cmfast/src/meson.build | 9 +++++++-- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/py21cmfast/src/Stochasticity.c b/src/py21cmfast/src/Stochasticity.c index 1bbe05918..184660b2e 100644 --- a/src/py21cmfast/src/Stochasticity.c +++ b/src/py21cmfast/src/Stochasticity.c @@ -812,6 +812,8 @@ int sample_halo_grids(gsl_rng **rng_arr, double redshift, float *dens_field, double total_volume_dexm = 0.; double cell_volume = VOLUME / pow((double)simulation_options_global->HII_DIM, 3); + bool halo_overflow_error = false; + #pragma omp parallel num_threads(simulation_options_global->N_THREADS) { // PRIVATE VARIABLES @@ -881,16 +883,9 @@ int sample_halo_grids(gsl_rng **rng_arr, double redshift, float *dens_field, // we do not want to save these if (hm_buf[i] < simulation_options_global->SAMPLER_MIN_MASS) continue; - if (count >= arraysize_local) { - LOG_ERROR( - "More than %llu halos (expected %.1e) with buffer size factor %.1f", - arraysize_local, - arraysize_local / config_settings.HALO_CATALOG_MEM_FACTOR, - config_settings.HALO_CATALOG_MEM_FACTOR); - LOG_ERROR( - "If you expected to have an above average halo number try raising " - "config_settings.HALO_CATALOG_MEM_FACTOR"); - Throw(ValueError); + if (halo_overflow_error || count >= arraysize_local) { + halo_overflow_error = true; + continue; } random_point_in_cell((int[3]){x, y, z}, @@ -930,6 +925,16 @@ int sample_halo_grids(gsl_rng **rng_arr, double redshift, float *dens_field, nhalo_threads[threadnum] = count; } + if (halo_overflow_error) { + LOG_ERROR("More than %llu halos (expected %.1e) with buffer size factor %.1f", + arraysize_local, arraysize_local / config_settings.HALO_CATALOG_MEM_FACTOR, + config_settings.HALO_CATALOG_MEM_FACTOR); + 
LOG_ERROR( + "If you expected to have an above average halo number try raising " + "config_settings.HALO_CATALOG_MEM_FACTOR"); + Throw(ValueError); + } + LOG_SUPER_DEBUG("Total dexm volume %.6e Total volume excluded %.6e (In units of HII_DIM cells)", total_volume_dexm, total_volume_excluded); condense_sparse_halolist(halofield_out, istart_threads, nhalo_threads); diff --git a/src/py21cmfast/src/cosmology.c b/src/py21cmfast/src/cosmology.c index 6e489bfc5..b1cde18aa 100644 --- a/src/py21cmfast/src/cosmology.c +++ b/src/py21cmfast/src/cosmology.c @@ -126,7 +126,7 @@ double transfer_function_CLASS(double k, int flag_int, int flag_dv) { int gsl_status; FILE *F; - static bool warning_printed; + static bool warning_printed = false; static double eh_ratio_at_kmax; char filename[500]; @@ -137,7 +137,6 @@ double transfer_function_CLASS(double k, int flag_int, int flag_dv) { LOG_ERROR("Unable to open file: %s for reading.", filename); Throw(IOError); } - warning_printed = false; int nscans; for (i = 0; i < CLASS_LENGTH; i++) { @@ -190,9 +189,10 @@ double transfer_function_CLASS(double k, int flag_int, int flag_dv) { if (k > kclass[CLASS_LENGTH - 1]) { // k>kmax if (!warning_printed) { LOG_WARNING( - "Called transfer_function_CLASS with k=%f, larger than kmax! performing linear " + "Called transfer_function_CLASS with k=%f > %f, larger than kmax! 
performing " + "linear " "extrapolation with Eisenstein & Hu", - k); + k, kclass[CLASS_LENGTH - 1]); warning_printed = true; } if (flag_dv == 0) { // output is density diff --git a/src/py21cmfast/src/meson.build b/src/py21cmfast/src/meson.build index 2c843f88b..f9dcd6e93 100644 --- a/src/py21cmfast/src/meson.build +++ b/src/py21cmfast/src/meson.build @@ -56,8 +56,13 @@ log_level_map = { 'ULTRA_DEBUG': 6, } -# Get the LOG_LEVEL environment variable (default to 'WARNING' if not set) -log_level_str = get_option('log_level') +# Check for environment variable first, then fall back to meson option +env_log_level = run_command('printenv', 'LOG_LEVEL', check: false).stdout().strip() +if env_log_level != '' + log_level_str = env_log_level +else + log_level_str = get_option('log_level') +endif # Convert the string to an integer using the map, defaulting to 2 (warnings) if the key is invalid log_level = log_level_map.get(log_level_str, 2) From 3910e79df9df4f857c7865dd165f54c70351ec2d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 23 Jul 2025 03:41:25 +0000 Subject: [PATCH 128/145] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/py21cmfast/src/DeviceConstants.cuh | 2 +- src/py21cmfast/src/MapMass_cpu.c | 8 ++--- src/py21cmfast/src/Stochasticity.cu | 40 ++++++++++++------------ src/py21cmfast/src/Stochasticity.cuh | 2 +- src/py21cmfast/src/cuda_hello_world.cu | 2 +- src/py21cmfast/src/cuda_hello_world.cuh | 2 +- src/py21cmfast/src/cuda_utils.cuh | 2 +- src/py21cmfast/src/device_rng.cu | 4 +-- src/py21cmfast/src/device_rng.cuh | 2 +- src/py21cmfast/src/hmf.cuh | 2 +- src/py21cmfast/src/interp_tables.c | 12 +++---- src/py21cmfast/src/interp_tables.cu | 7 ++--- src/py21cmfast/src/interpolation.cu | 1 - src/py21cmfast/src/interpolation.cuh | 2 +- src/py21cmfast/src/interpolation_types.h | 20 +++++------- tests/test_halo_sampler.py | 1 - 16 files 
changed, 50 insertions(+), 59 deletions(-) diff --git a/src/py21cmfast/src/DeviceConstants.cuh b/src/py21cmfast/src/DeviceConstants.cuh index f7b51486a..759a731f7 100644 --- a/src/py21cmfast/src/DeviceConstants.cuh +++ b/src/py21cmfast/src/DeviceConstants.cuh @@ -12,4 +12,4 @@ extern __constant__ double d_test_params; #endif -#endif \ No newline at end of file +#endif diff --git a/src/py21cmfast/src/MapMass_cpu.c b/src/py21cmfast/src/MapMass_cpu.c index 07a17b11e..fe451f5d9 100644 --- a/src/py21cmfast/src/MapMass_cpu.c +++ b/src/py21cmfast/src/MapMass_cpu.c @@ -35,11 +35,9 @@ double *MapMass_cpu(InitialConditions *boxes, double *resampled_box, int dimensi int dimension_z = simulation_options_global->NON_CUBIC_FACTOR * dimension_pt; int dimension_ic = simulation_options_global->DIM; -#pragma omp parallel shared(init_growth_factor, boxes, f_pixel_factor, resampled_box, \ - dimension) private(i, j, k, xi, xf, yi, yf, zi, zf, HII_i, HII_j, \ - HII_k, d_x, d_y, d_z, t_x, t_y, t_z, xp1, \ - yp1, zp1) \ - num_threads(simulation_options_global -> N_THREADS) +#pragma omp parallel shared(init_growth_factor, boxes, f_pixel_factor, resampled_box, dimension) \ + private(i, j, k, xi, xf, yi, yf, zi, zf, HII_i, HII_j, HII_k, d_x, d_y, d_z, t_x, t_y, t_z, \ + xp1, yp1, zp1) num_threads(simulation_options_global -> N_THREADS) { #pragma omp for for (i = 0; i < dimension_ic; i++) { diff --git a/src/py21cmfast/src/Stochasticity.cu b/src/py21cmfast/src/Stochasticity.cu index f226578a8..2d0e4712c 100644 --- a/src/py21cmfast/src/Stochasticity.cu +++ b/src/py21cmfast/src/Stochasticity.cu @@ -100,7 +100,7 @@ void condense_device_vector() // // Wrap the raw device pointer into a thrust device pointer // thrust::device_ptr d_array_ptr(d_array); -// // Remove elements with mask value +// // Remove elements with mask value // // i.e.move elements not equal to mask value to the beginning of the array without changing order // auto new_end = thrust::remove(d_array_ptr, d_array_ptr + 
original_size, mask_value); @@ -133,7 +133,7 @@ int condenseDeviceArray(T *d_array, int original_size, T mask_value) return valid_size; } -// todo: maybe add python wrapper for test functions +// todo: maybe add python wrapper for test functions void testCondenseDeviceArray() { // Input data @@ -291,7 +291,7 @@ struct GridLayout{ int n_threads; int n_blocks; }; -// calculate workload +// calculate workload // todo: add more checks on sparsity GridLayout getWorkload(int sparsity, unsigned long long int n_halos){ GridLayout res; @@ -387,7 +387,7 @@ __device__ void fix_mass_sample(curandState *state, double exp_M, float *M_prog, int random_idx; double last_M_del; int sel = curand(state) % 2; - // int sel = 1; //tmp: implement the first case + // int sel = 1; //tmp: implement the first case if (sel) { if (fabs(*M_prog - M_out[write_limit] - exp_M) < fabs(*M_prog - exp_M)) @@ -412,7 +412,7 @@ __device__ void fix_mass_sample(curandState *state, double exp_M, float *M_prog, M_out[random_idx] = last_M_del; *M_prog += last_M_del; } - + } } @@ -573,13 +573,13 @@ __device__ void set_prop_rng(curandState *state, bool from_catalog, double *inte return; } -__global__ void update_halo_constants(float *d_halo_masses, float *d_star_rng_in, float *d_sfr_rng_in, float *d_xray_rng_in, +__global__ void update_halo_constants(float *d_halo_masses, float *d_star_rng_in, float *d_sfr_rng_in, float *d_xray_rng_in, int *d_halo_coords_in, float *d_y_arr, double x_min, double x_width, unsigned long long int n_halos, int n_bin, struct HaloSamplingConstants d_hs_constants, int HMF, float *d_halo_masses_out, float *d_star_rng_out, float *d_sfr_rng_out, float *d_xray_rng_out, int *d_halo_coords_out, int *d_sum_check, - int *d_further_process, int *d_nprog_predict, int sparsity, unsigned long long int write_offset, + int *d_further_process, int *d_nprog_predict, int sparsity, unsigned long long int write_offset, double *expected_mass, int *d_n_prog, int offset_shared) { // Define shared memory for 
block-level reduction @@ -600,11 +600,11 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_star_rng_in for (int i=0;i<3;i++){ shared_prop_rng[tid+i*offset_shared] = 0.0f; } - + // get global thread idx int ind = blockIdx.x * blockDim.x + threadIdx.x; - + // get halo idx int hid = ind / sparsity; if (hid >= n_halos) @@ -661,7 +661,7 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_star_rng_in // printf("tmp res4 on gpu: %.17f \n", res4); // // tiger tmp: debug (end) // } - + curandState local_state = d_randStates[ind]; // if (blockIdx.x > 100000){ // // printf("check here. \n"); @@ -681,7 +681,7 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_star_rng_in // get stochastic halo properties set_prop_rng(&local_state, true, corr_arr, prop_in, &shared_prop_rng[tid*3]); - + __syncthreads(); @@ -716,11 +716,11 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_star_rng_in break; } } - + if (meetCondition){ // correct the mass samples int n_prog = write_limit +1; - + fix_mass_sample(&local_state, d_hs_constants.expected_M, &Mprog, &shared_mass[tid], write_limit, &n_prog); // record number of progenitors @@ -903,7 +903,7 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr printf("start launching kernel function.\n"); update_halo_constants<<>>(d_halo_masses, d_star_rng, d_sfr_rng, d_xray_rng, d_halo_coords, d_y_arr, x_min, x_width, n_halos_tbp, n_bin_y, hs_constants, HMF, d_halo_masses_out, d_star_rng_out, - d_sfr_rng_out, d_xray_rng_out, d_halo_coords_out, d_sum_check, d_further_process, d_nprog_predict, sparsity, write_offset, d_expected_mass, + d_sfr_rng_out, d_xray_rng_out, d_halo_coords_out, d_sum_check, d_further_process, d_nprog_predict, sparsity, write_offset, d_expected_mass, d_n_prog, offset_shared); // Check kernel launch errors @@ -915,7 +915,7 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr n_halos_tbp = 
filterWithMask(d_halo_masses, d_further_process, n_halos_tbp); printf("The number of halos for further processing: %d \n", n_halos_tbp); - // // tmp 2025-01-19: check d_halo_masses_out writing out + // // tmp 2025-01-19: check d_halo_masses_out writing out // float *h_halo_masses_out_check; // CALL_CUDA(cudaHostAlloc((void **)&h_halo_masses_out_check, buffer_size, cudaHostAllocDefault)); // CALL_CUDA(cudaMemcpy(h_halo_masses_out_check, d_halo_masses_out, buffer_size, cudaMemcpyDeviceToHost)); @@ -935,7 +935,7 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr // condense halo mass array on the device n_processed_prog = condenseDeviceArray(d_halo_masses_out, d_n_buffer, 0.0f); printf("The number of progenitors written in out halo field so far: %d \n", n_processed_prog); - + // condense other halo field arrays on the device unsigned long long int n_processed_star_rng = condenseDeviceArray(d_star_rng_out, d_n_buffer, 0.0f); printf("The number of star prop rng written in out halo field so far: %d \n", n_processed_star_rng); @@ -976,7 +976,7 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr // reset mask array CALL_CUDA(cudaMemset(d_further_process, 0, sizeof(int) * n_halos)); - + // copy data from device to host int h_sum_check; CALL_CUDA(cudaMemcpy(&h_sum_check, d_sum_check, sizeof(int), cudaMemcpyDeviceToHost)); @@ -990,7 +990,7 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr // write data back to the host halofield_out->n_halos = n_processed_prog; size_t out_size = sizeof(float) * n_processed_prog; - + // float *h_halo_masses_out; // CALL_CUDA(cudaHostAlloc((void **)&h_halo_masses_out, out_size, cudaHostAllocDefault)); CALL_CUDA(cudaGetLastError()); @@ -998,7 +998,7 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr CALL_CUDA(cudaMemcpy(halofield_out->halo_masses, d_halo_masses_out, out_size, cudaMemcpyDeviceToHost)); - + 
CALL_CUDA(cudaMemcpy(halofield_out->star_rng, d_star_rng_out, out_size, cudaMemcpyDeviceToHost)); CALL_CUDA(cudaMemcpy(halofield_out->sfr_rng, d_sfr_rng_out, out_size, cudaMemcpyDeviceToHost)); CALL_CUDA(cudaMemcpy(halofield_out->xray_rng, d_xray_rng_out, out_size, cudaMemcpyDeviceToHost)); @@ -1006,7 +1006,7 @@ int updateHaloOut(float *halo_masses, float *star_rng, float *sfr_rng, float *xr size_t out_coords_size = sizeof(int) * n_processed_prog * 3; CALL_CUDA(cudaMemcpy(halofield_out->halo_coords, d_halo_coords_out, out_coords_size, cudaMemcpyDeviceToHost)); - + // Free device memory CALL_CUDA(cudaFree(d_halo_masses)); CALL_CUDA(cudaFree(d_y_arr)); diff --git a/src/py21cmfast/src/Stochasticity.cuh b/src/py21cmfast/src/Stochasticity.cuh index 4c71d5705..25a1670aa 100644 --- a/src/py21cmfast/src/Stochasticity.cuh +++ b/src/py21cmfast/src/Stochasticity.cuh @@ -14,4 +14,4 @@ extern "C" } #endif -#endif \ No newline at end of file +#endif diff --git a/src/py21cmfast/src/cuda_hello_world.cu b/src/py21cmfast/src/cuda_hello_world.cu index b6bf6298b..f63633844 100644 --- a/src/py21cmfast/src/cuda_hello_world.cu +++ b/src/py21cmfast/src/cuda_hello_world.cu @@ -28,4 +28,4 @@ void print_key_device_properties(){ printf("Memory pitch: %zu bytes \n", deviceProp.memPitch); printf("Max threads per block: %d \n", deviceProp.maxThreadsPerBlock); printf("Total constant memory: %zu bytes \n", deviceProp.totalConstMem); -} \ No newline at end of file +} diff --git a/src/py21cmfast/src/cuda_hello_world.cuh b/src/py21cmfast/src/cuda_hello_world.cuh index 27cf56995..5a34921db 100644 --- a/src/py21cmfast/src/cuda_hello_world.cuh +++ b/src/py21cmfast/src/cuda_hello_world.cuh @@ -11,4 +11,4 @@ extern "C" } #endif -#endif // _CUDA_HELLO_WORLD_CUH \ No newline at end of file +#endif // _CUDA_HELLO_WORLD_CUH diff --git a/src/py21cmfast/src/cuda_utils.cuh b/src/py21cmfast/src/cuda_utils.cuh index 57e596cb9..f2e992651 100644 --- a/src/py21cmfast/src/cuda_utils.cuh +++ 
b/src/py21cmfast/src/cuda_utils.cuh @@ -15,4 +15,4 @@ } \ } while (0) -#endif \ No newline at end of file +#endif diff --git a/src/py21cmfast/src/device_rng.cu b/src/py21cmfast/src/device_rng.cu index 218058369..eda652e39 100644 --- a/src/py21cmfast/src/device_rng.cu +++ b/src/py21cmfast/src/device_rng.cu @@ -15,11 +15,11 @@ __global__ void initRandStates(unsigned long long int random_seed, int totalStat { // get thread idx int ind = blockIdx.x * blockDim.x + threadIdx.x; - + if (ind < totalStates){ curand_init(random_seed, ind, 0, &d_randStates[ind]); - // todo: add the following block to debug + // todo: add the following block to debug if (ind < 2) { printf("temp check rng init.\n"); diff --git a/src/py21cmfast/src/device_rng.cuh b/src/py21cmfast/src/device_rng.cuh index 2f94321ab..4d2b449d8 100644 --- a/src/py21cmfast/src/device_rng.cuh +++ b/src/py21cmfast/src/device_rng.cuh @@ -21,4 +21,4 @@ extern "C" } #endif -#endif +#endif diff --git a/src/py21cmfast/src/hmf.cuh b/src/py21cmfast/src/hmf.cuh index d942de874..84316a5ef 100644 --- a/src/py21cmfast/src/hmf.cuh +++ b/src/py21cmfast/src/hmf.cuh @@ -21,4 +21,4 @@ __device__ double sheth_delc_fixed(double del, double sig); __device__ double get_delta_crit(int HMF, double sigma, double growthf); // #endif -#endif \ No newline at end of file +#endif diff --git a/src/py21cmfast/src/interp_tables.c b/src/py21cmfast/src/interp_tables.c index 25d67caf4..9ed0b88d2 100644 --- a/src/py21cmfast/src/interp_tables.c +++ b/src/py21cmfast/src/interp_tables.c @@ -115,7 +115,7 @@ void initialise_SFRD_spline(int Nbin, float zmin, float zmax, struct ScalingCons SFRD_z_table_MINI.y_width = (LOG10_MTURN_MAX - LOG10_MTURN_MIN) / ((double)NMTURN - 1.); } -#pragma omp parallel private(i, j) num_threads(simulation_options_global->N_THREADS) +#pragma omp parallel private(i, j) num_threads(simulation_options_global -> N_THREADS) { struct ScalingConstants sc_sfrd; sc_sfrd = evolve_scaling_constants_sfr(sc); @@ -181,7 +181,7 @@ void 
initialise_Nion_Ts_spline(int Nbin, float zmin, float zmax, struct ScalingC Nion_z_table_MINI.y_width = (LOG10_MTURN_MAX - LOG10_MTURN_MIN) / ((double)NMTURN - 1.); } -#pragma omp parallel private(i, j) num_threads(simulation_options_global->N_THREADS) +#pragma omp parallel private(i, j) num_threads(simulation_options_global -> N_THREADS) { struct ScalingConstants sc_z; double mturn_mcg; @@ -353,7 +353,7 @@ void initialise_Nion_Conditional_spline(double z, double min_density, double max } } -#pragma omp parallel private(i, j) num_threads(simulation_options_global->N_THREADS) +#pragma omp parallel private(i, j) num_threads(simulation_options_global -> N_THREADS) { #pragma omp for for (i = 0; i < NDELTA; i++) { @@ -451,7 +451,7 @@ void initialise_SFRD_Conditional_table(double z, double min_density, double max_ struct ScalingConstants sc_sfrd = evolve_scaling_constants_sfr(sc); -#pragma omp parallel private(i, k) num_threads(simulation_options_global->N_THREADS) +#pragma omp parallel private(i, k) num_threads(simulation_options_global -> N_THREADS) { double curr_dens; #pragma omp for @@ -530,7 +530,7 @@ void initialise_Xray_Conditional_table(double redshift, double min_density, doub Xray_conditional_table_1D.x_width = (max_density - min_density) / (NDELTA - 1.); } -#pragma omp parallel private(i, k) num_threads(simulation_options_global->N_THREADS) +#pragma omp parallel private(i, k) num_threads(simulation_options_global -> N_THREADS) { double curr_dens; #pragma omp for @@ -1143,7 +1143,7 @@ void initialiseSigmaMInterpTable(float M_min, float M_max) { dSigmasqdm_InterpTable.x_min = log(M_min); dSigmasqdm_InterpTable.x_width = (log(M_max) - log(M_min)) / (N_MASS_INTERP - 1.); -#pragma omp parallel private(i) num_threads(simulation_options_global->N_THREADS) +#pragma omp parallel private(i) num_threads(simulation_options_global -> N_THREADS) { float Mass; #pragma omp for diff --git a/src/py21cmfast/src/interp_tables.cu b/src/py21cmfast/src/interp_tables.cu index 
c47013c4a..1f0702af5 100644 --- a/src/py21cmfast/src/interp_tables.cu +++ b/src/py21cmfast/src/interp_tables.cu @@ -26,7 +26,7 @@ __constant__ double d_Mcoll_yarr[device_n_max]; // copy tables to gpu void copyTablesToDevice(RGTable1D h_Nhalo_table, RGTable1D h_Mcoll_table, RGTable2D h_Nhalo_inv_table) { - // copy Nhalo table and its member y_arr + // copy Nhalo table and its member y_arr size_t size_Nhalo_yarr = sizeof(double) * h_Nhalo_table.n_bin; // get a copy of the Nhalo table RGTable1D h_Nhalo_table_to_device = h_Nhalo_table; @@ -66,7 +66,7 @@ void copyTablesToDevice(RGTable1D h_Nhalo_table, RGTable1D h_Mcoll_table, RGTabl size_t size_Nhalo_inv_flatten_data = sizeof(double) * h_Nhalo_inv_table.nx_bin * h_Nhalo_inv_table.ny_bin; // get a copy of Nhalo_inv_table RGTable2D h_Nhalo_inv_table_to_device = h_Nhalo_inv_table; - + double *d_Nhalo_flatten_data; CALL_CUDA(cudaMalloc(&d_Nhalo_flatten_data, size_Nhalo_inv_flatten_data)); CALL_CUDA(cudaMemcpy(d_Nhalo_flatten_data, h_Nhalo_inv_table.flatten_data, size_Nhalo_inv_flatten_data, cudaMemcpyHostToDevice)); @@ -82,7 +82,7 @@ void copyTablesToDevice(RGTable1D h_Nhalo_table, RGTable1D h_Mcoll_table, RGTabl CALL_CUDA(cudaMalloc(&d_z_arr, size_z_arr)); CALL_CUDA(cudaMemcpy(d_z_arr, z_arr_to_device, size_z_arr, cudaMemcpyHostToDevice)); - + // free data after it's been copied to the device CALL_CUDA(cudaFreeHost(z_arr_to_device)); @@ -155,4 +155,3 @@ __device__ double EvaluateNhalo(double condition, double growthf, double lnMmin, // todo: implement Nhalo_Conditional return 0; } - diff --git a/src/py21cmfast/src/interpolation.cu b/src/py21cmfast/src/interpolation.cu index fe057373d..761ea6b94 100644 --- a/src/py21cmfast/src/interpolation.cu +++ b/src/py21cmfast/src/interpolation.cu @@ -40,4 +40,3 @@ __device__ double EvaluateRGTable2D(double x, double y, RGTable2D *table) return result; } - diff --git a/src/py21cmfast/src/interpolation.cuh b/src/py21cmfast/src/interpolation.cuh index 1206ef8a1..2ea3bffb7 100644 --- 
a/src/py21cmfast/src/interpolation.cuh +++ b/src/py21cmfast/src/interpolation.cuh @@ -11,4 +11,4 @@ __device__ double EvaluateRGTable2D(double x, double y, RGTable2D *table); #endif -#endif \ No newline at end of file +#endif diff --git a/src/py21cmfast/src/interpolation_types.h b/src/py21cmfast/src/interpolation_types.h index 3d1f3c649..c869dc248 100644 --- a/src/py21cmfast/src/interpolation_types.h +++ b/src/py21cmfast/src/interpolation_types.h @@ -1,8 +1,7 @@ #ifndef _INTERPOLATION_TYPES_H #define _INTERPOLATION_TYPES_H -typedef struct RGTable1D -{ +typedef struct RGTable1D { int n_bin; double x_min; double x_width; @@ -11,21 +10,19 @@ typedef struct RGTable1D bool allocated; } RGTable1D; -typedef struct RGTable2D -{ +typedef struct RGTable2D { int nx_bin, ny_bin; double x_min, y_min; double x_width, y_width; double **z_arr; double *flatten_data; - - double saved_ll, saved_ul; // for future acceleration + + double saved_ll, saved_ul; // for future acceleration bool allocated; } RGTable2D; -typedef struct RGTable1D_f -{ +typedef struct RGTable1D_f { int n_bin; double x_min; double x_width; @@ -34,16 +31,15 @@ typedef struct RGTable1D_f bool allocated; } RGTable1D_f; -typedef struct RGTable2D_f -{ +typedef struct RGTable2D_f { int nx_bin, ny_bin; double x_min, y_min; double x_width, y_width; float **z_arr; - double saved_ll, saved_ul; // for future acceleration + double saved_ll, saved_ul; // for future acceleration bool allocated; } RGTable2D_f; -#endif \ No newline at end of file +#endif diff --git a/tests/test_halo_sampler.py b/tests/test_halo_sampler.py index b3eb0c927..961417b19 100644 --- a/tests/test_halo_sampler.py +++ b/tests/test_halo_sampler.py @@ -4,7 +4,6 @@ import numpy as np import pytest -import py21cmfast.c_21cmfast as lib from py21cmfast import ( compute_halo_grid, compute_initial_conditions, From 8f168ab6c34c34fb6381f8719843e316f0915e48 Mon Sep 17 00:00:00 2001 From: James Davies Date: Wed, 23 Jul 2025 17:32:02 +1000 Subject: [PATCH 129/145] fix 
code duplication from merge --- src/py21cmfast/src/Stochasticity.c | 69 +++++++++++------------------- 1 file changed, 24 insertions(+), 45 deletions(-) diff --git a/src/py21cmfast/src/Stochasticity.c b/src/py21cmfast/src/Stochasticity.c index 184660b2e..5a0bb2d2f 100644 --- a/src/py21cmfast/src/Stochasticity.c +++ b/src/py21cmfast/src/Stochasticity.c @@ -1002,6 +1002,7 @@ int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloFi Throw(ValueError); #endif } else { // CPU fallback + bool halo_overflow_error = false; #pragma omp parallel num_threads(simulation_options_global->N_THREADS) { float prog_buf[MAX_HALO_CELL]; @@ -1055,24 +1056,10 @@ int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloFi // we do not want to save these if (prog_buf[jj] < simulation_options_global->SAMPLER_MIN_MASS) continue; - if (count >= arraysize_local) { - LOG_ERROR( - "More than %llu halos (expected %.1e) with buffer size factor %.1f", - arraysize_local, - arraysize_local / config_settings.HALO_CATALOG_MEM_FACTOR, - config_settings.HALO_CATALOG_MEM_FACTOR); - LOG_ERROR( - "Input Mass = %.2e at %llu of %llu, something went wrong in the input " - "catalogue", - M2, ii, nhalo_in); - Throw(ValueError); + if (halo_overflow_error || count >= arraysize_local) { + halo_overflow_error = true; + continue; } - // set condition-dependent variables for sampling - stoc_set_consts_cond(&hs_constants_priv, M2); - - // Sample the CMF set by the descendant - stoc_sample(&hs_constants_priv, rng_arr[threadnum], &n_prog, prog_buf); - halofield_out->halo_masses[istart + count] = prog_buf[jj]; // Place the progenitor in a random position within the condition @@ -1082,6 +1069,7 @@ int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloFi random_point_in_sphere(pos_desc, R2 - R1, rng_arr[threadnum], pos_prog); wrap_position(pos_prog, boxlen); + set_prop_rng(rng_arr[threadnum], true, corr_arr, propbuf_in, propbuf_out); 
halofield_out->halo_coords[3 * (istart + count) + 0] = pos_prog[0]; halofield_out->halo_coords[3 * (istart + count) + 1] = pos_prog[1]; halofield_out->halo_coords[3 * (istart + count) + 2] = pos_prog[2]; @@ -1102,55 +1090,46 @@ int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloFi Throw(ValueError); } - set_prop_rng(rng_arr[threadnum], true, corr_arr, propbuf_in, propbuf_out); - - halofield_out->halo_masses[istart + count] = prog_buf[jj]; - halofield_out->halo_coords[3 * (istart + count) + 0] = - halofield_in->halo_coords[3 * ii + 0]; - halofield_out->halo_coords[3 * (istart + count) + 1] = - halofield_in->halo_coords[3 * ii + 1]; - halofield_out->halo_coords[3 * (istart + count) + 2] = - halofield_in->halo_coords[3 * ii + 2]; - - halofield_out->star_rng[istart + count] = propbuf_out[0]; - halofield_out->sfr_rng[istart + count] = propbuf_out[1]; - halofield_out->xray_rng[istart + count] = propbuf_out[2]; - count++; - if (ii == 0) { M_prog += prog_buf[jj]; - LOG_ULTRA_DEBUG( - "First Halo Prog %d: Mass %.2e Stellar %.2e SFR %.2e XRAY %.2e e_d " - "%.3f", - jj, prog_buf[jj], propbuf_out[0], propbuf_out[1], propbuf_out[2], - Deltac * hs_constants->growth_out / hs_constants->growth_in); + "Halo %d Prog %d: Mass %.2e Stellar %.2e SFR %.2e XRAY %.2e", ii, jj, + prog_buf[jj], propbuf_out[0], propbuf_out[1], propbuf_out[2]); } } if (ii == 0) { LOG_ULTRA_DEBUG( " HMF %d delta %.3f delta_coll %.3f delta_desc %.3f adjusted %.3f", - simulation_options_global->HMF, hs_constants_priv.delta, - get_delta_crit(simulation_options_global->HMF, hs_constants_priv.sigma_cond, + matter_options_global->HMF, hs_constants_priv.delta, + get_delta_crit(matter_options_global->HMF, hs_constants_priv.sigma_cond, hs_constants->growth_out), - get_delta_crit(simulation_options_global->HMF, hs_constants_priv.sigma_cond, + get_delta_crit(matter_options_global->HMF, hs_constants_priv.sigma_cond, hs_constants->growth_in), - get_delta_crit(simulation_options_global->HMF, 
hs_constants_priv.sigma_cond, + get_delta_crit(matter_options_global->HMF, hs_constants_priv.sigma_cond, hs_constants->growth_in) * hs_constants->growth_out / hs_constants->growth_in); print_hs_consts(&hs_constants_priv); - LOG_SUPER_DEBUG( - "First Halo: Mass %.2f | N %d (exp. %.2e) | Total M %.2e (exp. %.2e)", M2, + } + if (ii == 0) { + LOG_ULTRA_DEBUG( + "Halo %d: Mass %.2f | N %d (exp. %.2e) | Total M %.2e (exp. %.2e)", ii, M2, n_prog, hs_constants_priv.expected_N, M_prog, hs_constants_priv.expected_M); } } istart_threads[threadnum] = istart; nhalo_threads[threadnum] = count; } + if (halo_overflow_error) { + LOG_ERROR("More than %llu halos (expected %.1e) with buffer size factor %.1f", + arraysize_local, arraysize_local / config_settings.HALO_CATALOG_MEM_FACTOR, + config_settings.HALO_CATALOG_MEM_FACTOR); + LOG_ERROR( + "If you expected to have an above average halo number try raising " + "config_settings.HALO_CATALOG_MEM_FACTOR"); + Throw(ValueError); + } condense_sparse_halolist(halofield_out, istart_threads, nhalo_threads); - return 0; } - return 0; } From 2a26701d92af033a1579263c89db10866fe321cd Mon Sep 17 00:00:00 2001 From: James Davies Date: Fri, 25 Jul 2025 15:23:20 +1000 Subject: [PATCH 130/145] use setuptools_scm for versioning --- .bumpversion.cfg | 6 ------ VERSION | 1 - bump | 18 ------------------ meson.build | 3 ++- pyproject.toml | 6 +++++- 5 files changed, 7 insertions(+), 27 deletions(-) delete mode 100644 .bumpversion.cfg delete mode 100644 VERSION delete mode 100755 bump diff --git a/.bumpversion.cfg b/.bumpversion.cfg deleted file mode 100644 index 97e8d915f..000000000 --- a/.bumpversion.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[bumpversion] -current_version = 3.4.0 -commit = False -tag = False - -[bumpversion:file:VERSION] diff --git a/VERSION b/VERSION deleted file mode 100644 index 18091983f..000000000 --- a/VERSION +++ /dev/null @@ -1 +0,0 @@ -3.4.0 diff --git a/bump b/bump deleted file mode 100755 index 1d8fa939d..000000000 --- a/bump +++ 
/dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/bash -set -e - -PART=$1 - -OLDVERSION=$(cat VERSION) -NEWVERSION=$(bump2version --dry-run --list ${PART} | grep new_version | sed -r s,"^.*=",,) -echo "New Version: ${NEWVERSION}" - -# Actually Run The Update -bump2version $PART - -# Now add in stuff to the changelog -python changethelog.py ${NEWVERSION} - -# Now commit -git add . -git commit -m "Bump Version: ${OLDVERSION} -> ${NEWVERSION}" diff --git a/meson.build b/meson.build index 0eb63bc09..d15024636 100644 --- a/meson.build +++ b/meson.build @@ -1,5 +1,6 @@ +#To find the version we need to find python and run setuptools-scm, while keeping the project() call first project('21cmFAST', [ 'c', 'cpp' ], - version : run_command('cat', 'VERSION').stdout().strip(), + version : run_command(find_program('python'), '-c', 'from setuptools_scm import get_version; print(get_version())', check: true).stdout().strip(), default_options : ['cpp_std=c++17'], ) diff --git a/pyproject.toml b/pyproject.toml index df7f0f0ab..d8f8647b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,11 +112,15 @@ Changelog="https://github.com/21cmFAST/21cmFAST" [build-system] build-backend = 'mesonpy' -requires = ['meson-python','nanobind>=2.4.0'] +requires = ['meson-python', 'nanobind>=2.4.0', 'setuptools-scm>=8'] [tool.meson-python.args] setup = ["-Dbuildtype=release"] +[tool.setuptools_scm] +version_file = "src/py21cmfast/_version.py" +fallback_version = "4.0.0b1" + [tool.pytest.ini_options] testpaths = ["tests"] From 7156183c96f93a7dcf20bd29362c81d81c053b23 Mon Sep 17 00:00:00 2001 From: James Davies Date: Tue, 29 Jul 2025 14:37:24 +1000 Subject: [PATCH 131/145] delete old RSD params --- src/py21cmfast/src/_wrapper.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/py21cmfast/src/_wrapper.cpp b/src/py21cmfast/src/_wrapper.cpp index d4e479637..b6099fef0 100644 --- a/src/py21cmfast/src/_wrapper.cpp +++ b/src/py21cmfast/src/_wrapper.cpp @@ -113,7 +113,6 @@ NB_MODULE(c_21cmfast, m) { 
.def_rw("FIXED_VAVG", &AstroParams::FIXED_VAVG) .def_rw("POP2_ION", &AstroParams::POP2_ION) .def_rw("POP3_ION", &AstroParams::POP3_ION) - .def_rw("N_RSD_STEPS", &AstroParams::N_RSD_STEPS) .def_rw("PHOTONCONS_CALIBRATION_END", &AstroParams::PHOTONCONS_CALIBRATION_END) .def_rw("CLUMPING_FACTOR", &AstroParams::CLUMPING_FACTOR) .def_rw("ALPHA_UVB", &AstroParams::ALPHA_UVB) @@ -132,8 +131,6 @@ NB_MODULE(c_21cmfast, m) { .def_rw("USE_CMB_HEATING", &AstroOptions::USE_CMB_HEATING) .def_rw("USE_LYA_HEATING", &AstroOptions::USE_LYA_HEATING) .def_rw("USE_MASS_DEPENDENT_ZETA", &AstroOptions::USE_MASS_DEPENDENT_ZETA) - .def_rw("SUBCELL_RSD", &AstroOptions::SUBCELL_RSD) - .def_rw("APPLY_RSDS", &AstroOptions::APPLY_RSDS) .def_rw("INHOMO_RECO", &AstroOptions::INHOMO_RECO) .def_rw("USE_TS_FLUCT", &AstroOptions::USE_TS_FLUCT) .def_rw("M_MIN_in_Mass", &AstroOptions::M_MIN_in_Mass) From fbf41ebcd42e402ee759a4cd31d5f16547a1944e Mon Sep 17 00:00:00 2001 From: James Davies Date: Tue, 29 Jul 2025 15:18:59 +1000 Subject: [PATCH 132/145] rename parameters in CUDA files --- src/py21cmfast/src/HaloField.cu | 6 ++++-- src/py21cmfast/src/IonisationBox.cu | 10 +++++----- src/py21cmfast/src/MapMass_gpu.cu | 24 ++++++++++-------------- src/py21cmfast/src/Stochasticity.cu | 20 ++++++++++---------- 4 files changed, 29 insertions(+), 31 deletions(-) diff --git a/src/py21cmfast/src/HaloField.cu b/src/py21cmfast/src/HaloField.cu index 6be600b7e..0390316c2 100644 --- a/src/py21cmfast/src/HaloField.cu +++ b/src/py21cmfast/src/HaloField.cu @@ -7,12 +7,14 @@ #include "HaloField.cuh" // define relevant variables stored in constant memory -__constant__ SimulationOptions d_user_params; +__constant__ MatterOptions d_matter_options; +__constant__ SimulationOptions d_simulation_options; __constant__ CosmoParams d_cosmo_params; __constant__ AstroParams d_astro_params; -void updateGlobalParams(SimulationOptions *h_user_params, CosmoParams *h_cosmo_params, AstroParams *h_astro_params){ +void 
updateGlobalParams(SimulationOptions *h_simulation_options, MatterOptions * h_matter_options, CosmoParams *h_cosmo_params, AstroParams *h_astro_params){ cudaMemcpyToSymbol(d_simulation_options, h_simulation_options, sizeof(SimulationOptions), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(d_matter_options, h_matter_options, sizeof(MatterOptions), 0, cudaMemcpyHostToDevice); cudaMemcpyToSymbol(d_cosmo_params, h_cosmo_params, sizeof(CosmoParams), 0, cudaMemcpyHostToDevice); cudaMemcpyToSymbol(d_astro_params, h_astro_params, sizeof(AstroParams), 0, cudaMemcpyHostToDevice); } diff --git a/src/py21cmfast/src/IonisationBox.cu b/src/py21cmfast/src/IonisationBox.cu index 83b105d4f..f0d473825 100644 --- a/src/py21cmfast/src/IonisationBox.cu +++ b/src/py21cmfast/src/IonisationBox.cu @@ -120,7 +120,7 @@ void init_ionbox_gpu_data( sizeof(fftwf_complex) * hii_kspace_num_pixels)); // dereference the pointer to a pointer (*) - if (flag_options_global->USE_TS_FLUCT) { + if (astro_options_global->USE_TS_FLUCT) { CALL_CUDA(cudaMalloc((void **)d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels)); CALL_CUDA(cudaMemset(*d_xe_filtered, 0, @@ -175,7 +175,7 @@ void calculate_fcoll_grid_gpu( CALL_CUDA(cudaMemcpy(d_deltax_filtered, h_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); - if (flag_options_global->USE_TS_FLUCT) { + if (astro_options_global->USE_TS_FLUCT) { CALL_CUDA(cudaMemcpy(d_xe_filtered, h_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyHostToDevice)); @@ -188,7 +188,7 @@ void calculate_fcoll_grid_gpu( // TODO: Can I pass these straight to kernel? 
(or access in kernel w/ Tiger's // method) double fract_float_err = FRACT_FLOAT_ERR; - bool use_ts_fluct = flag_options_global->USE_TS_FLUCT; + bool use_ts_fluct = astro_options_global->USE_TS_FLUCT; long long hii_d = HII_D; long long hii_d_para = HII_D_PARA; long long hii_mid_para = HII_MID_PARA; @@ -218,7 +218,7 @@ void calculate_fcoll_grid_gpu( CALL_CUDA(cudaMemcpy(h_deltax_filtered, d_deltax_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); - if (flag_options_global->USE_TS_FLUCT) { + if (astro_options_global->USE_TS_FLUCT) { CALL_CUDA(cudaMemcpy(h_xe_filtered, d_xe_filtered, sizeof(fftwf_complex) * hii_kspace_num_pixels, cudaMemcpyDeviceToHost)); @@ -231,7 +231,7 @@ void free_ionbox_gpu_data( fftwf_complex **d_xe_filtered, float **d_y_arr, float **d_Fcoll) { CALL_CUDA(cudaFree( *d_deltax_filtered)); // Need to dereference the pointers to pointers (*) - if (flag_options_global->USE_TS_FLUCT) { + if (astro_options_global->USE_TS_FLUCT) { CALL_CUDA(cudaFree(*d_xe_filtered)); } CALL_CUDA(cudaFree(*d_y_arr)); diff --git a/src/py21cmfast/src/MapMass_gpu.cu b/src/py21cmfast/src/MapMass_gpu.cu index bcf8ad2b2..d2034ee37 100644 --- a/src/py21cmfast/src/MapMass_gpu.cu +++ b/src/py21cmfast/src/MapMass_gpu.cu @@ -227,16 +227,12 @@ __global__ void perturb_density_field_kernel( } double* MapMass_gpu( - UserParams *user_params, CosmoParams *cosmo_params, InitialConditions *boxes, double *resampled_box, + InitialConditions *boxes, double *resampled_box, int dimension, float f_pixel_factor, float init_growth_factor ) { - // Makes the parameter structs visible to a variety of functions/macros - // Do each time to avoid Python garbage collection issues - Broadcast_struct_global_noastro(user_params, cosmo_params); - // Box shapes from outputs.py and convenience macros size_t size_double, size_float; - if(user_params->PERTURB_ON_HIGH_RES) { + if(matter_options_global->PERTURB_ON_HIGH_RES) { size_double = TOT_NUM_PIXELS * sizeof(double); size_float 
= TOT_NUM_PIXELS * sizeof(float); } @@ -281,7 +277,7 @@ double* MapMass_gpu( float* lowres_vy_2LPT; float* lowres_vz_2LPT; - if (user_params->PERTURB_ON_HIGH_RES) { + if (matter_options_global->PERTURB_ON_HIGH_RES) { cudaMalloc(&hires_vx, size_float); cudaMalloc(&hires_vy, size_float); cudaMalloc(&hires_vz, size_float); @@ -297,8 +293,8 @@ double* MapMass_gpu( cudaMemcpy(lowres_vy, boxes->lowres_vy, size_float, cudaMemcpyHostToDevice); cudaMemcpy(lowres_vz, boxes->lowres_vz, size_float, cudaMemcpyHostToDevice); } - if (user_params->USE_2LPT) { - if (user_params->PERTURB_ON_HIGH_RES) { + if (matter_options_global->USE_2LPT) { + if (matter_options_global->PERTURB_ON_HIGH_RES) { cudaMalloc(&hires_vx_2LPT, size_float); cudaMalloc(&hires_vy_2LPT, size_float); cudaMalloc(&hires_vz_2LPT, size_float); @@ -333,8 +329,8 @@ double* MapMass_gpu( perturb_density_field_kernel<<>>( d_resampled_box, hires_density, hires_vx, hires_vy, hires_vz, lowres_vx, lowres_vy, lowres_vz, hires_vx_2LPT, hires_vy_2LPT, hires_vz_2LPT, lowres_vx_2LPT, lowres_vy_2LPT, lowres_vz_2LPT, - dimension, user_params->DIM, d_para, hii_d, hii_d_para, user_params->NON_CUBIC_FACTOR, - f_pixel_factor, init_growth_factor, user_params->PERTURB_ON_HIGH_RES, user_params->USE_2LPT); + dimension, simulation_options_global->DIM, d_para, hii_d, hii_d_para, simulation_options_global->NON_CUBIC_FACTOR, + f_pixel_factor, init_growth_factor, matter_options_global->PERTURB_ON_HIGH_RES, matter_options_global->USE_2LPT); // // Only use during development! 
// err = cudaDeviceSynchronize(); @@ -357,7 +353,7 @@ double* MapMass_gpu( cudaFree(d_resampled_box); cudaFree(hires_density); - if (user_params->PERTURB_ON_HIGH_RES) { + if (matter_options_global->PERTURB_ON_HIGH_RES) { cudaFree(hires_vx); cudaFree(hires_vy); cudaFree(hires_vz); @@ -367,8 +363,8 @@ double* MapMass_gpu( cudaFree(lowres_vy); cudaFree(lowres_vz); } - if (user_params->USE_2LPT) { - if (user_params->PERTURB_ON_HIGH_RES) { + if (matter_options_global->USE_2LPT) { + if (matter_options_global->PERTURB_ON_HIGH_RES) { cudaFree(hires_vx_2LPT); cudaFree(hires_vy_2LPT); cudaFree(hires_vz_2LPT); diff --git a/src/py21cmfast/src/Stochasticity.cu b/src/py21cmfast/src/Stochasticity.cu index 2d0e4712c..33947691a 100644 --- a/src/py21cmfast/src/Stochasticity.cu +++ b/src/py21cmfast/src/Stochasticity.cu @@ -334,7 +334,7 @@ __device__ void stoc_set_consts_cond(struct HaloSamplingConstants *const_struct, // the splines don't work well for cells above Deltac, but there CAN be cells above deltac, since this calculation happens // before the overlap, and since the smallest dexm mass is M_cell*(1.01^3) there *could* be a cell above Deltac not in a halo // NOTE: all this does is prevent integration errors below since these cases are also dealt with in stoc_sample - if (const_struct->delta > MAX_DELTAC_FRAC * get_delta_crit(d_user_params.HMF, const_struct->sigma_cond, const_struct->growth_out)){ + if (const_struct->delta > MAX_DELTAC_FRAC * get_delta_crit(d_matter_options.HMF, const_struct->sigma_cond, const_struct->growth_out)){ const_struct->expected_M = const_struct->M_cond; const_struct->expected_N = 1; } @@ -423,7 +423,7 @@ __device__ int stoc_mass_sample(struct HaloSamplingConstants *hs_constants, cura // which is independent of density or halo mass, // this factor reduces the total expected mass to bring it into line with the CMF // exp_M *= user_params_global->HALOMASS_CORRECTION; - exp_M *= d_user_params.HALOMASS_CORRECTION; + exp_M *= 
d_matter_options.HALOMASS_CORRECTION; // int n_halo_sampled = 0; // double M_prog = 0; @@ -468,14 +468,14 @@ __device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandSta // If the expected mass is below our minimum saved mass, don't bother calculating // NOTE: some of these conditions are redundant with set_consts_cond() - if (hs_constants->delta <= DELTA_MIN || hs_constants->expected_M < d_user_params.SAMPLER_MIN_MASS) + if (hs_constants->delta <= DELTA_MIN || hs_constants->expected_M < d_simulation_options.SAMPLER_MIN_MASS) { // *n_halo_out = 0; *sampleCondition = 0; return 0; } // if delta is above critical, form one big halo - if (hs_constants->delta >= MAX_DELTAC_FRAC * get_delta_crit(d_user_params.HMF, hs_constants->sigma_cond, hs_constants->growth_out)){ + if (hs_constants->delta >= MAX_DELTAC_FRAC * get_delta_crit(d_matter_options.HMF, hs_constants->sigma_cond, hs_constants->growth_out)){ // *n_halo_out = 1; // Expected mass takes into account potential dexm overlap @@ -486,21 +486,21 @@ __device__ int stoc_sample(struct HaloSamplingConstants *hs_constants, curandSta // todo: implement callee functions for SAMPLE_METHOD (1,2,3) // We always use Number-Limited sampling for grid-based cases - if (d_user_params.SAMPLE_METHOD == 1 || !hs_constants->from_catalog) + if (d_matter_options.SAMPLE_METHOD == 1 || !hs_constants->from_catalog) { // err = stoc_halo_sample(hs_constants, rng, n_halo_out, M_out); return 0; } - else if (d_user_params.SAMPLE_METHOD == 0) + else if (d_matter_options.SAMPLE_METHOD == 0) { err = stoc_mass_sample(hs_constants, state, M_out); } - else if (d_user_params.SAMPLE_METHOD == 2) + else if (d_matter_options.SAMPLE_METHOD == 2) { // err = stoc_partition_sample(hs_constants, rng, n_halo_out, M_out); return 0; } - else if (d_user_params.SAMPLE_METHOD == 3) + else if (d_matter_options.SAMPLE_METHOD == 3) { // err = stoc_split_sample(hs_constants, rng, n_halo_out, M_out); return 0; @@ -690,7 +690,7 @@ __global__ void 
update_halo_constants(float *d_halo_masses, float *d_star_rng_in d_n_prog[hid] = 0; } if (sampleCondition == 1){ - if(shared_mass[tid] >= d_user_params.SAMPLER_MIN_MASS){ + if(shared_mass[tid] >= d_simulation_options.SAMPLER_MIN_MASS){ d_halo_masses_out[out_id] = shared_mass[tid]; d_n_prog[hid] = 1; d_star_rng_out[out_id] = shared_prop_rng[3 * tid]; @@ -728,7 +728,7 @@ __global__ void update_halo_constants(float *d_halo_masses, float *d_star_rng_in for (int i = 0; i < write_limit + 1; ++i) { - if(shared_mass[tid + i] < d_user_params.SAMPLER_MIN_MASS) continue; + if(shared_mass[tid + i] < d_simulation_options.SAMPLER_MIN_MASS) continue; // write the final mass sample to array in global memory d_halo_masses_out[out_id + i] = shared_mass[tid + i]; d_star_rng_out[out_id + i] = shared_prop_rng[3*(tid +i)]; From f26d04801b0737c3790324e472fd0f1093bd1b46 Mon Sep 17 00:00:00 2001 From: James Davies Date: Tue, 29 Jul 2025 16:10:27 +1000 Subject: [PATCH 133/145] WIP try adding CUDA compilation to the meson files --- meson.build | 29 +++++++++++- meson.options | 1 + src/py21cmfast/src/meson.build | 80 +++++++++++++++++++++++++++++++--- 3 files changed, 103 insertions(+), 7 deletions(-) diff --git a/meson.build b/meson.build index d15024636..6bc0cfbed 100644 --- a/meson.build +++ b/meson.build @@ -1,9 +1,36 @@ #To find the version we need to find python and run setuptools-scm, while keeping the project() call first -project('21cmFAST', [ 'c', 'cpp' ], +project('21cmFAST', ['c', 'cpp'], version : run_command(find_program('python'), '-c', 'from setuptools_scm import get_version; print(get_version())', check: true).stdout().strip(), default_options : ['cpp_std=c++17'], ) +# Check if CUDA should be enabled after project declaration +env_cuda_str = run_command('printenv', 'USE_CUDA', check: false).stdout().strip() +if env_cuda_str == '' + env_cuda_flag = get_option('use_cuda') +else + if env_cuda_str == 'TRUE' + env_cuda_flag = true + else + env_cuda_flag = false + endif +endif + 
+nvcc_prog = find_program('nvcc', required: false) + +# Add CUDA language if both the environment variable is set AND nvcc is available +if env_cuda_flag and nvcc_prog.found() + add_languages('cuda') + # For CUDA, we need to pass the C++ standard as a compiler argument since we're adding the language dynamically + add_project_arguments('--std=c++17', language: 'cuda') + message('CUDA enabled: nvcc found at ' + nvcc_prog.full_path()) +else + if env_cuda_flag and not nvcc_prog.found() + warning('USE_CUDA=TRUE but nvcc not found, building CPU-only version') + endif + message('Building CPU-only version') +endif + py = import('python').find_installation(pure: false) subdir('src') diff --git a/meson.options b/meson.options index 9550b27a3..b6db5bbb9 100644 --- a/meson.options +++ b/meson.options @@ -1,2 +1,3 @@ # Define the log level option option('log_level', type: 'string', value: 'WARNING', description: 'Set the log level (e.g., NO_LOG, ERROR, WARNING, INFO, DEBUG, SUPER_DEBUG, ULTRA_DEBUG)') +option('use_cuda',type: 'boolean', value: false, description: 'Attempt to find and use CUDA in the compilation if set to TRUE') diff --git a/src/py21cmfast/src/meson.build b/src/py21cmfast/src/meson.build index f9dcd6e93..5484a45f7 100644 --- a/src/py21cmfast/src/meson.build +++ b/src/py21cmfast/src/meson.build @@ -1,5 +1,5 @@ -# Define the source files that contribute to the 21cmFast build -source_files = files([ +# Define the C source files +c_source_files = files([ 'BrightnessTemperatureBox.c', 'HaloBox.c', 'HaloField.c', @@ -29,9 +29,27 @@ source_files = files([ 'scaling_relations.c', 'thermochem.c', 'MapMass_cpu.c', - '_wrapper.cpp', ]) +# Define the CUDA source files +cuda_source_files = files([ + 'HaloField.cu', + 'IonisationBox.cu', + 'SpinTemperatureBox.cu', + 'Stochasticity.cu', + 'filtering.cu', + 'hmf.cu', + 'interp_tables.cu', + 'interpolation.cu', + 'device_rng.cu', + 'MapMass_gpu.cu', + 'test_Stochasticity.cu', + 'cuda_hello_world.cu', +]) + +# C++ wrapper file 
+cpp_source_files = files(['_wrapper.cpp']) + # Define the 21cmFast dependencies omp = dependency('openmp') gsl = dependency('gsl') @@ -45,7 +63,58 @@ search_paths = [ '/usr/lib', '/usr/local/lib', '/opt/homebrew/lib' ] fftw = cc.find_library ('fftw3f', required: true, dirs: search_paths) fftw_threads = cc.find_library ('fftw3f_threads', required: true, dirs: search_paths) -# Define a mapping of string values to integers (enum-like structure) +# Base dependencies (always needed) +deps = [omp, gsl, nanobind, fftw, fftw_threads] + +# CUDA dependency (optional) +# The root meson.build already checked for USE_CUDA environment variable and nvcc availability +cuda_dep = dependency('cuda', version: '>=10.0', required: false) + +# Check if CUDA language is available (this will be true only if root meson.build successfully added it) +if cuda_dep.found() + message('Using CUDA compilation in source build') + cuda_compiler = meson.get_compiler('cuda', required: false) + add_project_arguments('-DUSE_CUDA=1', language: ['c', 'cpp', 'cuda']) + + # CUDA compiler arguments + # Try to detect GPU architecture, fall back to a reasonable default + detect_arch_cmd = run_command('nvidia-smi', '--query-gpu=compute_cap', '--format=csv,noheader,nounits', check: false) + + if detect_arch_cmd.returncode() == 0 and detect_arch_cmd.stdout().strip() != '' + # Parse the compute capability (e.g., "7.5" -> "sm_75") + compute_cap = detect_arch_cmd.stdout().strip().split('\n')[0] + arch_major = compute_cap.split('.')[0] + arch_minor = compute_cap.split('.')[1] + detected_arch = 'sm_' + arch_major + arch_minor + message('Detected GPU architecture: ' + detected_arch) + cuda_arch = detected_arch + else + # Fall back to a widely compatible architecture + cuda_arch = 'sm_60' # Pascal - widely compatible + message('Could not detect GPU architecture, using default: ' + cuda_arch) + endif + + cuda_args = [ + '-arch=' + cuda_arch, + '--extended-lambda', + '--expt-relaxed-constexpr', + ] + 
add_project_arguments(cuda_args, language: 'cuda') + + # Add CUDA runtime library + cuda_rt = cuda_compiler.find_library('cudart', required: true) + deps = deps + [cuda_dep, cuda_rt] + + # Include CUDA source files + all_source_files = c_source_files + cuda_source_files + cpp_source_files +else + message('Using CPU-only compilation in source build') + add_project_arguments('-DUSE_CUDA=0', language: ['c', 'cpp']) + # Only include C/C++ files, no CUDA files + all_source_files = c_source_files + cpp_source_files +endif + +# Define a mapping of log level strings to integers log_level_map = { 'NO_LOG': 0, 'ERROR': 1, @@ -73,10 +142,9 @@ message('Selected log level: ' + log_level.to_string()) add_project_arguments('-DLOG_LEVEL=' + log_level.to_string(), language: 'c') # Define the Python extension module -deps = [omp,gsl,fftw,fftw_threads,nanobind] py.extension_module( 'c_21cmfast', - source_files, + all_source_files, dependencies: deps, install: true, subdir:'py21cmfast', From 2f92fa6030a4dc81bc852d3696c2b9e68e57c392 Mon Sep 17 00:00:00 2001 From: James Davies Date: Thu, 14 Aug 2025 14:03:47 +1000 Subject: [PATCH 134/145] WIP compiles --- src/py21cmfast/src/HaloBox.c | 653 ++++++------------ src/py21cmfast/src/HaloBox.h | 48 +- src/py21cmfast/src/IonisationBox.c | 10 +- src/py21cmfast/src/PerturbField.c | 108 +-- src/py21cmfast/src/PerturbHaloField.c | 4 +- src/py21cmfast/src/SpinTemperatureBox.c | 8 +- src/py21cmfast/src/Stochasticity.c | 8 +- .../src/_functionprototypes_wrapper.h | 5 +- src/py21cmfast/src/debugging.c | 3 +- src/py21cmfast/src/heating_helper_progs.c | 13 +- src/py21cmfast/src/heating_helper_progs.h | 4 +- src/py21cmfast/src/hmf.c | 13 +- src/py21cmfast/src/hmf.h | 13 +- src/py21cmfast/src/integral_wrappers.c | 10 +- src/py21cmfast/src/interp_tables.c | 49 +- src/py21cmfast/src/interp_tables.h | 30 +- src/py21cmfast/src/map_mass.c | 280 ++++++++ src/py21cmfast/src/map_mass.h | 13 + src/py21cmfast/src/photoncons.c | 2 +- 
src/py21cmfast/src/scaling_relations.c | 23 +- src/py21cmfast/src/scaling_relations.h | 21 +- src/py21cmfast/wrapper/outputs.py | 18 +- 22 files changed, 678 insertions(+), 658 deletions(-) create mode 100644 src/py21cmfast/src/map_mass.c create mode 100644 src/py21cmfast/src/map_mass.h diff --git a/src/py21cmfast/src/HaloBox.c b/src/py21cmfast/src/HaloBox.c index 9fb2d49df..207bd42bd 100644 --- a/src/py21cmfast/src/HaloBox.c +++ b/src/py21cmfast/src/HaloBox.c @@ -21,26 +21,24 @@ #include "indexing.h" #include "interp_tables.h" #include "logger.h" +#include "map_mass.h" #include "scaling_relations.h" #include "thermochem.h" -// struct holding each halo property we currently need. -// This is only used for both averages over the box/catalogues -// as well as an individual halo's properties -struct HaloProperties { - double halo_mass; - double stellar_mass; - double halo_sfr; - double stellar_mass_mini; - double sfr_mini; - double fescweighted_sfr; - double n_ion; - double halo_xray; - double metallicity; - double m_turn_acg; - double m_turn_mcg; - double m_turn_reion; -}; +// TODO: this should probably be somewhere else +void set_integral_constants(IntegralCondition *consts, double redshift, double M_min, double M_max, + double M_cell) { + consts->redshift = redshift; + consts->growth_factor = dicke(redshift); + consts->M_min = M_min; + consts->M_max = M_max; + consts->lnM_min = log(M_min); + consts->lnM_max = log(M_max); + consts->M_cell = M_cell; + consts->lnM_cell = log(M_cell); + // no table since this should be called once + consts->sigma_cell = sigma_z0(M_cell); +} // calculates halo properties from astro parameters plus the correlated rng // The inputs include all properties with a separate RNG @@ -56,8 +54,7 @@ struct HaloProperties { // representing a smooth transition in halo mass from one set of SFR/emmissivity parameters to the // other. 
void set_halo_properties(double halo_mass, double M_turn_a, double M_turn_m, - struct ScalingConstants *consts, double *input_rng, - struct HaloProperties *output) { + ScalingConstants *consts, double *input_rng, HaloProperties *output) { double n_ion_sample, wsfr_sample; double fesc; double fesc_mini = 0.; @@ -99,8 +96,8 @@ void set_halo_properties(double halo_mass, double M_turn_a, double M_turn_m, // Expected global averages for box quantities for mean adjustment // WARNING: THESE AVERAGE BOXES ARE WRONG, CHECK THEM -int get_box_averages(double M_min, double M_max, double M_turn_a, double M_turn_m, - struct ScalingConstants *consts, struct HaloProperties *averages_out) { +int get_uhmf_averages(double M_min, double M_max, double M_turn_a, double M_turn_m, + ScalingConstants *consts, HaloProperties *averages_out) { LOG_SUPER_DEBUG("Getting Box averages z=%.2f M [%.2e %.2e] Mt [%.2e %.2e]", consts->redshift, M_min, M_max, M_turn_a, M_turn_m); double t_h = consts->t_h; @@ -124,7 +121,7 @@ int get_box_averages(double M_min, double M_max, double M_turn_a, double M_turn_ // NOTE: we use the atomic method for all halo mass/count here mass_intgrl = Fcoll_General(consts->redshift, lnMmin, lnMmax); - struct ScalingConstants consts_sfrd = evolve_scaling_constants_sfr(consts); + ScalingConstants consts_sfrd = evolve_scaling_constants_sfr(consts); intgrl_fesc_weighted = Nion_General(consts->redshift, lnMmin, lnMmax, M_turn_a, consts); intgrl_stars_only = Nion_General(consts->redshift, lnMmin, lnMmax, M_turn_a, &consts_sfrd); @@ -154,70 +151,131 @@ int get_box_averages(double M_min, double M_max, double M_turn_a, double M_turn_ return 0; } +HaloProperties get_halobox_averages(HaloBox *grids) { + double mean_count = 0., mean_mass = 0., mean_stars = 0., mean_stars_mini = 0., mean_sfr = 0., + mean_sfr_mini = 0.; + double mean_n_ion = 0., mean_xray = 0., mean_wsfr = 0.; + +// TODO: optional flags for count/hm/sm +#pragma omp parallel for reduction(+ : mean_count, mean_mass, 
mean_stars, mean_stars_mini, \ + mean_sfr, mean_sfr_mini) + for (int i = 0; i < HII_TOT_NUM_PIXELS; i++) { + mean_count += grids->count[i]; + mean_mass += grids->halo_mass[i]; + mean_stars += grids->halo_stars[i]; + mean_sfr += grids->halo_sfr[i]; + mean_n_ion += grids->n_ion[i]; + if (astro_options_global->USE_TS_FLUCT) { + mean_xray += grids->halo_xray[i]; + } + if (astro_options_global->USE_MINI_HALOS) { + mean_stars_mini += grids->halo_stars_mini[i]; + mean_sfr_mini += grids->halo_sfr_mini[i]; + } + if (astro_options_global->INHOMO_RECO) mean_wsfr += grids->whalo_sfr[i]; + } + + HaloProperties averages = { + .count = mean_count / HII_TOT_NUM_PIXELS, + .halo_mass = mean_mass / HII_TOT_NUM_PIXELS, + .stellar_mass = mean_stars / HII_TOT_NUM_PIXELS, + .stellar_mass_mini = mean_stars_mini / HII_TOT_NUM_PIXELS, + .halo_sfr = mean_sfr / HII_TOT_NUM_PIXELS, + .sfr_mini = mean_sfr_mini / HII_TOT_NUM_PIXELS, + .n_ion = mean_n_ion / HII_TOT_NUM_PIXELS, + .halo_xray = mean_xray / HII_TOT_NUM_PIXELS, + .fescweighted_sfr = mean_wsfr / HII_TOT_NUM_PIXELS, + }; + return averages; +} // This takes a HaloBox struct and fixes it's mean to exactly what we expect from the UMF integrals. 
// Generally should only be done for the fixed portion of the grids, since // it will otherwise make the box inconsistent with the input catalogue -void mean_fix_grids(double M_min, double M_max, HaloBox *grids, struct HaloProperties *averages_box, - struct ScalingConstants *consts) { - struct HaloProperties averages_global; - double M_turn_a_global = averages_box->m_turn_acg; - double M_turn_m_global = averages_box->m_turn_mcg; - get_box_averages(M_min, M_max, M_turn_a_global, M_turn_m_global, consts, &averages_global); +void mean_fix_grids(double M_min, double M_max, HaloBox *grids, ScalingConstants *consts) { + HaloProperties averages_global; + // NOTE: requires the mean mcrits to be set on the grids + double M_turn_a_global = pow(10, grids->log10_Mcrit_ACG_ave); + double M_turn_m_global = pow(10, grids->log10_Mcrit_MCG_ave); + get_uhmf_averages(M_min, M_max, M_turn_a_global, M_turn_m_global, consts, &averages_global); + HaloProperties averages_hbox; + averages_hbox = get_halobox_averages(grids); unsigned long long int idx; #pragma omp parallel for num_threads(simulation_options_global->N_THREADS) private(idx) for (idx = 0; idx < HII_TOT_NUM_PIXELS; idx++) { - grids->halo_mass[idx] *= averages_global.halo_mass / averages_box->halo_mass; - grids->halo_stars[idx] *= averages_global.stellar_mass / averages_box->stellar_mass; - grids->halo_sfr[idx] *= averages_global.halo_sfr / averages_box->halo_sfr; - grids->n_ion[idx] *= averages_global.n_ion / averages_box->n_ion; + grids->halo_mass[idx] *= averages_global.halo_mass / averages_hbox.halo_mass; + grids->halo_stars[idx] *= averages_global.stellar_mass / averages_hbox.stellar_mass; + grids->halo_sfr[idx] *= averages_global.halo_sfr / averages_hbox.halo_sfr; + grids->n_ion[idx] *= averages_global.n_ion / averages_hbox.n_ion; if (astro_options_global->USE_MINI_HALOS) { grids->halo_stars_mini[idx] *= - averages_global.stellar_mass_mini / averages_box->stellar_mass_mini; - grids->halo_sfr_mini[idx] *= 
averages_global.sfr_mini / averages_box->sfr_mini; + averages_global.stellar_mass_mini / averages_hbox.stellar_mass_mini; + grids->halo_sfr_mini[idx] *= averages_global.sfr_mini / averages_hbox.sfr_mini; } if (astro_options_global->USE_TS_FLUCT) { - grids->halo_xray[idx] *= averages_global.halo_xray / averages_box->halo_xray; + grids->halo_xray[idx] *= averages_global.halo_xray / averages_hbox.halo_xray; } if (astro_options_global->INHOMO_RECO) { grids->whalo_sfr[idx] *= - averages_global.fescweighted_sfr / averages_box->fescweighted_sfr; + averages_global.fescweighted_sfr / averages_hbox.fescweighted_sfr; } } } +// Evaluate Mass function integrals given information from the cell +void get_cell_integrals(double dens, double l10_mturn_a, double l10_mturn_m, + ScalingConstants *consts, IntegralCondition *int_consts, + HaloProperties *properties) { + double M_min = int_consts->M_min; + double M_max = int_consts->M_max; + double growth_z = int_consts->growth_factor; + double M_cell = int_consts->M_cell; + double sigma_cell = int_consts->sigma_cell; + + properties->n_ion = EvaluateNion_Conditional(dens, l10_mturn_a, growth_z, M_min, M_max, M_cell, + sigma_cell, consts, false); + properties->stellar_mass = + EvaluateSFRD_Conditional(dens, growth_z, M_min, M_max, M_cell, sigma_cell, consts); + // TODO: SFRD tables still assume no reion feedback, this should be fixed + // although it doesn't affect the histories (only used in Ts) it makes outputs wrong + // for post-processing + if (astro_options_global->USE_MINI_HALOS) { + properties->stellar_mass_mini = EvaluateSFRD_Conditional_MINI( + dens, l10_mturn_m, growth_z, M_min, M_max, M_cell, sigma_cell, consts); + // re-using field + properties->fescweighted_sfr = EvaluateNion_Conditional_MINI( + dens, l10_mturn_m, growth_z, M_min, M_max, M_cell, sigma_cell, consts, false); + } else { + properties->stellar_mass_mini = 0; + properties->fescweighted_sfr = 0; + } + + if (astro_options_global->USE_TS_FLUCT) { + 
properties->halo_xray = + EvaluateXray_Conditional(dens, l10_mturn_m, consts->redshift, growth_z, M_min, M_max, + M_cell, sigma_cell, consts); + } else { + properties->halo_xray = 0; + } + + // TODO: add an optional flag for fields which aren't used in the radiation fields but are + // useful + // for analysis + // properties->count = EvaluateNhalo(dens, growth_z, lnMmin, lnMmax, M_cell, sigma_cell, dens); + // properties->halo_mass = EvaluateMcoll(dens, growth_z, lnMmin, lnMmax, M_cell, sigma_cell, + // dens); +} + // Fixed halo grids, where each property is set as the integral of the CMF on the EULERIAN cell // scale As per default 21cmfast (strange pretending that the lagrangian density is eulerian and // then *(1+delta)) This outputs the UN-NORMALISED grids (before mean-adjustment) -int set_fixed_grids(double M_min, double M_max, InitialConditions *ini_boxes, - PerturbedField *perturbed_field, TsBox *previous_spin_temp, - IonizedBox *previous_ionize_box, struct ScalingConstants *consts, - HaloBox *grids, struct HaloProperties *averages, const bool eulerian) { +int set_fixed_grids(double M_min, double M_max, InitialConditions *ini_boxes, float *mturn_a_grid, + float *mturn_m_grid, ScalingConstants *consts, HaloBox *grids) { double M_cell = RHOcrit * cosmo_params_global->OMm * VOLUME / HII_TOT_NUM_PIXELS; // mass in cell of mean dens - double growth_z = dicke(consts->redshift); - - double lnMmin = log(M_min); - double lnMcell = log(M_cell); - double lnMmax = log(M_max); - - double sigma_cell = EvaluateSigma(lnMcell); - - double prefactor_mass = RHOcrit * cosmo_params_global->OMm; - double prefactor_stars = RHOcrit * cosmo_params_global->OMb * consts->fstar_10; - double prefactor_stars_mini = RHOcrit * cosmo_params_global->OMb * consts->fstar_7; - double prefactor_sfr = prefactor_stars / consts->t_star / consts->t_h; - double prefactor_sfr_mini = prefactor_stars_mini / consts->t_star / consts->t_h; - double prefactor_nion = prefactor_stars * consts->fesc_10 * 
consts->pop2_ion; - double prefactor_nion_mini = prefactor_stars_mini * consts->fesc_7 * consts->pop3_ion; - double prefactor_wsfr = prefactor_sfr * consts->fesc_10 * consts->pop2_ion; - double prefactor_wsfr_mini = prefactor_sfr_mini * consts->fesc_7 * consts->pop3_ion; - double prefactor_xray = RHOcrit * cosmo_params_global->OMm; - - double hm_sum = 0, nion_sum = 0, wsfr_sum = 0, xray_sum = 0; - double sm_sum = 0, sm_sum_mini = 0, sfr_sum = 0, sfr_sum_mini = 0; - double l10_mlim_m_sum = 0., l10_mlim_a_sum = 0., l10_mlim_r_sum = 0.; + IntegralCondition integral_cond; + set_integral_constants(&integral_cond, consts->redshift, M_min, M_max, M_cell); // find grid limits for tables double min_density = 0.; @@ -226,56 +284,27 @@ int set_fixed_grids(double M_min, double M_max, InitialConditions *ini_boxes, double min_log10_mturn_m = log10(M_MAX_INTEGRAL); double max_log10_mturn_a = log10(astro_params_global->M_TURN); double max_log10_mturn_m = log10(astro_params_global->M_TURN); - float *mturn_a_grid = calloc(HII_TOT_NUM_PIXELS, sizeof(float)); - float *mturn_m_grid = calloc(HII_TOT_NUM_PIXELS, sizeof(float)); #pragma omp parallel num_threads(simulation_options_global->N_THREADS) { unsigned long long int i; double dens; - double J21_val, Gamma12_val, zre_val; - double M_turn_r = 0.; double M_turn_m = consts->mturn_m_nofb; double M_turn_a = consts->mturn_a_nofb; - double curr_vcb = consts->vcb_norel; #pragma omp for reduction(min : min_density, min_log10_mturn_a, min_log10_mturn_m) \ - reduction(max : max_density, max_log10_mturn_a, max_log10_mturn_m) \ - reduction(+ : l10_mlim_m_sum, l10_mlim_a_sum, l10_mlim_r_sum) + reduction(max : max_density, max_log10_mturn_a, max_log10_mturn_m) for (i = 0; i < HII_TOT_NUM_PIXELS; i++) { - if (eulerian) - dens = perturbed_field->density[i]; - else - dens = ini_boxes->lowres_density[i] * growth_z; + dens = ini_boxes->lowres_density[i]; if (dens > max_density) max_density = dens; if (dens < min_density) min_density = dens; if 
(astro_options_global->USE_MINI_HALOS) { - if (!astro_options_global->FIX_VCB_AVG && - matter_options_global->USE_RELATIVE_VELOCITIES) { - curr_vcb = ini_boxes->lowres_vcb[i]; - } - J21_val = Gamma12_val = zre_val = 0.; - if (consts->redshift < simulation_options_global->Z_HEAT_MAX) { - J21_val = previous_spin_temp->J_21_LW[i]; - Gamma12_val = previous_ionize_box->ionisation_rate_G12[i]; - zre_val = previous_ionize_box->z_reion[i]; - } - M_turn_a = consts->mturn_a_nofb; - M_turn_m = lyman_werner_threshold(consts->redshift, J21_val, curr_vcb); - M_turn_r = reionization_feedback(consts->redshift, Gamma12_val, zre_val); - M_turn_a = fmax(M_turn_a, fmax(M_turn_r, astro_params_global->M_TURN)); - M_turn_m = fmax(M_turn_m, fmax(M_turn_r, astro_params_global->M_TURN)); + M_turn_a = mturn_a_grid[i]; + M_turn_m = mturn_m_grid[i]; + if (min_log10_mturn_a > M_turn_a) min_log10_mturn_a = M_turn_a; + if (min_log10_mturn_m > M_turn_m) min_log10_mturn_m = M_turn_m; + if (max_log10_mturn_a < M_turn_a) max_log10_mturn_a = M_turn_a; + if (max_log10_mturn_m < M_turn_m) max_log10_mturn_m = M_turn_m; } - mturn_a_grid[i] = log10(M_turn_a); - mturn_m_grid[i] = log10(M_turn_m); - - if (min_log10_mturn_a > mturn_a_grid[i]) min_log10_mturn_a = mturn_a_grid[i]; - if (min_log10_mturn_m > mturn_m_grid[i]) min_log10_mturn_m = mturn_m_grid[i]; - if (max_log10_mturn_a < mturn_a_grid[i]) max_log10_mturn_a = mturn_a_grid[i]; - if (max_log10_mturn_m < mturn_m_grid[i]) max_log10_mturn_m = mturn_m_grid[i]; - - l10_mlim_a_sum += mturn_a_grid[i]; - l10_mlim_m_sum += mturn_m_grid[i]; - l10_mlim_r_sum += log10(M_turn_r); } } // buffers for table ranges @@ -286,17 +315,14 @@ int set_fixed_grids(double M_min, double M_max, InitialConditions *ini_boxes, max_log10_mturn_a = max_log10_mturn_a * 1.001; max_log10_mturn_m = max_log10_mturn_m * 1.001; - LOG_DEBUG("Mean halo boxes || M = [%.2e %.2e] | Mcell = %.2e (s=%.2e) | z = %.2e | D = %.2e", - M_min, M_max, M_cell, sigma_cell, consts->redshift, growth_z); - + 
LOG_DEBUG("Mean halo boxes || M = [%.2e %.2e] | Mcell = %.2e", M_min, M_max, M_cell); // These tables are coarser than needed, an initial loop for Mturn to find limits may help if (matter_options_global->USE_INTERPOLATION_TABLES > 1) { if (astro_options_global->INTEGRATION_METHOD_ATOMIC == 1 || (astro_options_global->USE_MINI_HALOS && astro_options_global->INTEGRATION_METHOD_MINI == 1)) { - initialise_GL(lnMmin, lnMmax); + initialise_GL(integral_cond.lnM_min, integral_cond.lnM_max); } - // This table assumes no reionisation feedback initialise_SFRD_Conditional_table(consts->redshift, min_density, max_density, M_min, M_max, M_cell, consts); @@ -307,90 +333,23 @@ int set_fixed_grids(double M_min, double M_max, InitialConditions *ini_boxes, M_cell, min_log10_mturn_a, max_log10_mturn_a, min_log10_mturn_m, max_log10_mturn_m, consts, false); - initialise_dNdM_tables(min_density, max_density, lnMmin, lnMmax, growth_z, lnMcell, false); + initialise_dNdM_tables(min_density, max_density, integral_cond.lnM_min, + integral_cond.lnM_max, integral_cond.growth_factor, + integral_cond.lnM_cell, false); if (astro_options_global->USE_TS_FLUCT) { initialise_Xray_Conditional_table(consts->redshift, min_density, max_density, M_min, M_max, M_cell, consts); } } -#pragma omp parallel num_threads(simulation_options_global->N_THREADS) - { - unsigned long long int i; - double dens; - double l10_mturn_a, l10_mturn_m; - double mass_intgrl, h_count; - double intgrl_fesc_weighted, intgrl_stars_only; - double intgrl_fesc_weighted_mini = 0., intgrl_stars_only_mini = 0., integral_xray = 0; - double dens_fac; - -#pragma omp for reduction(+ : hm_sum, sm_sum, sm_sum_mini, sfr_sum, sfr_sum_mini, xray_sum, \ - nion_sum, wsfr_sum) - for (i = 0; i < HII_TOT_NUM_PIXELS; i++) { - if (eulerian) { - dens = perturbed_field->density[i]; - dens_fac = (1. 
+ dens); - } else { - dens = ini_boxes->lowres_density[i] * growth_z; - dens_fac = 1.; - } - l10_mturn_a = mturn_a_grid[i]; - l10_mturn_m = mturn_m_grid[i]; - - h_count = EvaluateNhalo(dens, growth_z, lnMmin, lnMmax, M_cell, sigma_cell, dens); - mass_intgrl = EvaluateMcoll(dens, growth_z, lnMmin, lnMmax, M_cell, sigma_cell, dens); - intgrl_fesc_weighted = EvaluateNion_Conditional( - dens, l10_mturn_a, growth_z, M_min, M_max, M_cell, sigma_cell, consts, false); - intgrl_stars_only = - EvaluateSFRD_Conditional(dens, growth_z, M_min, M_max, M_cell, sigma_cell, consts); - // TODO: SFRD tables still assume no reion feedback, this should be fixed - // although it doesn't affect the histories (only used in Ts) it makes outputs wrong - // for post-processing - if (astro_options_global->USE_MINI_HALOS) { - intgrl_stars_only_mini = EvaluateSFRD_Conditional_MINI( - dens, l10_mturn_m, growth_z, M_min, M_max, M_cell, sigma_cell, consts); - intgrl_fesc_weighted_mini = EvaluateNion_Conditional_MINI( - dens, l10_mturn_m, growth_z, M_min, M_max, M_cell, sigma_cell, consts, false); - } - - if (astro_options_global->USE_TS_FLUCT) { - integral_xray = - EvaluateXray_Conditional(dens, l10_mturn_m, consts->redshift, growth_z, M_min, - M_max, M_cell, sigma_cell, consts); - } - - grids->count[i] = (int)(h_count * M_cell * dens_fac); // NOTE: truncated - grids->halo_mass[i] = mass_intgrl * prefactor_mass * dens_fac; - grids->halo_sfr[i] = (intgrl_stars_only * prefactor_sfr) * dens_fac; - grids->n_ion[i] = (intgrl_fesc_weighted * prefactor_nion + - intgrl_fesc_weighted_mini * prefactor_nion_mini) * - dens_fac; - grids->halo_stars[i] = intgrl_stars_only * prefactor_stars * dens_fac; - - hm_sum += grids->halo_mass[i]; - nion_sum += grids->n_ion[i]; - sfr_sum += grids->halo_sfr[i]; - sm_sum += grids->halo_stars[i]; - - if (astro_options_global->USE_TS_FLUCT) { - grids->halo_xray[i] = prefactor_xray * integral_xray * dens_fac; - xray_sum += grids->halo_xray[i]; - } - if 
(astro_options_global->INHOMO_RECO) { - grids->whalo_sfr[i] = (intgrl_fesc_weighted * prefactor_wsfr + - intgrl_fesc_weighted_mini * prefactor_wsfr_mini) * - dens_fac; - wsfr_sum += grids->whalo_sfr[i]; - } - if (astro_options_global->USE_MINI_HALOS) { - grids->halo_stars_mini[i] = - intgrl_stars_only_mini * prefactor_stars_mini * dens_fac; - grids->halo_sfr_mini[i] = intgrl_stars_only_mini * prefactor_sfr_mini * dens_fac; - sm_sum_mini += grids->halo_stars_mini[i]; - sfr_sum_mini += grids->halo_sfr_mini[i]; - } - } - } + int grid_dim[3] = {simulation_options_global->HII_DIM, simulation_options_global->HII_DIM, + HII_D_PARA}; + float *vel_pointers[3] = {ini_boxes->lowres_vx, ini_boxes->lowres_vy, ini_boxes->lowres_vz}; + float *vel_pointers_2LPT[3] = {ini_boxes->lowres_vx_2LPT, ini_boxes->lowres_vy_2LPT, + ini_boxes->lowres_vz_2LPT}; + move_grid_galprops(consts->redshift, ini_boxes->lowres_density, grid_dim, vel_pointers, + vel_pointers_2LPT, grid_dim, grids, grid_dim, mturn_a_grid, mturn_m_grid, + consts, &integral_cond); LOG_ULTRA_DEBUG("Cell 0 Totals: HM: %.2e SM: %.2e SF: %.2e, NI: %.2e ct : %d", grids->halo_mass[HII_R_INDEX(0, 0, 0)], grids->halo_stars[HII_R_INDEX(0, 0, 0)], @@ -408,54 +367,23 @@ int set_fixed_grids(double M_min, double M_max, InitialConditions *ini_boxes, } LOG_ULTRA_DEBUG("Mturn_a %.2e Mturn_m %.2e", mturn_a_grid[HII_R_INDEX(0, 0, 0)], mturn_m_grid[HII_R_INDEX(0, 0, 0)]); - - free(mturn_a_grid); - free(mturn_m_grid); free_conditional_tables(); - averages->halo_mass = hm_sum / HII_TOT_NUM_PIXELS; - averages->stellar_mass = sm_sum / HII_TOT_NUM_PIXELS; - averages->stellar_mass_mini = sm_sum_mini / HII_TOT_NUM_PIXELS; - averages->halo_sfr = sfr_sum / HII_TOT_NUM_PIXELS; - averages->sfr_mini = sfr_sum_mini / HII_TOT_NUM_PIXELS; - averages->n_ion = nion_sum / HII_TOT_NUM_PIXELS; - averages->halo_xray = xray_sum / HII_TOT_NUM_PIXELS; - averages->fescweighted_sfr = wsfr_sum / HII_TOT_NUM_PIXELS; - averages->m_turn_acg = pow(10, l10_mlim_a_sum / 
HII_TOT_NUM_PIXELS); - averages->m_turn_mcg = pow(10, l10_mlim_m_sum / HII_TOT_NUM_PIXELS); - averages->m_turn_reion = pow(10, l10_mlim_r_sum / HII_TOT_NUM_PIXELS); - - // mean-fix the grids - // TODO: put this behind a flag - if (consts->fix_mean) mean_fix_grids(M_min, M_max, grids, averages, consts); - - // assign the log10 average Mturn for the Ts global tables - grids->log10_Mcrit_MCG_ave = l10_mlim_m_sum / HII_TOT_NUM_PIXELS; - grids->log10_Mcrit_ACG_ave = l10_mlim_a_sum / HII_TOT_NUM_PIXELS; + if (consts->fix_mean) mean_fix_grids(M_min, M_max, grids, consts); return 0; } -void halobox_debug_print_avg(struct HaloProperties *averages_box, - struct HaloProperties *averages_subsampler, - struct ScalingConstants *consts, double M_min, double M_max) { +void halobox_debug_print_avg(HaloBox *halobox, ScalingConstants *consts, double M_min, + double M_max) { if (LOG_LEVEL < DEBUG_LEVEL) return; - struct HaloProperties averages_sub_expected, averages_global; + HaloProperties averages_box; + averages_box = get_halobox_averages(halobox); + HaloProperties averages_global; LOG_DEBUG("HALO BOXES REDSHIFT %.2f [%.2e %.2e]", consts->redshift, M_min, M_max); - if (matter_options_global->FIXED_HALO_GRIDS) { - get_box_averages(M_min, M_max, averages_box->m_turn_acg, averages_box->m_turn_mcg, consts, - &averages_global); - } else { - get_box_averages(simulation_options_global->SAMPLER_MIN_MASS, M_max, - averages_box->m_turn_acg, averages_box->m_turn_mcg, consts, - &averages_global); - if (astro_options_global->AVG_BELOW_SAMPLER && - M_min < simulation_options_global->SAMPLER_MIN_MASS) { - get_box_averages(M_min, simulation_options_global->SAMPLER_MIN_MASS, - averages_box->m_turn_acg, averages_box->m_turn_mcg, consts, - &averages_sub_expected); - } - } + double mturn_a_avg = pow(10, halobox->log10_Mcrit_ACG_ave); + double mturn_m_avg = pow(10, halobox->log10_Mcrit_MCG_ave); + get_uhmf_averages(M_min, M_max, mturn_a_avg, mturn_m_avg, consts, &averages_global); LOG_DEBUG( "Exp. 
averages: (HM %11.3e, SM %11.3e SM_MINI %11.3e SFR %11.3e, SFR_MINI %11.3e, XRAY " @@ -466,43 +394,22 @@ void halobox_debug_print_avg(struct HaloProperties *averages_box, LOG_DEBUG( "Box. averages: (HM %11.3e, SM %11.3e SM_MINI %11.3e SFR %11.3e, SFR_MINI %11.3e, XRAY " "%11.3e, NION %11.3e)", - averages_box->halo_mass, averages_box->stellar_mass, averages_box->stellar_mass_mini, - averages_box->halo_sfr, averages_box->sfr_mini, averages_box->halo_xray, - averages_box->n_ion); - - if (!matter_options_global->FIXED_HALO_GRIDS && astro_options_global->AVG_BELOW_SAMPLER && - M_min < simulation_options_global->SAMPLER_MIN_MASS) { - LOG_DEBUG("SUB-SAMPLER"); - LOG_DEBUG( - "Exp. averages: (HM %11.3e, SM %11.3e SM_MINI %11.3e SFR %11.3e, SFR_MINI %11.3e, XRAY " - "%11.3e, NION %11.3e)", - averages_sub_expected.halo_mass, averages_sub_expected.stellar_mass, - averages_sub_expected.stellar_mass_mini, averages_sub_expected.halo_sfr, - averages_sub_expected.sfr_mini, averages_sub_expected.halo_xray, - averages_sub_expected.n_ion); - LOG_DEBUG( - "Box. averages: (HM %11.3e, SM %11.3e SM_MINI %11.3e SFR %11.3e, SFR_MINI %11.3e, XRAY " - "%11.3e, NION %11.3e)", - averages_subsampler->halo_mass, averages_subsampler->stellar_mass, - averages_subsampler->stellar_mass_mini, averages_subsampler->halo_sfr, - averages_subsampler->sfr_mini, averages_subsampler->halo_xray, - averages_subsampler->n_ion); - } + averages_box.halo_mass, averages_box.stellar_mass, averages_box.stellar_mass_mini, + averages_box.halo_sfr, averages_box.sfr_mini, averages_box.halo_xray, averages_box.n_ion); } // We need the mean log10 turnover masses for comparison with expected global Nion and SFRD. 
// Sometimes we don't calculate these on the grid (if we use halos and no sub-sampler) // So this function simply returns the volume-weighted average log10 turnover mass -void get_mean_log10_turnovers(InitialConditions *ini_boxes, TsBox *previous_spin_temp, - IonizedBox *previous_ionize_box, PerturbedField *perturbed_field, - struct ScalingConstants *consts, double turnovers[3]) { +void get_log10_turnovers(InitialConditions *ini_boxes, TsBox *previous_spin_temp, + IonizedBox *previous_ionize_box, float *mturn_a_grid, float *mturn_m_grid, + ScalingConstants *consts, double averages[2]) { + averages[0] = consts->mturn_a_nofb; + averages[1] = consts->mturn_m_nofb; if (!astro_options_global->USE_MINI_HALOS) { - turnovers[0] = log10(consts->mturn_a_nofb); // ACG - turnovers[1] = log10(consts->mturn_m_nofb); // MCG - turnovers[2] = 0.; // reion (log10 so effectively 1 solar mass) return; } - double l10_mturn_a_avg = 0., l10_mturn_m_avg = 0., l10_mturn_r_avg = 0.; + double log10_mturn_m_avg = 0., log10_mturn_a_avg = 0.; #pragma omp parallel num_threads(simulation_options_global->N_THREADS) { @@ -513,7 +420,7 @@ void get_mean_log10_turnovers(InitialConditions *ini_boxes, TsBox *previous_spin double M_turn_a = consts->mturn_a_nofb; double M_turn_r; -#pragma omp for reduction(+ : l10_mturn_m_avg, l10_mturn_a_avg, l10_mturn_r_avg) +#pragma omp for reduction(+ : log10_mturn_m_avg, log10_mturn_a_avg) for (i = 0; i < HII_TOT_NUM_PIXELS; i++) { if (!astro_options_global->FIX_VCB_AVG && matter_options_global->USE_RELATIVE_VELOCITIES) { @@ -529,31 +436,27 @@ void get_mean_log10_turnovers(InitialConditions *ini_boxes, TsBox *previous_spin M_turn_r = reionization_feedback(consts->redshift, Gamma12_val, zre_val); M_turn_a = fmax(M_turn_a, fmax(M_turn_r, astro_params_global->M_TURN)); M_turn_m = fmax(M_turn_m, fmax(M_turn_r, astro_params_global->M_TURN)); - l10_mturn_a_avg += log10(M_turn_a); - l10_mturn_m_avg += log10(M_turn_m); - l10_mturn_r_avg += log10(M_turn_r); - } - 
l10_mturn_a_avg /= HII_TOT_NUM_PIXELS; - l10_mturn_m_avg /= HII_TOT_NUM_PIXELS; - l10_mturn_r_avg /= HII_TOT_NUM_PIXELS; - turnovers[0] = l10_mturn_a_avg; - turnovers[1] = l10_mturn_m_avg; - turnovers[2] = l10_mturn_r_avg; + mturn_a_grid[i] = log10(M_turn_a); + log10_mturn_a_avg += log10(M_turn_a); + mturn_m_grid[i] = log10(M_turn_m); + log10_mturn_m_avg += log10(M_turn_m); + } } + + // NOTE: This average log10 Mturn will be passed onto the spin temperature calculations where + // It is used to perform the frequency integrals (over tau, dependent on ), and possibly + // for mean fixing. It is the volume-weighted mean of LOG10 Mturn, although we could do another + // weighting or use Mturn directly None of these are a perfect representation due to the + // nonlinear way turnover mass affects N_ion + log10_mturn_a_avg /= HII_TOT_NUM_PIXELS; + log10_mturn_m_avg /= HII_TOT_NUM_PIXELS; + averages[0] = log10_mturn_a_avg; + averages[1] = log10_mturn_m_avg; } -void sum_halos_onto_grid(InitialConditions *ini_boxes, TsBox *previous_spin_temp, - IonizedBox *previous_ionize_box, PerturbHaloField *halos, - struct ScalingConstants *consts, HaloBox *grids, - struct HaloProperties *averages) { - double redshift = consts->redshift; - // averages - double hm_avg = 0., sm_avg = 0., sfr_avg = 0.; - double sm_avg_mini = 0., sfr_avg_mini = 0.; - double M_turn_a_avg = 0., M_turn_m_avg = 0., M_turn_r_avg = 0.; - double n_ion_avg = 0., wsfr_avg = 0., xray_avg = 0.; - // counts +void sum_halos_onto_grid(InitialConditions *ini_boxes, PerturbHaloField *halos, float *mturn_a_grid, + float *mturn_m_grid, ScalingConstants *consts, HaloBox *grids) { unsigned long long int total_n_halos, n_halos_cut = 0.; double cell_volume = VOLUME / HII_TOT_NUM_PIXELS; @@ -568,20 +471,15 @@ void sum_halos_onto_grid(InitialConditions *ini_boxes, TsBox *previous_spin_temp double halo_pos[3]; int halo_idx[3]; unsigned long long int i_halo, i_cell; - double hmass, nion, sfr, wsfr, sfr_mini, stars_mini, stars, xray; 
- double J21_val, Gamma12_val, zre_val; + double hmass; - double curr_vcb = consts->vcb_norel; double M_turn_m = consts->mturn_m_nofb; double M_turn_a = consts->mturn_a_nofb; - double M_turn_r = 0.; double in_props[3]; - struct HaloProperties out_props; + HaloProperties out_props; -#pragma omp for reduction(+ : hm_avg, sm_avg, sm_avg_mini, sfr_avg, sfr_avg_mini, n_ion_avg, \ - xray_avg, wsfr_avg, M_turn_a_avg, M_turn_m_avg, M_turn_r_avg, \ - n_halos_cut) +#pragma omp for reduction(+ : n_halos_cut) for (i_halo = 0; i_halo < halos->n_halos; i_halo++) { hmass = halos->halo_masses[i_halo]; // It is sometimes useful to make cuts to the halo catalogues before gridding. @@ -604,22 +502,8 @@ void sum_halos_onto_grid(InitialConditions *ini_boxes, TsBox *previous_spin_temp // NOTE: I could easily apply reionization feedback without minihalos but this was not // done previously if (astro_options_global->USE_MINI_HALOS) { - if (!astro_options_global->FIX_VCB_AVG && - matter_options_global->USE_RELATIVE_VELOCITIES) - curr_vcb = ini_boxes->lowres_vcb[i_cell]; - - J21_val = Gamma12_val = zre_val = 0.; - if (consts->redshift < simulation_options_global->Z_HEAT_MAX) { - J21_val = previous_spin_temp->J_21_LW[i_cell]; - Gamma12_val = previous_ionize_box->ionisation_rate_G12[i_cell]; - zre_val = previous_ionize_box->z_reion[i_cell]; - } - - M_turn_a = consts->mturn_a_nofb; - M_turn_m = lyman_werner_threshold(redshift, J21_val, curr_vcb); - M_turn_r = reionization_feedback(redshift, Gamma12_val, zre_val); - M_turn_a = fmax(M_turn_a, fmax(M_turn_r, astro_params_global->M_TURN)); - M_turn_m = fmax(M_turn_m, fmax(M_turn_r, astro_params_global->M_TURN)); + M_turn_a = mturn_a_grid[i_cell]; + M_turn_m = mturn_m_grid[i_cell]; } // these are the halo property RNG sequences @@ -629,22 +513,15 @@ void sum_halos_onto_grid(InitialConditions *ini_boxes, TsBox *previous_spin_temp set_halo_properties(hmass, M_turn_a, M_turn_m, consts, in_props, &out_props); - sfr = out_props.halo_sfr; - sfr_mini = 
out_props.sfr_mini; - nion = out_props.n_ion; - wsfr = out_props.fescweighted_sfr; - stars = out_props.stellar_mass; - stars_mini = out_props.stellar_mass_mini; - xray = out_props.halo_xray; - #if LOG_LEVEL >= ULTRA_DEBUG_LEVEL if (i_cell == 0) { // LOG_ULTRA_DEBUG("(%d %d %d) i_cell %llu i_halo %llu",x,y,z,i_cell, i_halo); LOG_ULTRA_DEBUG( "Cell 0 Halo: HM: %.2e SM: %.2e (%.2e) SF: %.2e (%.2e) X: %.2e NI: %.2e WS: " "%.2e Z : %.2e ct : %llu", - hmass, stars, stars_mini, sfr, sfr_mini, xray, nion, wsfr, - out_props.metallicity, i_halo); + hmass, out_props.stellar_mass, out_props.stellar_mass_mini, out_props.halo_sfr, + out_props.sfr_mini, out_props.halo_xray, out_props.n_ion, + out_props.fescweighted_sfr, out_props.metallicity, i_halo); // LOG_ULTRA_DEBUG("Cell 0 Sums: HM: %.2e SM: %.2e (%.2e) SF: %.2e (%.2e) X: %.2e // NI: %.2e WS: %.2e ct : %d", @@ -662,42 +539,30 @@ void sum_halos_onto_grid(InitialConditions *ini_boxes, TsBox *previous_spin_temp #pragma omp atomic update grids->halo_mass[i_cell] += hmass; #pragma omp atomic update - grids->halo_stars[i_cell] += stars; + grids->halo_stars[i_cell] += out_props.stellar_mass; #pragma omp atomic update - grids->n_ion[i_cell] += nion; + grids->n_ion[i_cell] += out_props.n_ion; #pragma omp atomic update - grids->halo_sfr[i_cell] += sfr; + grids->halo_sfr[i_cell] += out_props.halo_sfr; #pragma omp atomic update grids->count[i_cell] += 1; if (astro_options_global->USE_MINI_HALOS) { #pragma omp atomic update - grids->halo_stars_mini[i_cell] += stars_mini; + grids->halo_stars_mini[i_cell] += out_props.stellar_mass_mini; #pragma omp atomic update - grids->halo_sfr_mini[i_cell] += sfr_mini; + grids->halo_sfr_mini[i_cell] += out_props.sfr_mini; } if (astro_options_global->INHOMO_RECO) { #pragma omp atomic update - grids->whalo_sfr[i_cell] += wsfr; + grids->whalo_sfr[i_cell] += out_props.fescweighted_sfr; } if (astro_options_global->USE_TS_FLUCT) { #pragma omp atomic update - grids->halo_xray[i_cell] += xray; + 
grids->halo_xray[i_cell] += out_props.halo_xray; } - - hm_avg += hmass; - sfr_avg += sfr; - sfr_avg_mini += sfr_mini; - sm_avg += stars; - sm_avg_mini += stars_mini; - xray_avg += xray; - n_ion_avg += nion; - wsfr_avg += wsfr; - M_turn_a_avg += M_turn_a; - M_turn_r_avg += M_turn_r; - M_turn_m_avg += M_turn_m; } #pragma omp for @@ -721,8 +586,8 @@ void sum_halos_onto_grid(InitialConditions *ini_boxes, TsBox *previous_spin_temp total_n_halos = halos->n_halos - n_halos_cut; LOG_SUPER_DEBUG("Cell 0 Totals: HM: %.2e SM: %.2e SF: %.2e NI: %.2e ct : %d", grids->halo_mass[HII_R_INDEX(0, 0, 0)], grids->halo_stars[HII_R_INDEX(0, 0, 0)], - grids->halo_sfr[HII_R_INDEX(0, 0, 0)], grids->halo_xray[HII_R_INDEX(0, 0, 0)], - grids->n_ion[HII_R_INDEX(0, 0, 0)], grids->count[HII_R_INDEX(0, 0, 0)]); + grids->halo_sfr[HII_R_INDEX(0, 0, 0)], grids->n_ion[HII_R_INDEX(0, 0, 0)], + grids->count[HII_R_INDEX(0, 0, 0)]); if (astro_options_global->INHOMO_RECO) { LOG_SUPER_DEBUG("FESC * SF %.2e", grids->whalo_sfr[HII_R_INDEX(0, 0, 0)]); } @@ -733,46 +598,11 @@ void sum_halos_onto_grid(InitialConditions *ini_boxes, TsBox *previous_spin_temp LOG_SUPER_DEBUG("MINI SM %.2e SF %.2e", grids->halo_stars_mini[HII_R_INDEX(0, 0, 0)], grids->halo_sfr_mini[HII_R_INDEX(0, 0, 0)]); } - - // NOTE: There is an inconsistency here, the sampled grids use a halo-averaged turnover mass - // whereas the fixed grids / default 21cmfast uses the volume averaged LOG10(turnover mass). 
- // Neither of these are a perfect representation due to the nonlinear way turnover mass - // affects N_ion - if (total_n_halos > 0) { - M_turn_r_avg /= total_n_halos; - M_turn_a_avg /= total_n_halos; - M_turn_m_avg /= total_n_halos; - } else { - // If we have no halos, assume the turnover has no reion feedback & no LW - M_turn_m_avg = consts->mturn_m_nofb; - M_turn_a_avg = consts->mturn_a_nofb; - M_turn_r_avg = 0.; - } - - hm_avg /= VOLUME; - sm_avg /= VOLUME; - sm_avg_mini /= VOLUME; - sfr_avg /= VOLUME; - sfr_avg_mini /= VOLUME; - n_ion_avg /= VOLUME; - xray_avg /= VOLUME; - - averages->halo_mass = hm_avg; - averages->stellar_mass = sm_avg; - averages->halo_sfr = sfr_avg; - averages->stellar_mass_mini = sm_avg_mini; - averages->sfr_mini = sfr_avg_mini; - averages->halo_xray = xray_avg; - averages->n_ion = n_ion_avg; - averages->m_turn_acg = M_turn_a_avg; - averages->m_turn_mcg = M_turn_m_avg; - averages->m_turn_reion = M_turn_r_avg; } // We grid a PERTURBED halofield into the necessary quantities for calculating radiative backgrounds -int ComputeHaloBox(double redshift, InitialConditions *ini_boxes, PerturbedField *perturbed_field, - PerturbHaloField *halos, TsBox *previous_spin_temp, - IonizedBox *previous_ionize_box, HaloBox *grids) { +int ComputeHaloBox(double redshift, InitialConditions *ini_boxes, PerturbHaloField *halos, + TsBox *previous_spin_temp, IonizedBox *previous_ionize_box, HaloBox *grids) { int status; Try { // get parameters @@ -804,37 +634,37 @@ int ComputeHaloBox(double redshift, InitialConditions *ini_boxes, PerturbedField } } - struct ScalingConstants hbox_consts; - + ScalingConstants hbox_consts; set_scaling_constants(redshift, &hbox_consts, true); LOG_DEBUG("Gridding %llu halos...", halos->n_halos); double M_min = minimum_source_mass(redshift, false); double M_max_integral; - double cell_volume = VOLUME / HII_TOT_NUM_PIXELS; - - double turnovers[3]; - - struct HaloProperties averages_box, averages_subsampler; init_ps(); if 
(matter_options_global->USE_INTERPOLATION_TABLES > 0) { - initialiseSigmaMInterpTable( - M_min / 2, - M_MAX_INTEGRAL); // this needs to be initialised above MMax because of Nion_General + // this needs to be initialised above MMax because of Nion_General + initialiseSigmaMInterpTable(M_min / 2, M_MAX_INTEGRAL); + } + + float *mturn_a_grid = NULL; + float *mturn_m_grid = NULL; + if (astro_options_global->USE_MINI_HALOS) { + mturn_a_grid = calloc(HII_TOT_NUM_PIXELS, sizeof(float)); + mturn_m_grid = calloc(HII_TOT_NUM_PIXELS, sizeof(float)); } - // do the mean HMF box - // The default 21cmFAST has a strange behaviour where the nonlinear density is used as - // linear, the condition mass is at mean density, but the total cell mass is multiplied by - // delta This part mimics that behaviour Since we need the average turnover masses before we - // can calculate the global means, we do the CMF integrals first Then we calculate the - // expected UMF integrals before doing the adjustment + double mturn_averages[2]; + get_log10_turnovers(ini_boxes, previous_spin_temp, previous_ionize_box, mturn_a_grid, + mturn_m_grid, &hbox_consts, mturn_averages); + grids->log10_Mcrit_ACG_ave = mturn_averages[0]; + grids->log10_Mcrit_MCG_ave = mturn_averages[1]; if (matter_options_global->FIXED_HALO_GRIDS) { M_max_integral = M_MAX_INTEGRAL; - set_fixed_grids(M_min, M_max_integral, ini_boxes, perturbed_field, previous_spin_temp, - previous_ionize_box, &hbox_consts, grids, &averages_box, true); + set_fixed_grids(M_min, M_max_integral, ini_boxes, mturn_a_grid, mturn_m_grid, + &hbox_consts, grids); } else { + sum_halos_onto_grid(ini_boxes, halos, mturn_a_grid, mturn_m_grid, &hbox_consts, grids); // set below-resolution properties if (astro_options_global->AVG_BELOW_SAMPLER) { if (matter_options_global->HALO_STOCHASTICITY) { @@ -844,52 +674,22 @@ int ComputeHaloBox(double redshift, InitialConditions *ini_boxes, PerturbedField simulation_options_global->DIM); } if (M_min < M_max_integral) { - 
set_fixed_grids(M_min, M_max_integral, ini_boxes, perturbed_field, - previous_spin_temp, previous_ionize_box, &hbox_consts, grids, - &averages_subsampler, false); -// This is pretty redundant, but since the fixed grids have density units (X Mpc-3) I have to -// re-multiply before adding the halos. -// I should instead have a flag to output the summed values in cell. (2*N_pixel > N_halo so -// generally i don't want to do it in the halo loop) -#pragma omp parallel for num_threads(simulation_options_global->N_THREADS) private(idx) - for (idx = 0; idx < HII_TOT_NUM_PIXELS; idx++) { - grids->halo_mass[idx] *= cell_volume; - grids->halo_stars[idx] *= cell_volume; - grids->n_ion[idx] *= cell_volume; - grids->halo_sfr[idx] *= cell_volume; - if (astro_options_global->USE_TS_FLUCT) { - grids->halo_xray[idx] *= cell_volume; - } - if (astro_options_global->INHOMO_RECO) { - grids->whalo_sfr[idx] *= cell_volume; - } - if (astro_options_global->USE_MINI_HALOS) { - grids->halo_stars_mini[idx] *= cell_volume; - grids->halo_sfr_mini[idx] *= cell_volume; - } - } + set_fixed_grids(M_min, M_max_integral, ini_boxes, mturn_a_grid, mturn_m_grid, + &hbox_consts, grids); LOG_DEBUG("finished subsampler M[%.2e %.2e]", M_min, M_max_integral); } - } else { - // we still need the average turnovers for global values in spintemp, so get them - // here - get_mean_log10_turnovers(ini_boxes, previous_spin_temp, previous_ionize_box, - perturbed_field, &hbox_consts, turnovers); - grids->log10_Mcrit_ACG_ave = turnovers[0]; - grids->log10_Mcrit_MCG_ave = turnovers[1]; } - sum_halos_onto_grid(ini_boxes, previous_spin_temp, previous_ionize_box, halos, - &hbox_consts, grids, &averages_box); } - halobox_debug_print_avg(&averages_box, &averages_subsampler, &hbox_consts, M_min, - M_MAX_INTEGRAL); + halobox_debug_print_avg(grids, &hbox_consts, M_min, M_MAX_INTEGRAL); + if (astro_options_global->USE_MINI_HALOS) { + free(mturn_a_grid); + free(mturn_m_grid); + } // NOTE: the density-grid based calculations 
(!USE_HALO_FIELD) // use the cell-weighted average of the log10(Mturn) (see issue #369) - LOG_SUPER_DEBUG("log10 Mutrn ACG: log10 cell-weighted %.6e Halo-weighted %.6e", - pow(10, grids->log10_Mcrit_ACG_ave), averages_box.m_turn_acg); - LOG_SUPER_DEBUG("log10 Mutrn MCG: log10 cell-weighted %.6e Halo-weighted %.6e", - pow(10, grids->log10_Mcrit_MCG_ave), averages_box.m_turn_mcg); + LOG_SUPER_DEBUG("log10 Mutrn ACG: %.6e", pow(10, grids->log10_Mcrit_ACG_ave)); + LOG_SUPER_DEBUG("log10 Mutrn MCG: %.6e", pow(10, grids->log10_Mcrit_MCG_ave)); if (matter_options_global->USE_INTERPOLATION_TABLES > 0) { freeSigmaMInterpTable(); @@ -903,13 +703,14 @@ int ComputeHaloBox(double redshift, InitialConditions *ini_boxes, PerturbedField // test function for getting halo properties from the wrapper, can use a lot of memory for large // catalogs int test_halo_props(double redshift, float *vcb_grid, float *J21_LW_grid, float *z_re_grid, - float *Gamma12_ion_grid, int n_halos, float *halo_masses, float *halo_coords, - float *star_rng, float *sfr_rng, float *xray_rng, float *halo_props_out) { + float *Gamma12_ion_grid, unsigned long long int n_halos, float *halo_masses, + float *halo_coords, float *star_rng, float *sfr_rng, float *xray_rng, + float *halo_props_out) { int status; Try { // get parameters - struct ScalingConstants hbox_consts; + ScalingConstants hbox_consts; set_scaling_constants(redshift, &hbox_consts, true); print_sc_consts(&hbox_consts); @@ -931,7 +732,7 @@ int test_halo_props(double redshift, float *vcb_grid, float *J21_LW_grid, float double M_turn_r = 0.; double in_props[3], halo_pos[3]; - struct HaloProperties out_props; + HaloProperties out_props; #pragma omp for for (i_halo = 0; i_halo < n_halos; i_halo++) { diff --git a/src/py21cmfast/src/HaloBox.h b/src/py21cmfast/src/HaloBox.h index a81f77e25..9ed54c44e 100644 --- a/src/py21cmfast/src/HaloBox.h +++ b/src/py21cmfast/src/HaloBox.h @@ -8,10 +8,50 @@ #include "OutputStructs.h" #include "PerturbHaloField.h" #include 
"SpinTemperatureBox.h" +#include "scaling_relations.h" -// Compute the HaloBox Object -int ComputeHaloBox(double redshift, InitialConditions *ini_boxes, PerturbedField *perturbed_field, - PerturbHaloField *halos, TsBox *previous_spin_temp, - IonizedBox *previous_ionize_box, HaloBox *grids); +// struct holding each halo property we currently need. +// This is only used for both averages over the box/catalogues +// as well as an individual halo's properties +typedef struct HaloProperties { + double count; // from integral + double halo_mass; + double stellar_mass; + double halo_sfr; + double stellar_mass_mini; + double sfr_mini; + double fescweighted_sfr; + double n_ion; + double halo_xray; + double metallicity; + double m_turn_acg; + double m_turn_mcg; + double m_turn_reion; +} HaloProperties; + +// TODO: apply this constant struct to the EvaluateX functions in interp_tables.c, +// the integral_wrappers.c functions, and other places where the tables are called +// (probably not hmf.c) +typedef struct IntegralCondition { + double redshift; + double growth_factor; + double M_min; + double lnM_min; + double M_max; + double lnM_max; + double M_cell; + double lnM_cell; + double sigma_cell; +} IntegralCondition; + +void set_integral_constants(IntegralCondition *consts, double redshift, double M_min, double M_max, + double M_cell); + +int ComputeHaloBox(double redshift, InitialConditions *ini_boxes, PerturbHaloField *halos, + TsBox *previous_spin_temp, IonizedBox *previous_ionize_box, HaloBox *grids); + +void get_cell_integrals(double dens, double l10_mturn_a, double l10_mturn_m, + ScalingConstants *consts, IntegralCondition *int_consts, + HaloProperties *properties); #endif diff --git a/src/py21cmfast/src/IonisationBox.c b/src/py21cmfast/src/IonisationBox.c index e55f884a9..c441e4f86 100644 --- a/src/py21cmfast/src/IonisationBox.c +++ b/src/py21cmfast/src/IonisationBox.c @@ -54,7 +54,7 @@ struct IonBoxConstants { int hii_filter; // astro parameters - struct 
ScalingConstants scale_consts; + ScalingConstants scale_consts; double T_re; // astro calculated values @@ -135,7 +135,7 @@ void set_ionbox_constants(double redshift, double prev_redshift, struct IonBoxCo else consts->dz = prev_redshift - redshift; - struct ScalingConstants sc; + ScalingConstants sc; set_scaling_constants(redshift, &sc, true); consts->scale_consts = sc; @@ -447,7 +447,7 @@ void calculate_mcrit_boxes(IonizedBox *prev_ionbox, TsBox *spin_temp, InitialCon void set_mean_fcoll(struct IonBoxConstants *c, IonizedBox *prev_box, IonizedBox *curr_box, double mturn_acg, double mturn_mcg, double *f_limit_acg, double *f_limit_mcg) { double f_coll_curr = 0., f_coll_prev = 0., f_coll_curr_mini = 0., f_coll_prev_mini = 0.; - struct ScalingConstants *sc_ptr = &(c->scale_consts); + ScalingConstants *sc_ptr = &(c->scale_consts); if (astro_options_global->USE_MASS_DEPENDENT_ZETA) { f_coll_curr = Nion_General(c->redshift, c->lnMmin, c->lnMmax_gl, mturn_acg, sc_ptr); *f_limit_acg = Nion_General(simulation_options_global->Z_HEAT_MAX, c->lnMmin, c->lnMmax_gl, @@ -669,7 +669,7 @@ void setup_integration_tables(struct FilteredGrids *fg_struct, struct IonBoxCons double min_density, max_density, prev_min_density = 0., prev_max_density = 0.; double log10Mturn_min = 0., log10Mturn_max = 0., log10Mturn_min_MINI = 0., log10Mturn_max_MINI = 0.; - struct ScalingConstants *sc_ptr = &(consts->scale_consts); + ScalingConstants *sc_ptr = &(consts->scale_consts); // TODO: instead of putting a random upper limit, put a proper flag for switching of one/both // sides of the clipping @@ -741,7 +741,7 @@ void calculate_fcoll_grid(IonizedBox *box, IonizedBox *previous_ionize_box, double f_coll_total = 0., f_coll_MINI_total = 0.; // TODO: make proper error tracking through the parallel region bool error_flag; - struct ScalingConstants *sc_ptr = &(consts->scale_consts); + ScalingConstants *sc_ptr = &(consts->scale_consts); int fc_r_idx; fc_r_idx = (astro_options_global->USE_MINI_HALOS && 
!matter_options_global->USE_HALO_FIELD) diff --git a/src/py21cmfast/src/PerturbField.c b/src/py21cmfast/src/PerturbField.c index c3b2baf16..f0f4d563f 100644 --- a/src/py21cmfast/src/PerturbField.c +++ b/src/py21cmfast/src/PerturbField.c @@ -19,113 +19,7 @@ #include "filtering.h" #include "indexing.h" #include "logger.h" - -static inline void do_cic_interpolation(double *resampled_box, double pos[3], int box_dim[3], - double curr_dens) { - // get the CIC indices and distances - int ipos[3], iposp1[3]; - double dist[3]; - // NOTE: assumes the cell at idx == 0 is *centred* at (0,0,0) - for (int axis = 0; axis < 3; axis++) { - ipos[axis] = (int)floor(pos[axis]); - iposp1[axis] = ipos[axis] + 1; - dist[axis] = pos[axis] - ipos[axis]; - } - - wrap_coord(ipos, box_dim); - wrap_coord(iposp1, box_dim); - - unsigned long long int cic_indices[8] = { - grid_index_general(ipos[0], ipos[1], ipos[2], box_dim), - grid_index_general(iposp1[0], ipos[1], ipos[2], box_dim), - grid_index_general(ipos[0], iposp1[1], ipos[2], box_dim), - grid_index_general(iposp1[0], iposp1[1], ipos[2], box_dim), - grid_index_general(ipos[0], ipos[1], iposp1[2], box_dim), - grid_index_general(iposp1[0], ipos[1], iposp1[2], box_dim), - grid_index_general(ipos[0], iposp1[1], iposp1[2], box_dim), - grid_index_general(iposp1[0], iposp1[1], iposp1[2], box_dim)}; - - double cic_weights[8] = {(1. - dist[0]) * (1. - dist[1]) * (1. - dist[2]), - dist[0] * (1. - dist[1]) * (1. - dist[2]), - (1. - dist[0]) * dist[1] * (1. - dist[2]), - dist[0] * dist[1] * (1. - dist[2]), - (1. - dist[0]) * (1. - dist[1]) * dist[2], - dist[0] * (1. - dist[1]) * dist[2], - (1. 
- dist[0]) * dist[1] * dist[2], - dist[0] * dist[1] * dist[2]}; - - for (int i = 0; i < 8; i++) { -#pragma omp atomic update - resampled_box[cic_indices[i]] += curr_dens * cic_weights[i]; - } -} - -// Function that maps a IC density grid to the perturbed density grid -void move_grid_masses(double redshift, float *dens_pointer, int dens_dim[3], float *vel_pointers[3], - float *vel_pointers_2LPT[3], int vel_dim[3], double *resampled_box, - int out_dim[3]) { - // grid dimension constants - double boxlen = simulation_options_global->BOX_LEN; - double boxlen_z = boxlen * simulation_options_global->NON_CUBIC_FACTOR; - double box_size[3] = {boxlen, boxlen, boxlen_z}; - double dim_ratio_vel = (double)vel_dim[0] / (double)dens_dim[0]; - double dim_ratio_out = (double)out_dim[0] / (double)dens_dim[0]; - - // Setup IC velocity factors - double growth_factor = dicke(redshift); - double displacement_factor_2LPT = -(3.0 / 7.0) * growth_factor * growth_factor; // 2LPT eq. D8 - - double init_growth_factor = dicke(simulation_options_global->INITIAL_REDSHIFT); - double init_displacement_factor_2LPT = - -(3.0 / 7.0) * init_growth_factor * init_growth_factor; // 2LPT eq. 
D8 - - double velocity_displacement_factor[3] = { - (growth_factor - init_growth_factor) / box_size[0] * simulation_options_global->DIM, - (growth_factor - init_growth_factor) / box_size[1] * simulation_options_global->DIM, - (growth_factor - init_growth_factor) / box_size[2] * D_PARA}; - double velocity_displacement_factor_2LPT[3] = { - (displacement_factor_2LPT - init_displacement_factor_2LPT) / box_size[0] * - simulation_options_global->DIM, - (displacement_factor_2LPT - init_displacement_factor_2LPT) / box_size[1] * - simulation_options_global->DIM, - (displacement_factor_2LPT - init_displacement_factor_2LPT) / box_size[2] * D_PARA}; -#pragma omp parallel num_threads(simulation_options_global->N_THREADS) - { - int i, j, k, axis; - double pos[3], curr_dens; - int ipos[3]; - unsigned long long vel_index, dens_index; -#pragma omp for - for (i = 0; i < dens_dim[0]; i++) { - for (j = 0; j < dens_dim[1]; j++) { - for (k = 0; k < dens_dim[2]; k++) { - // Transform position to units of box size - pos[0] = i; - pos[1] = j; - pos[2] = k; - resample_index((int[3]){i, j, k}, dim_ratio_vel, ipos); - wrap_coord(ipos, vel_dim); - vel_index = grid_index_general(ipos[0], ipos[1], ipos[2], vel_dim); - for (axis = 0; axis < 3; axis++) { - pos[axis] += - vel_pointers[axis][vel_index] * velocity_displacement_factor[axis]; - // add 2LPT second order corrections - if (matter_options_global->PERTURB_ALGORITHM == 2) { - pos[axis] -= vel_pointers_2LPT[axis][vel_index] * - velocity_displacement_factor_2LPT[axis]; - } - pos[axis] *= dim_ratio_out; - } - - // CIC interpolation - dens_index = grid_index_general(i, j, k, dens_dim); - curr_dens = 1.0 + dens_pointer[dens_index] * init_growth_factor; - do_cic_interpolation(resampled_box, pos, out_dim, curr_dens); - } - } - } - } -} +#include "map_mass.h" void make_density_grid(float redshift, fftwf_complex *fft_density_grid, InitialConditions *boxes) { int i, j, k; diff --git a/src/py21cmfast/src/PerturbHaloField.c 
b/src/py21cmfast/src/PerturbHaloField.c index ca0747f21..0db4a7aed 100644 --- a/src/py21cmfast/src/PerturbHaloField.c +++ b/src/py21cmfast/src/PerturbHaloField.c @@ -94,7 +94,7 @@ int ComputePerturbHaloField(float redshift, InitialConditions *boxes, HaloField halos_perturbed->n_halos = halos->n_halos; // ****************** END INITIALIZATION ******************************** // - int n_exact_dim = 0; + unsigned long long int n_exact_dim = 0; bool error_in_parallel = false; #pragma omp parallel private(i_halo) num_threads(simulation_options_global -> N_THREADS) \ reduction(+ : n_exact_dim) @@ -136,7 +136,7 @@ int ComputePerturbHaloField(float redshift, InitialConditions *boxes, HaloField } } // Divide out multiplicative factor to return to pristine state - LOG_SUPER_DEBUG("Number of halos exactly on the box edge = %d of %d", n_exact_dim, + LOG_SUPER_DEBUG("Number of halos exactly on the box edge = %llu of %llu", n_exact_dim, halos->n_halos); if (error_in_parallel) { LOG_ERROR("Error in parallel processing, some halos were out of bounds."); diff --git a/src/py21cmfast/src/SpinTemperatureBox.c b/src/py21cmfast/src/SpinTemperatureBox.c index ea1e0ce6b..fbd86a14e 100644 --- a/src/py21cmfast/src/SpinTemperatureBox.c +++ b/src/py21cmfast/src/SpinTemperatureBox.c @@ -745,7 +745,7 @@ int UpdateXraySourceBox(HaloBox *halobox, double R_inner, double R_outer, int R_ // NOTE: Frequency integrals are based on PREVIOUS x_e_ave // The x_e tables are not regular, hence the precomputation of indices/interp points void fill_freqint_tables(double zp, double x_e_ave, double filling_factor_of_HI_zp, - double *log10_Mcrit_LW_ave, int R_mm, struct ScalingConstants *sc) { + double *log10_Mcrit_LW_ave, int R_mm, ScalingConstants *sc) { double lower_int_limit; int x_e_ct, R_ct; int R_start, R_end; @@ -867,7 +867,7 @@ int global_reion_properties(double zp, double x_e_ave, double *log10_Mcrit_LW_av double determine_zpp_max, determine_zpp_min; // at z', we need a differenc constant struct - 
struct ScalingConstants sc; + ScalingConstants sc; set_scaling_constants(zp, &sc, false); if (matter_options_global->USE_INTERPOLATION_TABLES > 1) { @@ -930,7 +930,7 @@ int global_reion_properties(double zp, double x_e_ave, double *log10_Mcrit_LW_av void calculate_sfrd_from_grid(int R_ct, float *dens_R_grid, float *Mcrit_R_grid, float *sfrd_grid, float *sfrd_grid_mini, double *ave_sfrd, double *ave_sfrd_mini, - struct ScalingConstants *sc) { + ScalingConstants *sc) { double ave_sfrd_buf = 0; double ave_sfrd_buf_mini = 0; if (astro_options_global->INTEGRATION_METHOD_ATOMIC == 1 || @@ -1458,7 +1458,7 @@ void ts_main(float redshift, float prev_redshift, float perturbed_field_redshift int R_index; float *delta_box_input; float *Mcrit_box_input = NULL; // may be unused - struct ScalingConstants sc, sc_sfrd; + ScalingConstants sc, sc_sfrd; // if we have stars, fill in the heating term boxes if (!NO_LIGHT) { diff --git a/src/py21cmfast/src/Stochasticity.c b/src/py21cmfast/src/Stochasticity.c index 3bf8645e1..b6cafda75 100644 --- a/src/py21cmfast/src/Stochasticity.c +++ b/src/py21cmfast/src/Stochasticity.c @@ -906,9 +906,9 @@ int sample_halo_grids(gsl_rng **rng_arr, double redshift, float *dens_field, nhalo_threads[threadnum] = count; } if (out_of_buffer) { - LOG_ERROR("Halo buffer overflow (allocated %d halos per thread)", arraysize_local); + LOG_ERROR("Halo buffer overflow (allocated %llu halos per thread)", arraysize_local); for (int n_t = 0; n_t < simulation_options_global->N_THREADS; n_t++) { - LOG_ERROR("Thread %d: %d halos", n_t, nhalo_threads[n_t]); + LOG_ERROR("Thread %d: %llu halos", n_t, nhalo_threads[n_t]); } LOG_ERROR( "If you expected to have an above average halo number try raising " @@ -1061,9 +1061,9 @@ int sample_halo_progenitors(gsl_rng **rng_arr, double z_in, double z_out, HaloFi nhalo_threads[threadnum] = count; } if (out_of_buffer) { - LOG_ERROR("Halo buffer overflow (allocated %d halos per thread)", arraysize_local); + LOG_ERROR("Halo buffer overflow 
(allocated %llu halos per thread)", arraysize_local); for (int n_t = 0; n_t < simulation_options_global->N_THREADS; n_t++) { - LOG_ERROR("Thread %d: %d halos", n_t, nhalo_threads[n_t]); + LOG_ERROR("Thread %d: %llu halos", n_t, nhalo_threads[n_t]); } LOG_ERROR( "If you expected to have an above average halo number try raising " diff --git a/src/py21cmfast/src/_functionprototypes_wrapper.h b/src/py21cmfast/src/_functionprototypes_wrapper.h index 4732dcfd4..0e4947ce5 100644 --- a/src/py21cmfast/src/_functionprototypes_wrapper.h +++ b/src/py21cmfast/src/_functionprototypes_wrapper.h @@ -25,9 +25,8 @@ int ComputeIonizedBox(float redshift, float prev_redshift, PerturbedField *pertu int ComputeBrightnessTemp(float redshift, TsBox *spin_temp, IonizedBox *ionized_box, PerturbedField *perturb_field, BrightnessTemp *box); -int ComputeHaloBox(double redshift, InitialConditions *ini_boxes, PerturbedField *perturbed_field, - PerturbHaloField *halos, TsBox *previous_spin_temp, - IonizedBox *previous_ionize_box, HaloBox *grids); +int ComputeHaloBox(double redshift, InitialConditions *ini_boxes, PerturbHaloField *halos, + TsBox *previous_spin_temp, IonizedBox *previous_ionize_box, HaloBox *grids); int UpdateXraySourceBox(HaloBox *halobox, double R_inner, double R_outer, int R_ct, XraySourceBox *source_box); diff --git a/src/py21cmfast/src/debugging.c b/src/py21cmfast/src/debugging.c index 71f4f0ea3..afeb5d794 100644 --- a/src/py21cmfast/src/debugging.c +++ b/src/py21cmfast/src/debugging.c @@ -147,8 +147,7 @@ void writeAstroParams(AstroParams *p) { " HII_EFF_FACTOR=%10.3e\n" " ION_Tvir_MIN=%10.3e\n" " X_RAY_Tvir_MIN=%10.3e\n", - p->HII_EFF_FACTOR, p->ION_Tvir_MIN, p->X_RAY_Tvir_MIN, p->R_BUBBLE_MAX, p->L_X, - p->NU_X_THRESH, p->X_RAY_SPEC_INDEX, p->F_STAR10, p->t_STAR); + p->HII_EFF_FACTOR, p->ION_Tvir_MIN, p->X_RAY_Tvir_MIN); } void writeAstroOptions(AstroOptions *p) { diff --git a/src/py21cmfast/src/heating_helper_progs.c b/src/py21cmfast/src/heating_helper_progs.c index 
1552c1747..17bff67ea 100644 --- a/src/py21cmfast/src/heating_helper_progs.c +++ b/src/py21cmfast/src/heating_helper_progs.c @@ -857,7 +857,7 @@ typedef struct { double ion_eff; double ion_eff_MINI; double log10_Mturn_MINI; - struct ScalingConstants *scale_consts; + ScalingConstants *scale_consts; } tauX_params; double tauX_integrand_MINI(double zhat, void *params) { @@ -919,8 +919,7 @@ double tauX_integrand(double zhat, void *params) { return drpropdz * n * HI_filling_factor_zhat * sigma_tilde; } double tauX_MINI(double nu, double x_e, double x_e_ave, double zp, double zpp, - double HI_filling_factor_zp, double log10_Mturn_MINI, - struct ScalingConstants *sc) { + double HI_filling_factor_zp, double log10_Mturn_MINI, ScalingConstants *sc) { double result, error; gsl_function F; @@ -963,7 +962,7 @@ double tauX_MINI(double nu, double x_e, double x_e_ave, double zp, double zpp, } double tauX(double nu, double x_e, double x_e_ave, double zp, double zpp, - double HI_filling_factor_zp, struct ScalingConstants *sc) { + double HI_filling_factor_zp, ScalingConstants *sc) { double result, error, fcoll; gsl_function F; double rel_tol = 0.005; //<- relative tolerance @@ -1026,7 +1025,7 @@ typedef struct { double zpp; double HI_filling_factor_zp; double log10_Mturn_MINI; - struct ScalingConstants *scale_consts; + ScalingConstants *scale_consts; } nu_tau_one_params; double nu_tau_one_helper_MINI(double nu, void *params) { nu_tau_one_params *p = (nu_tau_one_params *)params; @@ -1039,7 +1038,7 @@ double nu_tau_one_helper(double nu, void *params) { return tauX(nu, p->x_e, p->x_e, p->zp, p->zpp, p->HI_filling_factor_zp, p->scale_consts) - 1; } double nu_tau_one_MINI(double zp, double zpp, double x_e, double HI_filling_factor_zp, - double log10_Mturn_MINI, struct ScalingConstants *sc) { + double log10_Mturn_MINI, ScalingConstants *sc) { int status, iter, max_iter; const gsl_root_fsolver_type *T; gsl_root_fsolver *s; @@ -1107,7 +1106,7 @@ double nu_tau_one_MINI(double zp, double zpp, 
double x_e, double HI_filling_fact } double nu_tau_one(double zp, double zpp, double x_e, double HI_filling_factor_zp, - struct ScalingConstants *sc) { + ScalingConstants *sc) { int status, iter, max_iter; const gsl_root_fsolver_type *T; gsl_root_fsolver *s; diff --git a/src/py21cmfast/src/heating_helper_progs.h b/src/py21cmfast/src/heating_helper_progs.h index cbd0f1c08..3f6fb770d 100644 --- a/src/py21cmfast/src/heating_helper_progs.h +++ b/src/py21cmfast/src/heating_helper_progs.h @@ -45,9 +45,9 @@ double Energy_Lya_heating(double Tk, double Ts, double tau_gp, int flag); // rootfind to get the distance at which GP optical depth tau==1 double nu_tau_one_MINI(double zp, double zpp, double x_e, double HI_filling_factor_zp, - double log10_Mturn_MINI, struct ScalingConstants *sc); + double log10_Mturn_MINI, ScalingConstants *sc); double nu_tau_one(double zp, double zpp, double x_e, double HI_filling_factor_zp, - struct ScalingConstants *sc); + ScalingConstants *sc); // xray heating integrals over frequency double integrate_over_nu(double zp, double local_x_e, double lower_int_limit, int FLAG); diff --git a/src/py21cmfast/src/hmf.c b/src/py21cmfast/src/hmf.c index ccac26017..4ec577e49 100644 --- a/src/py21cmfast/src/hmf.c +++ b/src/py21cmfast/src/hmf.c @@ -842,7 +842,7 @@ double Fcoll_General(double z, double lnM_min, double lnM_max) { } double Nion_General(double z, double lnM_Min, double lnM_Max, double MassTurnover, - struct ScalingConstants *sc) { + ScalingConstants *sc) { struct parameters_gsl_MF_integrals params = { .redshift = z, .growthf = dicke(z), @@ -860,7 +860,7 @@ double Nion_General(double z, double lnM_Min, double lnM_Max, double MassTurnove } double Nion_General_MINI(double z, double lnM_Min, double lnM_Max, double MassTurnover, - struct ScalingConstants *sc) { + ScalingConstants *sc) { struct parameters_gsl_MF_integrals params = { .redshift = z, .growthf = dicke(z), @@ -879,7 +879,7 @@ double Nion_General_MINI(double z, double lnM_Min, double lnM_Max, 
double MassTu } double Xray_General(double z, double lnM_Min, double lnM_Max, double mturn_acg, double mturn_mcg, - struct ScalingConstants *sc) { + ScalingConstants *sc) { // NOTE:in the _General functions, we don't use the scaling relation constants // that are z-dependent so we can evaluate them at multiple redshifts without redoing the // constants @@ -954,7 +954,7 @@ double Mcoll_Conditional(double growthf, double lnM1, double lnM2, double lnM_co double Nion_ConditionalM_MINI(double growthf, double lnM1, double lnM2, double lnM_cond, double sigma2, double delta2, double MassTurnover, - struct ScalingConstants *sc, int method) { + ScalingConstants *sc, int method) { struct parameters_gsl_MF_integrals params = { .growthf = growthf, .Mturn_mcg = MassTurnover, @@ -992,8 +992,7 @@ double Nion_ConditionalM_MINI(double growthf, double lnM1, double lnM2, double l } double Nion_ConditionalM(double growthf, double lnM1, double lnM2, double lnM_cond, double sigma2, - double delta2, double MassTurnover, struct ScalingConstants *sc, - int method) { + double delta2, double MassTurnover, ScalingConstants *sc, int method) { struct parameters_gsl_MF_integrals params = { .growthf = growthf, .Mturn_acg = MassTurnover, @@ -1029,7 +1028,7 @@ double Nion_ConditionalM(double growthf, double lnM1, double lnM2, double lnM_co double Xray_ConditionalM(double redshift, double growthf, double lnM1, double lnM2, double lnM_cond, double sigma2, double delta2, double mturn_acg, double mturn_mcg, - struct ScalingConstants *sc, int method) { + ScalingConstants *sc, int method) { // re-using escape fraction for minihalo parameters struct parameters_gsl_MF_integrals params = { .redshift = redshift, diff --git a/src/py21cmfast/src/hmf.h b/src/py21cmfast/src/hmf.h index 57796e592..454720379 100644 --- a/src/py21cmfast/src/hmf.h +++ b/src/py21cmfast/src/hmf.h @@ -14,11 +14,11 @@ void initialise_GL(double lnM_Min, double lnM_Max); double Nion_General(double z, double lnM_Min, double lnM_Max, double 
MassTurnover, - struct ScalingConstants *sc); + ScalingConstants *sc); double Nion_General_MINI(double z, double lnM_Min, double lnM_Max, double MassTurnover, - struct ScalingConstants *sc); + ScalingConstants *sc); double Xray_General(double z, double lnM_Min, double lnM_Max, double mturn_acg, double mturn_mcg, - struct ScalingConstants *sc); + ScalingConstants *sc); double Fcoll_General(double z, double lnM_min, double lnM_max); double Nhalo_General(double z, double lnM_min, double lnM_max); @@ -28,13 +28,12 @@ double Mcoll_Conditional(double growthf, double lnM1, double lnM2, double lnM_co double delta, int method); double Nion_ConditionalM_MINI(double growthf, double lnM1, double lnM2, double lnM_cond, double sigma2, double delta2, double MassTurnover, - struct ScalingConstants *sc, int method); + ScalingConstants *sc, int method); double Nion_ConditionalM(double growthf, double lnM1, double lnM2, double lnM_cond, double sigma2, - double delta2, double MassTurnover, struct ScalingConstants *sc, - int method); + double delta2, double MassTurnover, ScalingConstants *sc, int method); double Xray_ConditionalM(double redshift, double growthf, double lnM1, double lnM2, double lnM_cond, double sigma2, double delta2, double mturn_acg, double mturn_mcg, - struct ScalingConstants *sc, int method); + ScalingConstants *sc, int method); double unconditional_hmf(double growthf, double lnM, double z, int HMF); double conditional_hmf(double growthf, double lnM, double delta_cond, double sigma_cond, int HMF); diff --git a/src/py21cmfast/src/integral_wrappers.c b/src/py21cmfast/src/integral_wrappers.c index 4481dc98a..a25f5c759 100644 --- a/src/py21cmfast/src/integral_wrappers.c +++ b/src/py21cmfast/src/integral_wrappers.c @@ -116,7 +116,7 @@ void get_global_SFRD_z(int n_redshift, double *redshifts, double *log10_turnover if (matter_options_global->USE_INTERPOLATION_TABLES > 0) initialiseSigmaMInterpTable(M_min, 1e20); - struct ScalingConstants sc; + ScalingConstants sc; 
set_scaling_constants(redshifts[0], &sc, false); int i; @@ -146,7 +146,7 @@ void get_global_Nion_z(int n_redshift, double *redshifts, double *log10_turnover if (matter_options_global->USE_INTERPOLATION_TABLES > 0) initialiseSigmaMInterpTable(M_min, 1e20); - struct ScalingConstants sc; + ScalingConstants sc; set_scaling_constants(redshifts[0], &sc, false); int i; @@ -217,7 +217,7 @@ void get_conditional_SFRD(double redshift, double R, int n_densities, double *de astro_options_global->INTEGRATION_METHOD_MINI == 1)) initialise_GL(log(M_min), log(M_cond)); - struct ScalingConstants sc; + ScalingConstants sc; set_scaling_constants(redshift, &sc, false); int i; @@ -260,7 +260,7 @@ void get_conditional_Nion(double redshift, double R, int n_densities, double *de astro_options_global->INTEGRATION_METHOD_MINI == 1)) initialise_GL(log(M_min), log(M_cond)); - struct ScalingConstants sc; + ScalingConstants sc; set_scaling_constants(redshift, &sc, false); int i; @@ -315,7 +315,7 @@ void get_conditional_Xray(double redshift, double R, int n_densities, double *de astro_options_global->INTEGRATION_METHOD_MINI == 1)) initialise_GL(log(M_min), log(M_cond)); - struct ScalingConstants sc; + ScalingConstants sc; set_scaling_constants(redshift, &sc, false); int i; diff --git a/src/py21cmfast/src/interp_tables.c b/src/py21cmfast/src/interp_tables.c index 88452b10e..16a068dce 100644 --- a/src/py21cmfast/src/interp_tables.c +++ b/src/py21cmfast/src/interp_tables.c @@ -92,7 +92,7 @@ static RGTable1D_f dSigmasqdm_InterpTable = { // NOTE: this table is initialised for up to N_redshift x N_Mturn, but only called N_filter times to // assign ST_over_PS in Spintemp. 
// It may be better to just do the integrals at each R -void initialise_SFRD_spline(int Nbin, float zmin, float zmax, struct ScalingConstants *sc) { +void initialise_SFRD_spline(int Nbin, float zmin, float zmax, ScalingConstants *sc) { int i, j; double Mmax = M_MAX_INTEGRAL; double lnMmax = log(Mmax); @@ -117,7 +117,7 @@ void initialise_SFRD_spline(int Nbin, float zmin, float zmax, struct ScalingCons #pragma omp parallel private(i, j) num_threads(simulation_options_global -> N_THREADS) { - struct ScalingConstants sc_sfrd; + ScalingConstants sc_sfrd; sc_sfrd = evolve_scaling_constants_sfr(sc); double mturn_mcg; double lnMmin; @@ -159,7 +159,7 @@ void initialise_SFRD_spline(int Nbin, float zmin, float zmax, struct ScalingCons // Unlike the SFRD spline, this one is used more due to the nu_tau_one() rootfind // although still ignores reionisation feedback -void initialise_Nion_Ts_spline(int Nbin, float zmin, float zmax, struct ScalingConstants *sc) { +void initialise_Nion_Ts_spline(int Nbin, float zmin, float zmax, ScalingConstants *sc) { int i, j; double Mmax = M_MAX_INTEGRAL; double lnMmax = log(Mmax); @@ -183,7 +183,7 @@ void initialise_Nion_Ts_spline(int Nbin, float zmin, float zmax, struct ScalingC #pragma omp parallel private(i, j) num_threads(simulation_options_global -> N_THREADS) { - struct ScalingConstants sc_z; + ScalingConstants sc_z; double mturn_mcg; double z_val; double lnMmin; @@ -290,7 +290,7 @@ void initialise_Nion_Conditional_spline(double z, double min_density, double max double Mmin, double Mmax, double Mcond, double log10Mturn_min, double log10Mturn_max, double log10Mturn_min_MINI, double log10Mturn_max_MINI, - struct ScalingConstants *sc, bool prev) { + ScalingConstants *sc, bool prev) { int i, j; double overdense_table[NDELTA]; double mturns[NMTURN], mturns_MINI[NMTURN]; @@ -412,7 +412,7 @@ void initialise_Nion_Conditional_spline(double z, double min_density, double max // This function initialises one table, for table Rx arrays I will call this 
function in a loop void initialise_SFRD_Conditional_table(double z, double min_density, double max_density, double Mmin, double Mmax, double Mcond, - struct ScalingConstants *sc) { + ScalingConstants *sc) { float sigma2; int i, k; @@ -449,7 +449,7 @@ void initialise_SFRD_Conditional_table(double z, double min_density, double max_ SFRD_conditional_table_MINI.y_width = (LOG10_MTURN_MAX - LOG10_MTURN_MIN) / (NMTURN - 1.); } - struct ScalingConstants sc_sfrd = evolve_scaling_constants_sfr(sc); + ScalingConstants sc_sfrd = evolve_scaling_constants_sfr(sc); #pragma omp parallel private(i, k) num_threads(simulation_options_global -> N_THREADS) { @@ -494,7 +494,7 @@ void initialise_SFRD_Conditional_table(double z, double min_density, double max_ // This function initialises one table, for table Rx arrays I will call this function in a loop void initialise_Xray_Conditional_table(double redshift, double min_density, double max_density, double Mmin, double Mmax, double Mcond, - struct ScalingConstants *sc) { + ScalingConstants *sc) { int i, k; LOG_SUPER_DEBUG("Initialising Xray conditional table at mass %.2e from delta %.2e to %.2e", @@ -884,7 +884,7 @@ void free_global_tables() { // JD: moving the interp table evaluations here since some of them are needed in nu_tau_one // NOTE: with !USE_MASS_DEPENDENT_ZETA both EvaluateNionTs and EvaluateSFRD return Fcoll -double EvaluateNionTs(double redshift, struct ScalingConstants *sc) { +double EvaluateNionTs(double redshift, ScalingConstants *sc) { // differences in turnover are handled by table setup if (matter_options_global->USE_INTERPOLATION_TABLES > 1) { if (astro_options_global->USE_MASS_DEPENDENT_ZETA) @@ -898,7 +898,7 @@ double EvaluateNionTs(double redshift, struct ScalingConstants *sc) { double lnMmin = log(minimum_source_mass(redshift, true)); double lnMmax = log(M_MAX_INTEGRAL); - struct ScalingConstants sc_z = evolve_scaling_constants_to_redshift(redshift, sc, false); + ScalingConstants sc_z = 
evolve_scaling_constants_to_redshift(redshift, sc, false); // minihalos uses a different turnover mass if (astro_options_global->USE_MASS_DEPENDENT_ZETA) @@ -907,19 +907,18 @@ double EvaluateNionTs(double redshift, struct ScalingConstants *sc) { return Fcoll_General(redshift, lnMmin, lnMmax); } -double EvaluateNionTs_MINI(double redshift, double log10_Mturn_LW_ave, - struct ScalingConstants *sc) { +double EvaluateNionTs_MINI(double redshift, double log10_Mturn_LW_ave, ScalingConstants *sc) { if (matter_options_global->USE_INTERPOLATION_TABLES > 1) { return EvaluateRGTable2D(redshift, log10_Mturn_LW_ave, &Nion_z_table_MINI); } double lnMmin = log(minimum_source_mass(redshift, true)); double lnMmax = log(M_MAX_INTEGRAL); - struct ScalingConstants sc_z = evolve_scaling_constants_to_redshift(redshift, sc, false); + ScalingConstants sc_z = evolve_scaling_constants_to_redshift(redshift, sc, false); return Nion_General_MINI(redshift, lnMmin, lnMmax, pow(10., log10_Mturn_LW_ave), &sc_z); } -double EvaluateSFRD(double redshift, struct ScalingConstants *sc) { +double EvaluateSFRD(double redshift, ScalingConstants *sc) { if (matter_options_global->USE_INTERPOLATION_TABLES > 1) { if (astro_options_global->USE_MASS_DEPENDENT_ZETA) return EvaluateRGTable1D(redshift, &SFRD_z_table); @@ -934,7 +933,7 @@ double EvaluateSFRD(double redshift, struct ScalingConstants *sc) { // The SFRD calls the same function as N_ion but sets escape fractions to unity // NOTE: since this only occurs on integration, the struct copy shouldn't be a bottleneck - struct ScalingConstants sc_sfrd = evolve_scaling_constants_sfr(sc); + ScalingConstants sc_sfrd = evolve_scaling_constants_sfr(sc); sc_sfrd = evolve_scaling_constants_to_redshift(redshift, &sc_sfrd, false); if (astro_options_global->USE_MASS_DEPENDENT_ZETA) @@ -942,7 +941,7 @@ double EvaluateSFRD(double redshift, struct ScalingConstants *sc) { return Fcoll_General(redshift, lnMmin, lnMmax); } -double EvaluateSFRD_MINI(double redshift, double 
log10_Mturn_LW_ave, struct ScalingConstants *sc) { +double EvaluateSFRD_MINI(double redshift, double log10_Mturn_LW_ave, ScalingConstants *sc) { if (matter_options_global->USE_INTERPOLATION_TABLES > 1) { return EvaluateRGTable2D(redshift, log10_Mturn_LW_ave, &SFRD_z_table_MINI); } @@ -950,19 +949,19 @@ double EvaluateSFRD_MINI(double redshift, double log10_Mturn_LW_ave, struct Scal double lnMmin = log(minimum_source_mass(redshift, true)); double lnMmax = log(M_MAX_INTEGRAL); - struct ScalingConstants sc_sfrd = evolve_scaling_constants_sfr(sc); + ScalingConstants sc_sfrd = evolve_scaling_constants_sfr(sc); sc_sfrd = evolve_scaling_constants_to_redshift(redshift, &sc_sfrd, false); return Nion_General_MINI(redshift, lnMmin, lnMmax, pow(10., log10_Mturn_LW_ave), &sc_sfrd); } double EvaluateSFRD_Conditional(double delta, double growthf, double M_min, double M_max, - double M_cond, double sigma_max, struct ScalingConstants *sc) { + double M_cond, double sigma_max, ScalingConstants *sc) { if (matter_options_global->USE_INTERPOLATION_TABLES > 1) { return exp(EvaluateRGTable1D_f(delta, &SFRD_conditional_table)); } - struct ScalingConstants sc_sfrd = evolve_scaling_constants_sfr(sc); + ScalingConstants sc_sfrd = evolve_scaling_constants_sfr(sc); // SFRD in Ts assumes no (reion) feedback on ACG return Nion_ConditionalM(growthf, log(M_min), log(M_max), log(M_cond), sigma_max, delta, sc_sfrd.mturn_a_nofb, &sc_sfrd, @@ -971,20 +970,20 @@ double EvaluateSFRD_Conditional(double delta, double growthf, double M_min, doub double EvaluateSFRD_Conditional_MINI(double delta, double log10Mturn_m, double growthf, double M_min, double M_max, double M_cond, double sigma_max, - struct ScalingConstants *sc) { + ScalingConstants *sc) { if (matter_options_global->USE_INTERPOLATION_TABLES > 1) { return exp(EvaluateRGTable2D_f(delta, log10Mturn_m, &SFRD_conditional_table_MINI)); } - struct ScalingConstants sc_sfrd = evolve_scaling_constants_sfr(sc); + ScalingConstants sc_sfrd = 
evolve_scaling_constants_sfr(sc); return Nion_ConditionalM_MINI(growthf, log(M_min), log(M_max), log(M_cond), sigma_max, delta, pow(10, log10Mturn_m), &sc_sfrd, astro_options_global->INTEGRATION_METHOD_MINI); } double EvaluateNion_Conditional(double delta, double log10Mturn, double growthf, double M_min, - double M_max, double M_cond, double sigma_max, - struct ScalingConstants *sc, bool prev) { + double M_max, double M_cond, double sigma_max, ScalingConstants *sc, + bool prev) { RGTable2D_f *table = prev ? &Nion_conditional_table_prev : &Nion_conditional_table2D; if (matter_options_global->USE_INTERPOLATION_TABLES > 1) { if (astro_options_global->USE_MINI_HALOS) @@ -1001,7 +1000,7 @@ double EvaluateNion_Conditional(double delta, double log10Mturn, double growthf, double EvaluateNion_Conditional_MINI(double delta, double log10Mturn_m, double growthf, double M_min, double M_max, double M_cond, double sigma_max, - struct ScalingConstants *sc, bool prev) { + ScalingConstants *sc, bool prev) { RGTable2D_f *table = prev ? 
&Nion_conditional_table_MINI_prev : &Nion_conditional_table_MINI; if (matter_options_global->USE_INTERPOLATION_TABLES > 1) { return exp(EvaluateRGTable2D_f(delta, log10Mturn_m, table)); @@ -1014,7 +1013,7 @@ double EvaluateNion_Conditional_MINI(double delta, double log10Mturn_m, double g double EvaluateXray_Conditional(double delta, double log10Mturn_m, double redshift, double growthf, double M_min, double M_max, double M_cond, double sigma_max, - struct ScalingConstants *sc) { + ScalingConstants *sc) { if (matter_options_global->USE_INTERPOLATION_TABLES > 1) { if (astro_options_global->USE_MINI_HALOS) return exp(EvaluateRGTable2D_f(delta, log10Mturn_m, &Xray_conditional_table_2D)); diff --git a/src/py21cmfast/src/interp_tables.h b/src/py21cmfast/src/interp_tables.h index 0fe068a49..0b3cb1bb7 100644 --- a/src/py21cmfast/src/interp_tables.h +++ b/src/py21cmfast/src/interp_tables.h @@ -8,13 +8,13 @@ // all down the chain, so we broadcast them // TODO: in future it would be better to use a context struct. 
See `HaloBox.c` -void initialise_SFRD_spline(int Nbin, float zmin, float zmax, struct ScalingConstants *sc); -double EvaluateSFRD(double redshift, struct ScalingConstants *sc); -double EvaluateSFRD_MINI(double redshift, double log10_Mturn_LW_ave, struct ScalingConstants *sc); +void initialise_SFRD_spline(int Nbin, float zmin, float zmax, ScalingConstants *sc); +double EvaluateSFRD(double redshift, ScalingConstants *sc); +double EvaluateSFRD_MINI(double redshift, double log10_Mturn_LW_ave, ScalingConstants *sc); -void initialise_Nion_Ts_spline(int Nbin, float zmin, float zmax, struct ScalingConstants *sc); -double EvaluateNionTs(double redshift, struct ScalingConstants *sc); -double EvaluateNionTs_MINI(double redshift, double log10_Mturn_LW_ave, struct ScalingConstants *sc); +void initialise_Nion_Ts_spline(int Nbin, float zmin, float zmax, ScalingConstants *sc); +double EvaluateNionTs(double redshift, ScalingConstants *sc); +double EvaluateNionTs_MINI(double redshift, double log10_Mturn_LW_ave, ScalingConstants *sc); void initialise_FgtrM_delta_table(double min_dens, double max_dens, double zpp, double growth_zpp, double smin_zpp, double smax_zpp); @@ -27,27 +27,27 @@ void initialise_Nion_Conditional_spline(double z, double min_density, double max double Mmin, double Mmax, double Mcond, double log10Mturn_min, double log10Mturn_max, double log10Mturn_min_MINI, double log10Mturn_max_MINI, - struct ScalingConstants *sc, bool prev); + ScalingConstants *sc, bool prev); double EvaluateNion_Conditional(double delta, double log10Mturn, double growthf, double M_min, - double M_max, double M_cond, double sigma_max, - struct ScalingConstants *sc, bool prev); + double M_max, double M_cond, double sigma_max, ScalingConstants *sc, + bool prev); double EvaluateNion_Conditional_MINI(double delta, double log10Mturn_m, double growthf, double M_min, double M_max, double M_cond, double sigma_max, - struct ScalingConstants *sc, bool prev); + ScalingConstants *sc, bool prev); void 
initialise_Xray_Conditional_table(double redshift, double min_density, double max_density, double Mmin, double Mmax, double Mcond, - struct ScalingConstants *sc); + ScalingConstants *sc); double EvaluateXray_Conditional(double delta, double log10Mturn_m, double redshift, double growthf, double M_min, double M_max, double M_cond, double sigma_max, - struct ScalingConstants *sc); + ScalingConstants *sc); void initialise_SFRD_Conditional_table(double z, double min_density, double max_density, double Mmin, double Mmax, double Mcond, - struct ScalingConstants *sc); + ScalingConstants *sc); double EvaluateSFRD_Conditional(double delta, double growthf, double M_min, double M_max, - double M_cond, double sigma_max, struct ScalingConstants *sc); + double M_cond, double sigma_max, ScalingConstants *sc); double EvaluateSFRD_Conditional_MINI(double delta, double log10Mturn_m, double growthf, double M_min, double M_max, double M_cond, double sigma_max, - struct ScalingConstants *sc); + ScalingConstants *sc); void initialise_dNdM_tables(double xmin, double xmax, double ymin, double ymax, double growth1, double param, bool from_catalog); diff --git a/src/py21cmfast/src/map_mass.c b/src/py21cmfast/src/map_mass.c new file mode 100644 index 000000000..64a17af4f --- /dev/null +++ b/src/py21cmfast/src/map_mass.c @@ -0,0 +1,280 @@ +// Functions in this file map units of mass from Lagrangian (IC) +// coordinates to their real (Eulerian) Locations, these can sum +// masses or galaxy properties from grids or from coordinate catalogues + +#include "map_mass.h" + +#include +#include +#include +#include + +#include "Constants.h" +#include "HaloBox.h" +#include "InputParameters.h" +#include "cosmology.h" +#include "indexing.h" + +#define do_cic_interpolation(arr, ...) 
\ + _Generic((arr), float *: do_cic_interpolation_float, double *: do_cic_interpolation_double)( \ + arr, __VA_ARGS__) + +static inline void do_cic_interpolation_double(double *resampled_box, double pos[3], int box_dim[3], + double curr_dens) { + // get the CIC indices and distances + int ipos[3], iposp1[3]; + double dist[3]; + // NOTE: assumes the cell at idx == 0 is *centred* at (0,0,0) + for (int axis = 0; axis < 3; axis++) { + ipos[axis] = (int)floor(pos[axis]); + iposp1[axis] = ipos[axis] + 1; + dist[axis] = pos[axis] - ipos[axis]; + } + + wrap_coord(ipos, box_dim); + wrap_coord(iposp1, box_dim); + + unsigned long long int cic_indices[8] = { + grid_index_general(ipos[0], ipos[1], ipos[2], box_dim), + grid_index_general(iposp1[0], ipos[1], ipos[2], box_dim), + grid_index_general(ipos[0], iposp1[1], ipos[2], box_dim), + grid_index_general(iposp1[0], iposp1[1], ipos[2], box_dim), + grid_index_general(ipos[0], ipos[1], iposp1[2], box_dim), + grid_index_general(iposp1[0], ipos[1], iposp1[2], box_dim), + grid_index_general(ipos[0], iposp1[1], iposp1[2], box_dim), + grid_index_general(iposp1[0], iposp1[1], iposp1[2], box_dim)}; + + double cic_weights[8] = {(1. - dist[0]) * (1. - dist[1]) * (1. - dist[2]), + dist[0] * (1. - dist[1]) * (1. - dist[2]), + (1. - dist[0]) * dist[1] * (1. - dist[2]), + dist[0] * dist[1] * (1. - dist[2]), + (1. - dist[0]) * (1. - dist[1]) * dist[2], + dist[0] * (1. - dist[1]) * dist[2], + (1. 
- dist[0]) * dist[1] * dist[2], + dist[0] * dist[1] * dist[2]}; + + for (int i = 0; i < 8; i++) { +#pragma omp atomic update + resampled_box[cic_indices[i]] += curr_dens * cic_weights[i]; + } +} + +// Identical code as above, using a single precision output +static inline void do_cic_interpolation_float(float *resampled_box, double pos[3], int box_dim[3], + double curr_dens) { + // get the CIC indices and distances + int ipos[3], iposp1[3]; + double dist[3]; + // NOTE: assumes the cell at idx == 0 is *centred* at (0,0,0) + for (int axis = 0; axis < 3; axis++) { + ipos[axis] = (int)floor(pos[axis]); + iposp1[axis] = ipos[axis] + 1; + dist[axis] = pos[axis] - ipos[axis]; + } + + wrap_coord(ipos, box_dim); + wrap_coord(iposp1, box_dim); + + unsigned long long int cic_indices[8] = { + grid_index_general(ipos[0], ipos[1], ipos[2], box_dim), + grid_index_general(iposp1[0], ipos[1], ipos[2], box_dim), + grid_index_general(ipos[0], iposp1[1], ipos[2], box_dim), + grid_index_general(iposp1[0], iposp1[1], ipos[2], box_dim), + grid_index_general(ipos[0], ipos[1], iposp1[2], box_dim), + grid_index_general(iposp1[0], ipos[1], iposp1[2], box_dim), + grid_index_general(ipos[0], iposp1[1], iposp1[2], box_dim), + grid_index_general(iposp1[0], iposp1[1], iposp1[2], box_dim)}; + + double cic_weights[8] = {(1. - dist[0]) * (1. - dist[1]) * (1. - dist[2]), + dist[0] * (1. - dist[1]) * (1. - dist[2]), + (1. - dist[0]) * dist[1] * (1. - dist[2]), + dist[0] * dist[1] * (1. - dist[2]), + (1. - dist[0]) * (1. - dist[1]) * dist[2], + dist[0] * (1. - dist[1]) * dist[2], + (1. 
- dist[0]) * dist[1] * dist[2], + dist[0] * dist[1] * dist[2]}; + + for (int i = 0; i < 8; i++) { +#pragma omp atomic update + resampled_box[cic_indices[i]] += curr_dens * cic_weights[i]; + } +} + +// Function that maps a IC density grid to the perturbed density grid +void move_grid_masses(double redshift, float *dens_pointer, int dens_dim[3], float *vel_pointers[3], + float *vel_pointers_2LPT[3], int vel_dim[3], double *resampled_box, + int out_dim[3]) { + // grid dimension constants + double boxlen = simulation_options_global->BOX_LEN; + double boxlen_z = boxlen * simulation_options_global->NON_CUBIC_FACTOR; + double box_size[3] = {boxlen, boxlen, boxlen_z}; + double dim_ratio_vel = (double)vel_dim[0] / (double)dens_dim[0]; + double dim_ratio_out = (double)out_dim[0] / (double)dens_dim[0]; + + // Setup IC velocity factors + double growth_factor = dicke(redshift); + double displacement_factor_2LPT = -(3.0 / 7.0) * growth_factor * growth_factor; // 2LPT eq. D8 + + double init_growth_factor = dicke(simulation_options_global->INITIAL_REDSHIFT); + double init_displacement_factor_2LPT = + -(3.0 / 7.0) * init_growth_factor * init_growth_factor; // 2LPT eq. 
D8 + + double velocity_displacement_factor[3] = { + (growth_factor - init_growth_factor) / box_size[0] * dens_dim[0], + (growth_factor - init_growth_factor) / box_size[1] * dens_dim[1], + (growth_factor - init_growth_factor) / box_size[2] * dens_dim[2]}; + double velocity_displacement_factor_2LPT[3] = { + (displacement_factor_2LPT - init_displacement_factor_2LPT) / box_size[0] * dens_dim[0], + (displacement_factor_2LPT - init_displacement_factor_2LPT) / box_size[1] * dens_dim[1], + (displacement_factor_2LPT - init_displacement_factor_2LPT) / box_size[2] * dens_dim[2]}; +#pragma omp parallel num_threads(simulation_options_global->N_THREADS) + { + int i, j, k, axis; + double pos[3], curr_dens; + int ipos[3]; + unsigned long long vel_index, dens_index; +#pragma omp for + for (i = 0; i < dens_dim[0]; i++) { + for (j = 0; j < dens_dim[1]; j++) { + for (k = 0; k < dens_dim[2]; k++) { + // Transform position to units of box size + pos[0] = i; + pos[1] = j; + pos[2] = k; + resample_index((int[3]){i, j, k}, dim_ratio_vel, ipos); + wrap_coord(ipos, vel_dim); + vel_index = grid_index_general(ipos[0], ipos[1], ipos[2], vel_dim); + for (axis = 0; axis < 3; axis++) { + pos[axis] += + vel_pointers[axis][vel_index] * velocity_displacement_factor[axis]; + // add 2LPT second order corrections + if (matter_options_global->PERTURB_ALGORITHM == 2) { + pos[axis] -= vel_pointers_2LPT[axis][vel_index] * + velocity_displacement_factor_2LPT[axis]; + } + pos[axis] *= dim_ratio_out; + } + + // CIC interpolation + dens_index = grid_index_general(i, j, k, dens_dim); + curr_dens = 1.0 + dens_pointer[dens_index] * init_growth_factor; + do_cic_interpolation(resampled_box, pos, out_dim, curr_dens); + } + } + } + } +} + +// Function that maps a IC density grid to the perturbed density grid +// TODO: This shares a lot of code with move_grid_masses and (future) move_cat_galprops. 
+// I should move these functions to a MapMass.c file (like the GPU build) which contains all the +// mapping functions and specifies initialisation for the below constants. Since the differences +// are on the innermost loops, any generalisation is likely to slow things down. +void move_grid_galprops(double redshift, float *dens_pointer, int dens_dim[3], + float *vel_pointers[3], float *vel_pointers_2LPT[3], int vel_dim[3], + HaloBox *boxes, int out_dim[3], float *mturn_a_grid, float *mturn_m_grid, + ScalingConstants *consts, IntegralCondition *integral_cond) { + // grid dimension constants + double boxlen = simulation_options_global->BOX_LEN; + double boxlen_z = boxlen * simulation_options_global->NON_CUBIC_FACTOR; + double box_size[3] = {boxlen, boxlen, boxlen_z}; + double dim_ratio_vel = (double)vel_dim[0] / (double)dens_dim[0]; + double dim_ratio_out = (double)out_dim[0] / (double)dens_dim[0]; + + double prefactor_mass = RHOcrit * cosmo_params_global->OMm; + double prefactor_stars = RHOcrit * cosmo_params_global->OMb * consts->fstar_10; + double prefactor_stars_mini = RHOcrit * cosmo_params_global->OMb * consts->fstar_7; + double prefactor_sfr = prefactor_stars / consts->t_star / consts->t_h; + double prefactor_sfr_mini = prefactor_stars_mini / consts->t_star / consts->t_h; + double prefactor_nion = prefactor_stars * consts->fesc_10 * consts->pop2_ion; + double prefactor_nion_mini = prefactor_stars_mini * consts->fesc_7 * consts->pop3_ion; + double prefactor_xray = RHOcrit * cosmo_params_global->OMm; + + // Setup IC velocity factors + double growth_factor = dicke(redshift); + double displacement_factor_2LPT = -(3.0 / 7.0) * growth_factor * growth_factor; // 2LPT eq. D8 + + double init_growth_factor = dicke(simulation_options_global->INITIAL_REDSHIFT); + double init_displacement_factor_2LPT = + -(3.0 / 7.0) * init_growth_factor * init_growth_factor; // 2LPT eq. 
D8 + + double velocity_displacement_factor[3] = { + (growth_factor - init_growth_factor) / box_size[0] * dens_dim[0], + (growth_factor - init_growth_factor) / box_size[1] * dens_dim[1], + (growth_factor - init_growth_factor) / box_size[2] * dens_dim[2]}; + double velocity_displacement_factor_2LPT[3] = { + (displacement_factor_2LPT - init_displacement_factor_2LPT) / box_size[0] * dens_dim[0], + (displacement_factor_2LPT - init_displacement_factor_2LPT) / box_size[1] * dens_dim[1], + (displacement_factor_2LPT - init_displacement_factor_2LPT) / box_size[2] * dens_dim[2]}; +#pragma omp parallel num_threads(simulation_options_global->N_THREADS) + { + int i, j, k, axis; + double pos[3], curr_dens; + int ipos[3]; + unsigned long long vel_index, dens_index; + double l10_mturn_a, l10_mturn_m; + HaloProperties properties; +#pragma omp for + for (i = 0; i < dens_dim[0]; i++) { + for (j = 0; j < dens_dim[1]; j++) { + for (k = 0; k < dens_dim[2]; k++) { + // Transform position to units of box size + pos[0] = i; + pos[1] = j; + pos[2] = k; + resample_index((int[3]){i, j, k}, dim_ratio_vel, ipos); + wrap_coord(ipos, vel_dim); + vel_index = grid_index_general(ipos[0], ipos[1], ipos[2], vel_dim); + for (axis = 0; axis < 3; axis++) { + pos[axis] += + vel_pointers[axis][vel_index] * velocity_displacement_factor[axis]; + // add 2LPT second order corrections + if (matter_options_global->PERTURB_ALGORITHM == 2) { + pos[axis] -= vel_pointers_2LPT[axis][vel_index] * + velocity_displacement_factor_2LPT[axis]; + } + pos[axis] *= dim_ratio_out; + } + + // CIC interpolation + dens_index = grid_index_general(i, j, k, dens_dim); + curr_dens = dens_pointer[dens_index] * growth_factor; + l10_mturn_a = mturn_a_grid[i]; + l10_mturn_m = mturn_m_grid[i]; + + get_cell_integrals(curr_dens, l10_mturn_a, l10_mturn_m, consts, integral_cond, + &properties); + do_cic_interpolation(boxes->halo_sfr, pos, out_dim, + properties.halo_sfr * prefactor_sfr); + // re-used the fescweighted field for minihalos + 
do_cic_interpolation(boxes->n_ion, pos, out_dim, + properties.n_ion * prefactor_nion + + properties.fescweighted_sfr * prefactor_nion_mini); + + if (astro_options_global->USE_MINI_HALOS) { + do_cic_interpolation(boxes->halo_stars_mini, pos, out_dim, + properties.stellar_mass_mini * prefactor_stars_mini); + do_cic_interpolation(boxes->halo_sfr_mini, pos, out_dim, + properties.sfr_mini * prefactor_sfr_mini); + } + if (astro_options_global->USE_TS_FLUCT) { + do_cic_interpolation(boxes->halo_xray, pos, out_dim, + properties.halo_xray * prefactor_xray); + } + + // TODO: add an optional flag for fields which aren't used in the radiation + // fields but are useful + // for analysis + // do_cic_interpolation(boxes->halo_mass, pos, out_dim, properties.halo_mass * + // prefactor_mass); do_cic_interpolation(boxes->halo_stars, pos, out_dim, + // properties.stellar_mass * prefactor_stars); + } + } + } + } + // Without stochasticity, these grids are the same to a constant + double prefactor_wsfr = 1 / consts->t_h / consts->t_star; + for (int i = 0; i < HII_TOT_NUM_PIXELS; i++) { + boxes->whalo_sfr[i] = boxes->n_ion[i] * prefactor_wsfr; + } +} diff --git a/src/py21cmfast/src/map_mass.h b/src/py21cmfast/src/map_mass.h new file mode 100644 index 000000000..d95b8f9bc --- /dev/null +++ b/src/py21cmfast/src/map_mass.h @@ -0,0 +1,13 @@ + +#include "HaloBox.h" +#include "OutputStructs.h" +#include "scaling_relations.h" + +void move_grid_masses(double redshift, float *dens_pointer, int dens_dim[3], float *vel_pointers[3], + float *vel_pointers_2LPT[3], int vel_dim[3], double *resampled_box, + int out_dim[3]); + +void move_grid_galprops(double redshift, float *dens_pointer, int dens_dim[3], + float *vel_pointers[3], float *vel_pointers_2LPT[3], int vel_dim[3], + HaloBox *boxes, int out_dim[3], float *mturn_a_grid, float *mturn_m_grid, + ScalingConstants *consts, IntegralCondition *integral_cond); diff --git a/src/py21cmfast/src/photoncons.c b/src/py21cmfast/src/photoncons.c index 
9f96a352d..9af217f01 100644 --- a/src/py21cmfast/src/photoncons.c +++ b/src/py21cmfast/src/photoncons.c @@ -111,7 +111,7 @@ int InitialisePhotonCons() { z_arr = calloc(Nmax, sizeof(double)); Q_arr = calloc(Nmax, sizeof(double)); - struct ScalingConstants sc_i, sc_0, sc_1; + ScalingConstants sc_i, sc_0, sc_1; set_scaling_constants(a_end, &sc_i, false); // set the minimum source mass diff --git a/src/py21cmfast/src/scaling_relations.c b/src/py21cmfast/src/scaling_relations.c index 37c452ec6..fc9316937 100644 --- a/src/py21cmfast/src/scaling_relations.c +++ b/src/py21cmfast/src/scaling_relations.c @@ -18,7 +18,7 @@ #include "photoncons.h" #include "thermochem.h" -void print_sc_consts(struct ScalingConstants *c) { +void print_sc_consts(ScalingConstants *c) { LOG_DEBUG("Printing scaling relation constants z = %.3f....", c->redshift); LOG_DEBUG("SHMR: f10 %.2e a %.2e f7 %.2e a_mini %.2e sigma %.2e", c->fstar_10, c->alpha_star, c->fstar_7, c->alpha_star_mini, c->sigma_star); @@ -33,11 +33,11 @@ void print_sc_consts(struct ScalingConstants *c) { return; } -void set_scaling_constants(double redshift, struct ScalingConstants *consts, bool use_photoncons) { +void set_scaling_constants(double redshift, ScalingConstants *consts, bool use_photoncons) { consts->redshift = redshift; // Set on for the fixed grid case since we are missing halos above the cell mass - consts->fix_mean = matter_options_global->FIXED_HALO_GRIDS; + consts->fix_mean = false; // whether to fix *integrated* (not sampled) galaxy properties to the expected mean consts->scaling_median = astro_options_global->HALO_SCALING_RELATIONS_MEDIAN; @@ -103,8 +103,8 @@ void set_scaling_constants(double redshift, struct ScalingConstants *consts, boo } // It's often useful to create a copy of scaling constants without F_ESC -struct ScalingConstants evolve_scaling_constants_sfr(struct ScalingConstants *sc) { - struct ScalingConstants sc_sfrd = *sc; +ScalingConstants evolve_scaling_constants_sfr(ScalingConstants *sc) { + 
ScalingConstants sc_sfrd = *sc; sc_sfrd.fesc_10 = 1.; sc_sfrd.fesc_7 = 1.; sc_sfrd.alpha_esc = 0.; @@ -115,10 +115,9 @@ struct ScalingConstants evolve_scaling_constants_sfr(struct ScalingConstants *sc } // It's often useful to create a copy of scaling relations at a different z -struct ScalingConstants evolve_scaling_constants_to_redshift(double redshift, - struct ScalingConstants *sc, - bool use_photoncons) { - struct ScalingConstants sc_z = *sc; +ScalingConstants evolve_scaling_constants_to_redshift(double redshift, ScalingConstants *sc, + bool use_photoncons) { + ScalingConstants sc_z = *sc; sc_z.redshift = redshift; sc_z.t_h = t_hubble(redshift); @@ -268,7 +267,7 @@ double get_lx_on_sfr(double sfr, double metallicity, double lx_constant) { } void get_halo_stellarmass(double halo_mass, double mturn_acg, double mturn_mcg, double star_rng, - struct ScalingConstants *consts, double *star_acg, double *star_mcg) { + ScalingConstants *consts, double *star_acg, double *star_mcg) { // low-mass ACG power-law parameters double f_10 = consts->fstar_10; double f_a = consts->alpha_star; @@ -320,7 +319,7 @@ void get_halo_stellarmass(double halo_mass, double mturn_acg, double mturn_mcg, } void get_halo_sfr(double stellar_mass, double stellar_mass_mini, double sfr_rng, - struct ScalingConstants *consts, double *sfr, double *sfr_mini) { + ScalingConstants *consts, double *sfr, double *sfr_mini) { double sfr_mean, sfr_mean_mini; double sfr_sample, sfr_sample_mini; @@ -376,7 +375,7 @@ void get_halo_metallicity(double sfr, double stellar, double redshift, double *z } void get_halo_xray(double sfr, double sfr_mini, double metallicity, double xray_rng, - struct ScalingConstants *consts, double *xray_out) { + ScalingConstants *consts, double *xray_out) { double sigma_xray = consts->sigma_xray; // adjustment to the mean for lognormal scatter diff --git a/src/py21cmfast/src/scaling_relations.h b/src/py21cmfast/src/scaling_relations.h index 17163ee72..afee70521 100644 --- 
a/src/py21cmfast/src/scaling_relations.h +++ b/src/py21cmfast/src/scaling_relations.h @@ -9,7 +9,7 @@ // These are just the values which come from the InputStruct objects and don't change within the // snapshot using this reduces the use of the global parameter structs and allows fewer exp/log // unit changes -struct ScalingConstants { +typedef struct ScalingConstants { double redshift; bool fix_mean; bool scaling_median; @@ -49,28 +49,27 @@ struct ScalingConstants { double Mlim_Fesc; double Mlim_Fstar_mini; double Mlim_Fesc_mini; -}; +} ScalingConstants; -void set_scaling_constants(double redshift, struct ScalingConstants *consts, bool use_photoncons); +void set_scaling_constants(double redshift, ScalingConstants *consts, bool use_photoncons); double get_lx_on_sfr(double sfr, double metallicity, double lx_constant); void get_halo_stellarmass(double halo_mass, double mturn_acg, double mturn_mcg, double star_rng, - struct ScalingConstants *consts, double *star_acg, double *star_mcg); + ScalingConstants *consts, double *star_acg, double *star_mcg); void get_halo_sfr(double stellar_mass, double stellar_mass_mini, double sfr_rng, - struct ScalingConstants *consts, double *sfr, double *sfr_mini); + ScalingConstants *consts, double *sfr, double *sfr_mini); void get_halo_metallicity(double sfr, double stellar, double redshift, double *z_out); void get_halo_xray(double sfr, double sfr_mini, double metallicity, double xray_rng, - struct ScalingConstants *consts, double *xray_out); + ScalingConstants *consts, double *xray_out); double scaling_PL_limit(double M, double norm, double alpha, double pivot, double limit); double log_scaling_PL_limit(double lnM, double ln_norm, double alpha, double ln_pivot, double ln_limit); double scaling_double_PL(double M, double alpha_lo, double pivot_ratio, double alpha_hi, double pivot_hi); -struct ScalingConstants evolve_scaling_constants_sfr(struct ScalingConstants *sc); -struct ScalingConstants evolve_scaling_constants_to_redshift(double 
redshift, - struct ScalingConstants *sc, - bool use_photoncons); -void print_sc_consts(struct ScalingConstants *c); +ScalingConstants evolve_scaling_constants_sfr(ScalingConstants *sc); +ScalingConstants evolve_scaling_constants_to_redshift(double redshift, ScalingConstants *sc, + bool use_photoncons); +void print_sc_consts(ScalingConstants *c); #endif diff --git a/src/py21cmfast/wrapper/outputs.py b/src/py21cmfast/wrapper/outputs.py index 96556e937..da19255e6 100644 --- a/src/py21cmfast/wrapper/outputs.py +++ b/src/py21cmfast/wrapper/outputs.py @@ -958,20 +958,21 @@ def get_required_input_arrays(self, input_box: OutputStruct) -> list[str]: "sfr_rng", "xray_rng", ] - elif isinstance(input_box, PerturbedField): - if self.matter_options.FIXED_HALO_GRIDS: - required += ["density"] elif isinstance(input_box, TsBox): if self.astro_options.USE_MINI_HALOS: required += ["J_21_LW"] elif isinstance(input_box, IonizedBox): required += ["ionisation_rate_G12", "z_reion"] elif isinstance(input_box, InitialConditions): - if ( - self.matter_options.HALO_STOCHASTICITY - and self.astro_options.AVG_BELOW_SAMPLER - ): - required += ["lowres_density"] + required += [ + "lowres_density", + "lowres_vx", + "lowres_vy", + "lowres_vz", + "lowres_vx_2LPT", + "lowres_vy_2LPT", + "lowres_vz_2LPT", + ] if self.matter_options.USE_RELATIVE_VELOCITIES: required += ["lowres_vcb"] else: @@ -994,7 +995,6 @@ def compute( allow_already_computed, self.redshift, initial_conditions, - perturbed_field, pt_halos, previous_spin_temp, previous_ionize_box, From d31c5a342960872a0b2aeac03bce79ea76f21135 Mon Sep 17 00:00:00 2001 From: James Davies Date: Wed, 20 Aug 2025 11:45:04 +1000 Subject: [PATCH 135/145] fix the bugs --- src/py21cmfast/drivers/coeval.py | 2 +- src/py21cmfast/drivers/single_field.py | 25 ++----------------- src/py21cmfast/src/HaloBox.c | 11 +++++---- src/py21cmfast/src/map_mass.c | 34 +++++++++++++++++--------- src/py21cmfast/wrapper/outputs.py | 10 +++++--- 5 files changed, 37 insertions(+), 
45 deletions(-) diff --git a/src/py21cmfast/drivers/coeval.py b/src/py21cmfast/drivers/coeval.py index eb850e7a9..9b2276567 100644 --- a/src/py21cmfast/drivers/coeval.py +++ b/src/py21cmfast/drivers/coeval.py @@ -741,7 +741,7 @@ def _redshift_loop_generator( this_halobox = sf.compute_halo_grid( inputs=inputs, perturbed_halo_list=this_pthalo, - perturbed_field=this_perturbed_field, + redshift=z, previous_ionize_box=getattr(prev_coeval, "ionized_box", None), previous_spin_temp=getattr(prev_coeval, "ts_box", None), write=write.halobox, diff --git a/src/py21cmfast/drivers/single_field.py b/src/py21cmfast/drivers/single_field.py index 83248dee8..d2c4c0df7 100644 --- a/src/py21cmfast/drivers/single_field.py +++ b/src/py21cmfast/drivers/single_field.py @@ -203,10 +203,10 @@ def perturb_halo_list( @single_field_func def compute_halo_grid( *, + redshift: float, initial_conditions: InitialConditions, inputs: InputParameters | None = None, perturbed_halo_list: PerturbHaloField | None = None, - perturbed_field: PerturbedField | None = None, previous_spin_temp: TsBox | None = None, previous_ionize_box: IonizedBox | None = None, ) -> HaloBox: @@ -243,29 +243,9 @@ def compute_halo_grid( regenerate, write, cache: See docs of :func:`initial_conditions` for more information. """ - if perturbed_halo_list: - redshift = perturbed_halo_list.redshift - elif perturbed_field: - redshift = perturbed_field.redshift - else: - raise ValueError( - "Either perturbed_field or perturbed_halo_list are required (or both)." 
- ) - box = HaloBox.new(redshift=redshift, inputs=inputs) - if perturbed_field is None: - if ( - inputs.matter_options.FIXED_HALO_GRIDS - or inputs.astro_options.AVG_BELOW_SAMPLER - ): - raise ValueError( - "You must provide the perturbed field if FIXED_HALO_GRIDS is True or AVG_BELOW_SAMPLER is True" - ) - else: - perturbed_field = PerturbedField.dummy() - - elif perturbed_halo_list is None: + if perturbed_halo_list is None: if not inputs.matter_options.FIXED_HALO_GRIDS: raise ValueError( "You must provide the perturbed halo list if FIXED_HALO_GRIDS is False" @@ -302,7 +282,6 @@ def compute_halo_grid( return box.compute( initial_conditions=initial_conditions, pt_halos=perturbed_halo_list, - perturbed_field=perturbed_field, previous_ionize_box=previous_ionize_box, previous_spin_temp=previous_spin_temp, ) diff --git a/src/py21cmfast/src/HaloBox.c b/src/py21cmfast/src/HaloBox.c index 207bd42bd..99c59a2fa 100644 --- a/src/py21cmfast/src/HaloBox.c +++ b/src/py21cmfast/src/HaloBox.c @@ -276,6 +276,7 @@ int set_fixed_grids(double M_min, double M_max, InitialConditions *ini_boxes, fl HII_TOT_NUM_PIXELS; // mass in cell of mean dens IntegralCondition integral_cond; set_integral_constants(&integral_cond, consts->redshift, M_min, M_max, M_cell); + double growthf = dicke(consts->redshift); // find grid limits for tables double min_density = 0.; @@ -293,7 +294,7 @@ int set_fixed_grids(double M_min, double M_max, InitialConditions *ini_boxes, fl #pragma omp for reduction(min : min_density, min_log10_mturn_a, min_log10_mturn_m) \ reduction(max : max_density, max_log10_mturn_a, max_log10_mturn_m) for (i = 0; i < HII_TOT_NUM_PIXELS; i++) { - dens = ini_boxes->lowres_density[i]; + dens = ini_boxes->lowres_density[i] * growthf; if (dens > max_density) max_density = dens; if (dens < min_density) min_density = dens; @@ -364,9 +365,9 @@ int set_fixed_grids(double M_min, double M_max, InitialConditions *ini_boxes, fl if (astro_options_global->USE_MINI_HALOS) { LOG_ULTRA_DEBUG("MINI SM 
%.2e SF %.2e", grids->halo_stars_mini[HII_R_INDEX(0, 0, 0)], grids->halo_sfr_mini[HII_R_INDEX(0, 0, 0)]); + LOG_ULTRA_DEBUG("Mturn_a %.2e Mturn_m %.2e", mturn_a_grid[HII_R_INDEX(0, 0, 0)], + mturn_m_grid[HII_R_INDEX(0, 0, 0)]); } - LOG_ULTRA_DEBUG("Mturn_a %.2e Mturn_m %.2e", mturn_a_grid[HII_R_INDEX(0, 0, 0)], - mturn_m_grid[HII_R_INDEX(0, 0, 0)]); free_conditional_tables(); if (consts->fix_mean) mean_fix_grids(M_min, M_max, grids, consts); @@ -404,8 +405,8 @@ void halobox_debug_print_avg(HaloBox *halobox, ScalingConstants *consts, double void get_log10_turnovers(InitialConditions *ini_boxes, TsBox *previous_spin_temp, IonizedBox *previous_ionize_box, float *mturn_a_grid, float *mturn_m_grid, ScalingConstants *consts, double averages[2]) { - averages[0] = consts->mturn_a_nofb; - averages[1] = consts->mturn_m_nofb; + averages[0] = log10(consts->mturn_a_nofb); + averages[1] = log10(consts->mturn_m_nofb); if (!astro_options_global->USE_MINI_HALOS) { return; } diff --git a/src/py21cmfast/src/map_mass.c b/src/py21cmfast/src/map_mass.c index 64a17af4f..4262853b3 100644 --- a/src/py21cmfast/src/map_mass.c +++ b/src/py21cmfast/src/map_mass.c @@ -14,6 +14,7 @@ #include "InputParameters.h" #include "cosmology.h" #include "indexing.h" +#include "logger.h" #define do_cic_interpolation(arr, ...) \ _Generic((arr), float *: do_cic_interpolation_float, double *: do_cic_interpolation_double)( \ @@ -167,8 +168,7 @@ void move_grid_masses(double redshift, float *dens_pointer, int dens_dim[3], flo // Function that maps a IC density grid to the perturbed density grid // TODO: This shares a lot of code with move_grid_masses and (future) move_cat_galprops. -// I should move these functions to a MapMass.c file (like the GPU build) which contains all the -// mapping functions and specifies initialisation for the below constants. 
Since the differences +// I should look into combining elements, however since the differences // are on the innermost loops, any generalisation is likely to slow things down. void move_grid_galprops(double redshift, float *dens_pointer, int dens_dim[3], float *vel_pointers[3], float *vel_pointers_2LPT[3], int vel_dim[3], @@ -212,7 +212,8 @@ void move_grid_galprops(double redshift, float *dens_pointer, int dens_dim[3], double pos[3], curr_dens; int ipos[3]; unsigned long long vel_index, dens_index; - double l10_mturn_a, l10_mturn_m; + double l10_mturn_a = log10(consts->mturn_a_nofb); + double l10_mturn_m = log10(consts->mturn_m_nofb); HaloProperties properties; #pragma omp for for (i = 0; i < dens_dim[0]; i++) { @@ -239,23 +240,29 @@ void move_grid_galprops(double redshift, float *dens_pointer, int dens_dim[3], // CIC interpolation dens_index = grid_index_general(i, j, k, dens_dim); curr_dens = dens_pointer[dens_index] * growth_factor; - l10_mturn_a = mturn_a_grid[i]; - l10_mturn_m = mturn_m_grid[i]; + if (astro_options_global->USE_MINI_HALOS) { + l10_mturn_a = mturn_a_grid[dens_index]; + l10_mturn_m = mturn_m_grid[dens_index]; + } get_cell_integrals(curr_dens, l10_mturn_a, l10_mturn_m, consts, integral_cond, &properties); + + // using the properties struct: + // stellar_mass --> no F_esc integral ACG + // stellar_mass_mini --> no F_esc integral MCG + // n_ion --> F_esc integral ACG + // fescweighted_sfr --> F_esc integral MCG + // halo_xray --> Xray integral do_cic_interpolation(boxes->halo_sfr, pos, out_dim, - properties.halo_sfr * prefactor_sfr); - // re-used the fescweighted field for minihalos + properties.stellar_mass * prefactor_sfr); do_cic_interpolation(boxes->n_ion, pos, out_dim, properties.n_ion * prefactor_nion + properties.fescweighted_sfr * prefactor_nion_mini); if (astro_options_global->USE_MINI_HALOS) { - do_cic_interpolation(boxes->halo_stars_mini, pos, out_dim, - properties.stellar_mass_mini * prefactor_stars_mini); 
do_cic_interpolation(boxes->halo_sfr_mini, pos, out_dim, - properties.sfr_mini * prefactor_sfr_mini); + properties.stellar_mass_mini * prefactor_sfr_mini); } if (astro_options_global->USE_TS_FLUCT) { do_cic_interpolation(boxes->halo_xray, pos, out_dim, @@ -266,8 +273,11 @@ void move_grid_galprops(double redshift, float *dens_pointer, int dens_dim[3], // fields but are useful // for analysis // do_cic_interpolation(boxes->halo_mass, pos, out_dim, properties.halo_mass * - // prefactor_mass); do_cic_interpolation(boxes->halo_stars, pos, out_dim, - // properties.stellar_mass * prefactor_stars); + // prefactor_mass); + // do_cic_interpolation(boxes->halo_stars, pos, out_dim, + // properties.stellar_mass * prefactor_stars); + // do_cic_interpolation(boxes->halo_stars_mini, pos, out_dim, + // properties.stellar_mass_mini * prefactor_stars_mini); } } } diff --git a/src/py21cmfast/wrapper/outputs.py b/src/py21cmfast/wrapper/outputs.py index da19255e6..a95df9933 100644 --- a/src/py21cmfast/wrapper/outputs.py +++ b/src/py21cmfast/wrapper/outputs.py @@ -969,10 +969,13 @@ def get_required_input_arrays(self, input_box: OutputStruct) -> list[str]: "lowres_vx", "lowres_vy", "lowres_vz", - "lowres_vx_2LPT", - "lowres_vy_2LPT", - "lowres_vz_2LPT", ] + if self.matter_options.PERTURB_ALGORITHM == "2LPT": + required += [ + "lowres_vx_2LPT", + "lowres_vy_2LPT", + "lowres_vz_2LPT", + ] if self.matter_options.USE_RELATIVE_VELOCITIES: required += ["lowres_vcb"] else: @@ -985,7 +988,6 @@ def compute( *, initial_conditions: InitialConditions, pt_halos: PerturbHaloField, - perturbed_field: PerturbedField, previous_spin_temp: TsBox, previous_ionize_box: IonizedBox, allow_already_computed: bool = False, From 0485848dbf80c0720332bbfd3e4a3709da0d8b57 Mon Sep 17 00:00:00 2001 From: James Davies Date: Thu, 21 Aug 2025 12:45:17 +1000 Subject: [PATCH 136/145] add tests --- src/py21cmfast/src/map_mass.c | 6 +- src/py21cmfast/src/scaling_relations.c | 2 +- src/py21cmfast/wrapper/inputs.py | 2 +- 
tests/test_halo_sampler.py | 111 ------------------------- 4 files changed, 6 insertions(+), 115 deletions(-) diff --git a/src/py21cmfast/src/map_mass.c b/src/py21cmfast/src/map_mass.c index 4262853b3..5517bebba 100644 --- a/src/py21cmfast/src/map_mass.c +++ b/src/py21cmfast/src/map_mass.c @@ -284,7 +284,9 @@ void move_grid_galprops(double redshift, float *dens_pointer, int dens_dim[3], } // Without stochasticity, these grids are the same to a constant double prefactor_wsfr = 1 / consts->t_h / consts->t_star; - for (int i = 0; i < HII_TOT_NUM_PIXELS; i++) { - boxes->whalo_sfr[i] = boxes->n_ion[i] * prefactor_wsfr; + if (astro_options_global->INHOMO_RECO) { + for (int i = 0; i < HII_TOT_NUM_PIXELS; i++) { + boxes->whalo_sfr[i] = boxes->n_ion[i] * prefactor_wsfr; + } } } diff --git a/src/py21cmfast/src/scaling_relations.c b/src/py21cmfast/src/scaling_relations.c index fc9316937..c567a4a85 100644 --- a/src/py21cmfast/src/scaling_relations.c +++ b/src/py21cmfast/src/scaling_relations.c @@ -37,7 +37,7 @@ void set_scaling_constants(double redshift, ScalingConstants *consts, bool use_p consts->redshift = redshift; // Set on for the fixed grid case since we are missing halos above the cell mass - consts->fix_mean = false; + consts->fix_mean = matter_options_global->HMF == 2 || matter_options_global->HMF == 3; // whether to fix *integrated* (not sampled) galaxy properties to the expected mean consts->scaling_median = astro_options_global->HALO_SCALING_RELATIONS_MEDIAN; diff --git a/src/py21cmfast/wrapper/inputs.py b/src/py21cmfast/wrapper/inputs.py index a007a119e..e6b7a59a7 100644 --- a/src/py21cmfast/wrapper/inputs.py +++ b/src/py21cmfast/wrapper/inputs.py @@ -1385,7 +1385,7 @@ def _astro_options_validator(self, att, val): ) elif ( val.INTEGRATION_METHOD_ATOMIC == "GAMMA-APPROX" - and self.matter_options.HMF != 0 + and self.matter_options.HMF != "PS" ): warnings.warn( "The 'GAMMA-APPROX' integration method uses the EPS conditional mass function" diff --git 
a/tests/test_halo_sampler.py b/tests/test_halo_sampler.py index 961417b19..54c9c611d 100644 --- a/tests/test_halo_sampler.py +++ b/tests/test_halo_sampler.py @@ -4,11 +4,6 @@ import numpy as np import pytest -from py21cmfast import ( - compute_halo_grid, - compute_initial_conditions, - perturb_field, -) from py21cmfast.wrapper import cfuncs as cf from . import test_c_interpolation_tables as cint @@ -231,112 +226,6 @@ def test_halo_prop_sampling(default_input_struct_ts, plt): np.testing.assert_allclose(exp_LX, sim_LX, rtol=1e-4) -# testing that the integrals in HaloBox.c are done correctly by -# using the fixed grids -# TODO: extend test to minihalos w/o feedback -# TODO: maybe let this run with the default ics and perturbed field, -# even though they have different flag options? -def test_fixed_grids(default_input_struct_ts, plt): - inputs = default_input_struct_ts.evolve_input_structs( - USE_HALO_FIELD=True, - FIXED_HALO_GRIDS=True, - USE_UPPER_STELLAR_TURNOVER=False, - ) - - ic = compute_initial_conditions( - inputs=inputs, - ) - perturbed_field = perturb_field(initial_conditions=ic, redshift=10.0, inputs=inputs) - dens = perturbed_field.get("density") - - hbox = compute_halo_grid( - initial_conditions=ic, - inputs=inputs, - perturbed_field=perturbed_field, - ) - - cell_radius = 0.620350491 * ( - inputs.simulation_options.BOX_LEN / inputs.simulation_options.HII_DIM - ) - mt_grid = np.full_like(dens, inputs.astro_params.M_TURN) - - integral_sfrd, _ = cf.evaluate_SFRD_cond( - inputs=inputs, - redshift=perturbed_field.redshift, - radius=cell_radius, - densities=dens, - log10mturns=mt_grid, - ) - integral_sfrd *= 1 + dens - - integral_nion, _ = cf.evaluate_Nion_cond( - inputs=inputs, - redshift=perturbed_field.redshift, - radius=cell_radius, - densities=dens, - l10mturns_acg=mt_grid, - l10mturns_mcg=mt_grid, - ) - integral_nion *= 1 + dens - - integral_xray = cf.evaluate_Xray_cond( - inputs=inputs, - redshift=perturbed_field.redshift, - radius=cell_radius, - 
densities=perturbed_field.density.value, - log10mturns=mt_grid, - ) - integral_xray *= 1 + dens - - # mean-fixing and prefactor numerics results in 1-to-1 comparisons being more difficult - # for now we just test the relative values - integral_sfrd *= hbox.get("halo_sfr").mean() / integral_sfrd.mean() - integral_nion *= hbox.get("n_ion").mean() / integral_nion.mean() - integral_xray *= hbox.get("halo_xray").mean() / integral_xray.mean() - - if plt == mpl.pyplot: - plot_scatter_comparison( - [integral_sfrd, integral_nion, integral_xray], - [hbox.get("halo_sfr"), hbox.get("n_ion"), hbox.get("halo_xray")], - [dens, dens, dens], - ["SFRD", "Nion", "LX"], - plt=plt, - ) - - # TODO: a 5% tolerance isn't fantastic here since they should be the same to a constant factor. - # this happens near the GL integration transition (<1%) and delta_crit (~4%), examine plots - rtol = 5e-2 - print(f"{hbox.get('halo_sfr').shape} {integral_sfrd.shape}", flush=True) - print_failure_stats( - hbox.get("halo_sfr"), - integral_sfrd, - [dens], - 0.0, - rtol, - "sfr", - ) - print_failure_stats( - hbox.get("n_ion"), - integral_nion, - [dens], - 0.0, - rtol, - "nion", - ) - print_failure_stats( - hbox.get("halo_xray"), - integral_xray, - [dens], - 0.0, - rtol, - "LX", - ) - - np.testing.assert_allclose(hbox.get("halo_sfr"), integral_sfrd, rtol=rtol) - np.testing.assert_allclose(hbox.get("n_ion"), integral_nion, rtol=rtol) - np.testing.assert_allclose(hbox.get("halo_xray"), integral_xray, rtol=rtol) - - # very basic scatter comparison def plot_scatter_comparison( truths, tests, inputs, names, log_vals=True, log_inp=False, plt=None From 6acb370a604b81448493376e3b3d4aa7b96ffc14 Mon Sep 17 00:00:00 2001 From: James Davies Date: Thu, 21 Aug 2025 12:49:20 +1000 Subject: [PATCH 137/145] actually add the test file --- tests/test_perturb.py | 241 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 tests/test_perturb.py diff --git a/tests/test_perturb.py 
b/tests/test_perturb.py new file mode 100644 index 000000000..ddd6c2654 --- /dev/null +++ b/tests/test_perturb.py @@ -0,0 +1,241 @@ +"""Contains the tests for the Perturbation algorithm (Linear, Zel'dovich, 2LPT). + +Including perturbation of galaxy properties +""" + +import numpy as np +import pytest + +from py21cmfast import ( + InitialConditions, + compute_halo_grid, + perturb_field, +) +from py21cmfast.wrapper import cfuncs as cf + + +class TestPerturb: + """Tests regarding the perturbation algorithms.""" + + @pytest.fixture(scope="class") + def test_pt_z(self): + """Set redshift at which to test the 2LPT.""" + return 8.0 + + @pytest.fixture(scope="class") + def inputs_low(self, default_input_struct_ts): + """Parameters for 2LPT tests.""" + # using 3-1 ratio for testing + return default_input_struct_ts.evolve_input_structs( + DIM=12, + HII_DIM=4, + BOX_LEN=8, + USE_HALO_FIELD=True, + FIXED_HALO_GRIDS=True, + PERTURB_ON_HIGH_RES=False, + R_BUBBLE_MAX=1.0, + ) + + @pytest.fixture(scope="class") + def inputs_zel(self, inputs_low): + """Parameters for Zel'dovich test.""" + return inputs_low.evolve_input_structs( + PERTURB_ALGORITHM="ZELDOVICH", + ) + + @pytest.fixture(scope="class") + def inputs_linear(self, inputs_low): + """Parameters for Linear test.""" + return inputs_low.evolve_input_structs( + PERTURB_ALGORITHM="LINEAR", + ) + + def get_fake_ics(self, inputs, test_pt_z): + """Make an IC instance for the testing. + + These are inconsistent and strange values for real ICS but + very trackable. 
+ """ + ics = InitialConditions.new(inputs=inputs) + d_z = cf.get_growth_factor(inputs=inputs, redshift=test_pt_z) + d_z_i = cf.get_growth_factor( + inputs=inputs, redshift=inputs.simulation_options.INITIAL_REDSHIFT + ) + + res_fac = int(inputs.simulation_options.HIRES_TO_LOWRES_FACTOR) + lo_dim = inputs.simulation_options.HII_DIM + hi_dim = inputs.simulation_options.DIM + fac_1lpt = inputs.simulation_options.cell_size / (d_z - d_z_i) + fac_2lpt = inputs.simulation_options.cell_size / ( + (-3.0 / 7.0) * (d_z**2 - d_z_i**2) + ) + for name, array in ics.arrays.items(): + setattr(ics, name, array.initialize().computed()) + + # setup the velocities + # NOTE: IC velocities are in Mpc + if not inputs.matter_options.PERTURB_ON_HIGH_RES: + fake_v = np.ones_like(ics.get("lowres_vx")) + ics.set("lowres_vx", 0 * fake_v) + ics.set("lowres_vy", fac_1lpt * fake_v) + ics.set("lowres_vz", 0 * fake_v) + if inputs.matter_options.PERTURB_ALGORITHM == "2LPT": + ics.set("lowres_vx_2LPT", 0 * fake_v) + ics.set("lowres_vy_2LPT", 0 * fake_v) + ics.set("lowres_vz_2LPT", fac_2lpt * fake_v) + else: + fake_v = np.ones_like(ics.get("hires_vx")) + ics.set("hires_vx", 0 * fake_v) + ics.set("hires_vy", -fac_1lpt * fake_v) + ics.set("hires_vz", 0 * fake_v) + if inputs.matter_options.PERTURB_ALGORITHM == "2LPT": + ics.set("hires_vx_2LPT", 0 * fake_v) + ics.set("hires_vy_2LPT", 0 * fake_v) + ics.set("hires_vz_2LPT", -fac_2lpt * fake_v) + + # set some densities that can be easily tracked + d_lo = np.zeros_like(ics.get("lowres_density")) + d_lo[0, 0, 0] = 1 + d_lo[lo_dim // 2, lo_dim // 2, lo_dim // 2] = -1 + ics.set("lowres_density", d_lo) + # make similar hires densities + d_hi = np.zeros_like(ics.get("hires_density")) + d_hi[0, 0, 0] = res_fac**3 + d_hi[hi_dim // 2, hi_dim // 2, hi_dim // 2] = -(res_fac**3) + ics.set("hires_density", d_hi) + + return ics + + def test_lowres_perturb(self, inputs_low, test_pt_z): + """Tests low-resolution perturbation.""" + ics = self.get_fake_ics(inputs_low, 
test_pt_z) + expected_dens = np.roll(ics.get("lowres_density"), (0, 1, -1), (0, 1, 2)) + d_z_i = cf.get_growth_factor( + inputs=inputs_low, redshift=inputs_low.simulation_options.INITIAL_REDSHIFT + ) + expected_dens *= d_z_i + pt = perturb_field( + initial_conditions=ics, + redshift=test_pt_z, + regenerate=True, + write=False, + ) + np.testing.assert_allclose(pt.get("density"), expected_dens, atol=1e-3) + + def test_zel_perturb(self, inputs_zel, test_pt_z): + """Tests Zeldovich perturbation.""" + ics = self.get_fake_ics(inputs_zel, test_pt_z) + expected_dens = np.roll(ics.get("lowres_density"), (0, 1, 0), (0, 1, 2)) + d_z_i = cf.get_growth_factor( + inputs=inputs_zel, redshift=inputs_zel.simulation_options.INITIAL_REDSHIFT + ) + expected_dens *= d_z_i + pt = perturb_field( + initial_conditions=ics, + redshift=test_pt_z, + regenerate=True, + write=False, + ) + np.testing.assert_allclose(pt.get("density"), expected_dens, atol=1e-3) + + def test_linear_perturb(self, inputs_linear, test_pt_z): + """Tests linear perturbation.""" + ics = self.get_fake_ics(inputs_linear, test_pt_z) + expected_dens = ics.get("lowres_density").copy() + d_z = cf.get_growth_factor(inputs=inputs_linear, redshift=test_pt_z) + expected_dens *= d_z + pt = perturb_field( + initial_conditions=ics, + redshift=test_pt_z, + regenerate=True, + write=False, + ) + np.testing.assert_allclose(pt.get("density"), expected_dens, atol=1e-3) + + @pytest.mark.skip( + reason="aliasing in downsampling makes hires 2lpt unit tests difficult" + ) + def test_hires_perturb(self, inputs_hi, test_pt_z): + """Tests the high resolution perturbation.""" + ics = self.get_fake_ics(inputs_hi, test_pt_z) + expected_dens = np.roll(ics.get("lowres_density"), (0, -1, 1), (0, 1, 2)) + d_z_i = cf.get_growth_factor(inputs=inputs_hi, redshift=test_pt_z) + expected_dens *= d_z_i + pt = perturb_field( + initial_conditions=ics, + redshift=test_pt_z, + regenerate=True, + write=False, + ) + np.testing.assert_allclose(pt.get("density"), 
expected_dens, atol=1e-3) + + # TODO: include minihalo properties + def test_hb_perturb(self, inputs_low, test_pt_z): + """Tests the halo property perturbation.""" + ics = self.get_fake_ics(inputs_low, test_pt_z) + hbox = compute_halo_grid( + redshift=test_pt_z, + initial_conditions=ics, + inputs=inputs_low, + ) + cell_radius = 0.620350491 * ( + inputs_low.simulation_options.BOX_LEN + / inputs_low.simulation_options.HII_DIM + ) + d_z = cf.get_growth_factor( + inputs=inputs_low, + redshift=test_pt_z, + ) + dens = np.roll(ics.get("lowres_density"), (0, 1, -1), (0, 1, 2)) * d_z + mt_grid = np.full_like(dens, inputs_low.astro_params.M_TURN) + + prefac_sfr = ( + inputs_low.cosmo_params.cosmo.critical_density(0).to("Msun Mpc-3").value + * inputs_low.astro_params.cdict["F_STAR10"] + * inputs_low.cosmo_params.OMb + * inputs_low.cosmo_params.cosmo.H(test_pt_z).to("s-1").value + / inputs_low.astro_params.t_STAR + ) + prefac_nion = ( + inputs_low.cosmo_params.cosmo.critical_density(0).to("Msun Mpc-3").value + * inputs_low.astro_params.cdict["F_STAR10"] + * inputs_low.cosmo_params.OMb + * inputs_low.astro_params.cdict["F_ESC10"] + * inputs_low.astro_params.cdict["POP2_ION"] + ) + prefac_xray = ( + inputs_low.cosmo_params.cosmo.critical_density(0).to("Msun Mpc-3").value + * inputs_low.cosmo_params.OMm + ) + integral_sfrd, _ = cf.evaluate_SFRD_cond( + inputs=inputs_low, + redshift=test_pt_z, + radius=cell_radius, + densities=dens, + log10mturns=mt_grid, + ) + integral_sfrd *= prefac_sfr + + integral_nion, _ = cf.evaluate_Nion_cond( + inputs=inputs_low, + redshift=test_pt_z, + radius=cell_radius, + densities=dens, + l10mturns_acg=mt_grid, + l10mturns_mcg=mt_grid, + ) + integral_nion *= prefac_nion + + integral_xray = cf.evaluate_Xray_cond( + inputs=inputs_low, + redshift=test_pt_z, + radius=cell_radius, + densities=dens, + log10mturns=mt_grid, + ) + integral_xray *= prefac_xray + + rtol = 1e-2 + np.testing.assert_allclose(hbox.get("halo_sfr"), integral_sfrd, rtol=rtol) + 
np.testing.assert_allclose(hbox.get("n_ion"), integral_nion, rtol=rtol) + np.testing.assert_allclose(hbox.get("halo_xray"), integral_xray, rtol=rtol) From 5f7df6bd6b6c1c62678cc255268f9d03bb583e8f Mon Sep 17 00:00:00 2001 From: James Davies Date: Thu, 21 Aug 2025 21:30:04 +1000 Subject: [PATCH 138/145] add flag for halobox outputs not used in calculation --- src/py21cmfast/_cfg.py | 1 + src/py21cmfast/src/HaloBox.c | 111 +++++++++++++--------- src/py21cmfast/src/IonisationBox.c | 4 +- src/py21cmfast/src/_inputparams_wrapper.h | 1 + src/py21cmfast/src/filtering.c | 3 +- src/py21cmfast/src/interp_tables.c | 5 +- src/py21cmfast/src/interp_tables.h | 1 - src/py21cmfast/src/map_mass.c | 20 ++-- src/py21cmfast/wrapper/outputs.py | 17 ++-- tests/test_integration_features.py | 1 - tests/test_perturb.py | 106 +++++++++------------ 11 files changed, 139 insertions(+), 131 deletions(-) diff --git a/src/py21cmfast/_cfg.py b/src/py21cmfast/_cfg.py index 69cd925ef..302b8e323 100644 --- a/src/py21cmfast/_cfg.py +++ b/src/py21cmfast/_cfg.py @@ -25,6 +25,7 @@ class Config(dict): "ignore_R_BUBBLE_MAX_error": False, "external_table_path": DATA_PATH, "HALO_CATALOG_MEM_FACTOR": 1.2, + "EXTRA_HALOBOX_FIELDS": False, "safe_read": True, } _defaults["wisdoms_path"] = Path(_defaults["direc"]) / "wisdoms" diff --git a/src/py21cmfast/src/HaloBox.c b/src/py21cmfast/src/HaloBox.c index 99c59a2fa..b8f6586bb 100644 --- a/src/py21cmfast/src/HaloBox.c +++ b/src/py21cmfast/src/HaloBox.c @@ -152,27 +152,29 @@ int get_uhmf_averages(double M_min, double M_max, double M_turn_a, double M_turn return 0; } HaloProperties get_halobox_averages(HaloBox *grids) { - double mean_count = 0., mean_mass = 0., mean_stars = 0., mean_stars_mini = 0., mean_sfr = 0., - mean_sfr_mini = 0.; + int mean_count = 0; + double mean_mass = 0., mean_stars = 0., mean_stars_mini = 0., mean_sfr = 0., mean_sfr_mini = 0.; double mean_n_ion = 0., mean_xray = 0., mean_wsfr = 0.; -// TODO: optional flags for count/hm/sm #pragma omp 
parallel for reduction(+ : mean_count, mean_mass, mean_stars, mean_stars_mini, \ mean_sfr, mean_sfr_mini) for (int i = 0; i < HII_TOT_NUM_PIXELS; i++) { - mean_count += grids->count[i]; - mean_mass += grids->halo_mass[i]; - mean_stars += grids->halo_stars[i]; mean_sfr += grids->halo_sfr[i]; mean_n_ion += grids->n_ion[i]; if (astro_options_global->USE_TS_FLUCT) { mean_xray += grids->halo_xray[i]; } if (astro_options_global->USE_MINI_HALOS) { - mean_stars_mini += grids->halo_stars_mini[i]; mean_sfr_mini += grids->halo_sfr_mini[i]; } if (astro_options_global->INHOMO_RECO) mean_wsfr += grids->whalo_sfr[i]; + + if (config_settings.EXTRA_HALOBOX_FIELDS) { + mean_count += grids->count[i]; + mean_mass += grids->halo_mass[i]; + mean_stars += grids->halo_stars[i]; + if (astro_options_global->USE_MINI_HALOS) mean_stars_mini += grids->halo_stars_mini[i]; + } } HaloProperties averages = { @@ -204,13 +206,9 @@ void mean_fix_grids(double M_min, double M_max, HaloBox *grids, ScalingConstants unsigned long long int idx; #pragma omp parallel for num_threads(simulation_options_global->N_THREADS) private(idx) for (idx = 0; idx < HII_TOT_NUM_PIXELS; idx++) { - grids->halo_mass[idx] *= averages_global.halo_mass / averages_hbox.halo_mass; - grids->halo_stars[idx] *= averages_global.stellar_mass / averages_hbox.stellar_mass; grids->halo_sfr[idx] *= averages_global.halo_sfr / averages_hbox.halo_sfr; grids->n_ion[idx] *= averages_global.n_ion / averages_hbox.n_ion; if (astro_options_global->USE_MINI_HALOS) { - grids->halo_stars_mini[idx] *= - averages_global.stellar_mass_mini / averages_hbox.stellar_mass_mini; grids->halo_sfr_mini[idx] *= averages_global.sfr_mini / averages_hbox.sfr_mini; } if (astro_options_global->USE_TS_FLUCT) { @@ -220,6 +218,15 @@ void mean_fix_grids(double M_min, double M_max, HaloBox *grids, ScalingConstants grids->whalo_sfr[idx] *= averages_global.fescweighted_sfr / averages_hbox.fescweighted_sfr; } + + if (config_settings.EXTRA_HALOBOX_FIELDS) { + 
grids->halo_mass[idx] *= averages_global.halo_mass / averages_hbox.halo_mass; + grids->halo_stars[idx] *= averages_global.stellar_mass / averages_hbox.stellar_mass; + if (astro_options_global->USE_MINI_HALOS) { + grids->halo_stars_mini[idx] *= + averages_global.stellar_mass_mini / averages_hbox.stellar_mass_mini; + } + } } } @@ -233,6 +240,16 @@ void get_cell_integrals(double dens, double l10_mturn_a, double l10_mturn_m, double M_cell = int_consts->M_cell; double sigma_cell = int_consts->sigma_cell; + // set all fields to zero + memset(properties, 0, sizeof(HaloProperties)); + + // using the properties struct: + // stellar_mass --> no F_esc integral ACG + // stellar_mass_mini --> no F_esc integral MCG + // n_ion --> F_esc integral ACG + // fescweighted_sfr --> F_esc integral MCG + // halo_xray --> Xray integral + // halo_mass --> total mass properties->n_ion = EvaluateNion_Conditional(dens, l10_mturn_a, growth_z, M_min, M_max, M_cell, sigma_cell, consts, false); properties->stellar_mass = @@ -246,25 +263,18 @@ void get_cell_integrals(double dens, double l10_mturn_a, double l10_mturn_m, // re-using field properties->fescweighted_sfr = EvaluateNion_Conditional_MINI( dens, l10_mturn_m, growth_z, M_min, M_max, M_cell, sigma_cell, consts, false); - } else { - properties->stellar_mass_mini = 0; - properties->fescweighted_sfr = 0; } if (astro_options_global->USE_TS_FLUCT) { properties->halo_xray = EvaluateXray_Conditional(dens, l10_mturn_m, consts->redshift, growth_z, M_min, M_max, M_cell, sigma_cell, consts); - } else { - properties->halo_xray = 0; } - // TODO: add an optional flag for fields which aren't used in the radiation fields but are - // useful - // for analysis - // properties->count = EvaluateNhalo(dens, growth_z, lnMmin, lnMmax, M_cell, sigma_cell, dens); - // properties->halo_mass = EvaluateMcoll(dens, growth_z, lnMmin, lnMmax, M_cell, sigma_cell, - // dens); + if (config_settings.EXTRA_HALOBOX_FIELDS) { + properties->halo_mass = + EvaluateMcoll(dens, 
growth_z, log(M_min), log(M_max), M_cell, sigma_cell, dens); + } } // Fixed halo grids, where each property is set as the integral of the CMF on the EULERIAN cell @@ -352,10 +362,8 @@ int set_fixed_grids(double M_min, double M_max, InitialConditions *ini_boxes, fl vel_pointers_2LPT, grid_dim, grids, grid_dim, mturn_a_grid, mturn_m_grid, consts, &integral_cond); - LOG_ULTRA_DEBUG("Cell 0 Totals: HM: %.2e SM: %.2e SF: %.2e, NI: %.2e ct : %d", - grids->halo_mass[HII_R_INDEX(0, 0, 0)], grids->halo_stars[HII_R_INDEX(0, 0, 0)], - grids->halo_sfr[HII_R_INDEX(0, 0, 0)], grids->n_ion[HII_R_INDEX(0, 0, 0)], - grids->count[HII_R_INDEX(0, 0, 0)]); + LOG_ULTRA_DEBUG("Cell 0 Totals: SF: %.2e, NI: %.2e", grids->halo_sfr[HII_R_INDEX(0, 0, 0)], + grids->n_ion[HII_R_INDEX(0, 0, 0)]); if (astro_options_global->INHOMO_RECO) { LOG_ULTRA_DEBUG("FESC * SF %.2e", grids->whalo_sfr[HII_R_INDEX(0, 0, 0)]); } @@ -537,20 +545,12 @@ void sum_halos_onto_grid(InitialConditions *ini_boxes, PerturbHaloField *halos, #endif // update the grids -#pragma omp atomic update - grids->halo_mass[i_cell] += hmass; -#pragma omp atomic update - grids->halo_stars[i_cell] += out_props.stellar_mass; #pragma omp atomic update grids->n_ion[i_cell] += out_props.n_ion; #pragma omp atomic update grids->halo_sfr[i_cell] += out_props.halo_sfr; -#pragma omp atomic update - grids->count[i_cell] += 1; if (astro_options_global->USE_MINI_HALOS) { -#pragma omp atomic update - grids->halo_stars_mini[i_cell] += out_props.stellar_mass_mini; #pragma omp atomic update grids->halo_sfr_mini[i_cell] += out_props.sfr_mini; } @@ -564,14 +564,25 @@ void sum_halos_onto_grid(InitialConditions *ini_boxes, PerturbHaloField *halos, #pragma omp atomic update grids->halo_xray[i_cell] += out_props.halo_xray; } + + if (config_settings.EXTRA_HALOBOX_FIELDS) { +#pragma omp atomic update + grids->halo_mass[i_cell] += hmass; +#pragma omp atomic update + grids->halo_stars[i_cell] += out_props.stellar_mass; +#pragma omp atomic update + 
grids->count[i_cell] += 1; + if (astro_options_global->USE_MINI_HALOS) { +#pragma omp atomic update + grids->halo_stars_mini[i_cell] += out_props.stellar_mass_mini; + } + } } #pragma omp for for (i_cell = 0; i_cell < HII_TOT_NUM_PIXELS; i_cell++) { - grids->halo_mass[i_cell] /= cell_volume; - grids->halo_sfr[i_cell] /= cell_volume; - grids->halo_stars[i_cell] /= cell_volume; grids->n_ion[i_cell] /= cell_volume; + grids->halo_sfr[i_cell] /= cell_volume; if (astro_options_global->USE_TS_FLUCT) { grids->halo_xray[i_cell] /= cell_volume; } @@ -580,15 +591,19 @@ void sum_halos_onto_grid(InitialConditions *ini_boxes, PerturbHaloField *halos, } if (astro_options_global->USE_MINI_HALOS) { grids->halo_sfr_mini[i_cell] /= cell_volume; - grids->halo_stars_mini[i_cell] /= cell_volume; + } + if (config_settings.EXTRA_HALOBOX_FIELDS) { + grids->halo_mass[i_cell] /= cell_volume; + grids->halo_stars[i_cell] /= cell_volume; + if (astro_options_global->USE_MINI_HALOS) { + grids->halo_stars_mini[i_cell] /= cell_volume; + } } } } total_n_halos = halos->n_halos - n_halos_cut; - LOG_SUPER_DEBUG("Cell 0 Totals: HM: %.2e SM: %.2e SF: %.2e NI: %.2e ct : %d", - grids->halo_mass[HII_R_INDEX(0, 0, 0)], grids->halo_stars[HII_R_INDEX(0, 0, 0)], - grids->halo_sfr[HII_R_INDEX(0, 0, 0)], grids->n_ion[HII_R_INDEX(0, 0, 0)], - grids->count[HII_R_INDEX(0, 0, 0)]); + LOG_SUPER_DEBUG("Cell 0 Totals: SF: %.2e NI: %.2e", grids->halo_sfr[HII_R_INDEX(0, 0, 0)], + grids->n_ion[HII_R_INDEX(0, 0, 0)]); if (astro_options_global->INHOMO_RECO) { LOG_SUPER_DEBUG("FESC * SF %.2e", grids->whalo_sfr[HII_R_INDEX(0, 0, 0)]); } @@ -618,21 +633,25 @@ int ComputeHaloBox(double redshift, InitialConditions *ini_boxes, PerturbHaloFie unsigned long long int idx; #pragma omp parallel for num_threads(simulation_options_global->N_THREADS) private(idx) for (idx = 0; idx < HII_TOT_NUM_PIXELS; idx++) { - grids->halo_mass[idx] = 0.0; grids->n_ion[idx] = 0.0; grids->halo_sfr[idx] = 0.0; - grids->halo_stars[idx] = 0.0; - 
grids->count[idx] = 0; if (astro_options_global->USE_TS_FLUCT) { grids->halo_xray[idx] = 0.0; } if (astro_options_global->USE_MINI_HALOS) { - grids->halo_stars_mini[idx] = 0.0; grids->halo_sfr_mini[idx] = 0.0; } if (astro_options_global->INHOMO_RECO) { grids->whalo_sfr[idx] = 0.0; } + if (config_settings.EXTRA_HALOBOX_FIELDS) { + grids->halo_mass[idx] = 0.0; + grids->halo_stars[idx] = 0.0; + grids->count[idx] = 0; + if (astro_options_global->USE_MINI_HALOS) { + grids->halo_stars_mini[idx] = 0.0; + } + } } ScalingConstants hbox_consts; diff --git a/src/py21cmfast/src/IonisationBox.c b/src/py21cmfast/src/IonisationBox.c index c441e4f86..b1ecfd314 100644 --- a/src/py21cmfast/src/IonisationBox.c +++ b/src/py21cmfast/src/IonisationBox.c @@ -206,9 +206,7 @@ void set_ionbox_constants(double redshift, double prev_redshift, struct IonBoxCo pow(1 + redshift, 2) * CMperMPC * SIGMA_HI * astro_params_global->ALPHA_UVB / (astro_params_global->ALPHA_UVB + 2.75) * N_b0 * consts->ion_eff_factor / 1.0e-12; if (matter_options_global->USE_HALO_FIELD) - consts->gamma_prefactor /= - RHOcrit * cosmo_params_global->OMb; // TODO: double-check these unit differences, - // HaloBox.halo_wsfr vs Nion_General units + consts->gamma_prefactor /= RHOcrit * cosmo_params_global->OMb; else consts->gamma_prefactor = consts->gamma_prefactor / (sc.t_h * sc.t_star); diff --git a/src/py21cmfast/src/_inputparams_wrapper.h b/src/py21cmfast/src/_inputparams_wrapper.h index 0927b3c2a..fdf0ede90 100644 --- a/src/py21cmfast/src/_inputparams_wrapper.h +++ b/src/py21cmfast/src/_inputparams_wrapper.h @@ -157,6 +157,7 @@ typedef struct AstroOptions { typedef struct ConfigSettings { double HALO_CATALOG_MEM_FACTOR; + bool EXTRA_HALOBOX_FIELDS; char *external_table_path; char *wisdoms_path; diff --git a/src/py21cmfast/src/filtering.c b/src/py21cmfast/src/filtering.c index 408e94ad4..53196b45f 100644 --- a/src/py21cmfast/src/filtering.c +++ b/src/py21cmfast/src/filtering.c @@ -167,8 +167,7 @@ void 
filter_box(fftwf_complex *box, int RES, int filter_type, float R, float R_p grid_index = RES == 1 ? HII_C_INDEX(n_x, n_y, n_z) : C_INDEX(n_x, n_y, n_z); // TODO: it would be nice to combine these into the filter_function call, *but* - // since - // each can take different arguments more thought is needed + // since each can take different arguments more thought is needed if (filter_type == 0) { // real space top-hat kR = sqrt(k_mag_sq) * R; box[grid_index] *= real_tophat_filter(kR); diff --git a/src/py21cmfast/src/interp_tables.c b/src/py21cmfast/src/interp_tables.c index 16a068dce..5dccc6ed7 100644 --- a/src/py21cmfast/src/interp_tables.c +++ b/src/py21cmfast/src/interp_tables.c @@ -43,9 +43,8 @@ static RGTable2D SFRD_z_table_MINI = {.allocated = false}; static RGTable2D Nion_z_table_MINI = {.allocated = false}; static RGTable2D Xray_z_table_2D = {.allocated = false}; // TODO: SFRD tables assume no reionisation feedback, this is self-inconsistent, but probably okay -// given -// it's used (mostly) in the SpinTemperature, which deals with neutral regions -// Will overestimate integral component of SFRD lightcones used in observation +// given it's used (mostly) in the SpinTemperature, which deals with neutral regions +// Will overestimate integral component of SFRD lightcones used in observation static RGTable1D_f SFRD_conditional_table = {.allocated = false}; static RGTable1D_f Nion_conditional_table1D = {.allocated = false}; static RGTable2D_f Nion_conditional_table2D = {.allocated = false}; diff --git a/src/py21cmfast/src/interp_tables.h b/src/py21cmfast/src/interp_tables.h index 0b3cb1bb7..1b15bf88b 100644 --- a/src/py21cmfast/src/interp_tables.h +++ b/src/py21cmfast/src/interp_tables.h @@ -6,7 +6,6 @@ // Functions within interp_tables.c need the parameter structures, but we don't want to pass them // all down the chain, so we broadcast them -// TODO: in future it would be better to use a context struct. 
See `HaloBox.c` void initialise_SFRD_spline(int Nbin, float zmin, float zmax, ScalingConstants *sc); double EvaluateSFRD(double redshift, ScalingConstants *sc); diff --git a/src/py21cmfast/src/map_mass.c b/src/py21cmfast/src/map_mass.c index 5517bebba..ef0d62018 100644 --- a/src/py21cmfast/src/map_mass.c +++ b/src/py21cmfast/src/map_mass.c @@ -269,15 +269,17 @@ void move_grid_galprops(double redshift, float *dens_pointer, int dens_dim[3], properties.halo_xray * prefactor_xray); } - // TODO: add an optional flag for fields which aren't used in the radiation - // fields but are useful - // for analysis - // do_cic_interpolation(boxes->halo_mass, pos, out_dim, properties.halo_mass * - // prefactor_mass); - // do_cic_interpolation(boxes->halo_stars, pos, out_dim, - // properties.stellar_mass * prefactor_stars); - // do_cic_interpolation(boxes->halo_stars_mini, pos, out_dim, - // properties.stellar_mass_mini * prefactor_stars_mini); + if (config_settings.EXTRA_HALOBOX_FIELDS) { + do_cic_interpolation(boxes->halo_mass, pos, out_dim, + properties.halo_mass * prefactor_mass); + do_cic_interpolation(boxes->halo_stars, pos, out_dim, + properties.stellar_mass * prefactor_stars); + if (astro_options_global->USE_MINI_HALOS) { + do_cic_interpolation( + boxes->halo_stars_mini, pos, out_dim, + properties.stellar_mass_mini * prefactor_stars_mini); + } + } } } } diff --git a/src/py21cmfast/wrapper/outputs.py b/src/py21cmfast/wrapper/outputs.py index a95df9933..4185f29e0 100644 --- a/src/py21cmfast/wrapper/outputs.py +++ b/src/py21cmfast/wrapper/outputs.py @@ -28,6 +28,7 @@ from astropy.cosmology import z_at_value from bidict import bidict +from .._cfg import config from ..c_21cmfast import lib from .arrays import Array from .exceptions import _process_exitcode @@ -889,10 +890,9 @@ class HaloBox(OutputStructZ): _meta = False _c_compute_function = lib.ComputeHaloBox - halo_mass = _arrayfield() - halo_stars = _arrayfield() + halo_mass = _arrayfield(optional=True) + halo_stars = 
_arrayfield(optional=True) halo_stars_mini = _arrayfield(optional=True) - count = _arrayfield() halo_sfr = _arrayfield() halo_sfr_mini = _arrayfield(optional=True) halo_xray = _arrayfield(optional=True) @@ -922,15 +922,11 @@ def new(cls, inputs: InputParameters, redshift: float, **kw) -> Self: shape = (dim, dim, int(inputs.simulation_options.NON_CUBIC_FACTOR * dim)) out = { - "halo_mass": Array(shape, dtype=np.float32), - "halo_stars": Array(shape, dtype=np.float32), - "count": Array(shape, dtype=np.int32), "halo_sfr": Array(shape, dtype=np.float32), "n_ion": Array(shape, dtype=np.float32), } if inputs.astro_options.USE_MINI_HALOS: - out["halo_stars_mini"] = Array(shape, dtype=np.float32) out["halo_sfr_mini"] = Array(shape, dtype=np.float32) if inputs.astro_options.INHOMO_RECO: @@ -939,6 +935,13 @@ def new(cls, inputs: InputParameters, redshift: float, **kw) -> Self: if inputs.astro_options.USE_TS_FLUCT: out["halo_xray"] = Array(shape, dtype=np.float32) + if config["EXTRA_HALOBOX_FIELDS"]: + out["count"] = Array(shape, dtype=np.int32) + out["halo_mass"] = Array(shape, dtype=np.float32) + out["halo_stars"] = Array(shape, dtype=np.float32) + if inputs.astro_options.USE_MINI_HALOS: + out["halo_stars_mini"] = Array(shape, dtype=np.float32) + return cls( inputs=inputs, redshift=redshift, diff --git a/tests/test_integration_features.py b/tests/test_integration_features.py index 880d927da..b7351f5a9 100644 --- a/tests/test_integration_features.py +++ b/tests/test_integration_features.py @@ -79,7 +79,6 @@ def test_power_spectra_coeval(name, module_direc, plt): name=key, ) - any_failed = True # TODO:remove this testing line if plt == mpl.pyplot and any_failed: make_coeval_comparison_plot(true_k, test_k, true_powers, test_powers, plt) diff --git a/tests/test_perturb.py b/tests/test_perturb.py index ddd6c2654..3871ce3ac 100644 --- a/tests/test_perturb.py +++ b/tests/test_perturb.py @@ -106,43 +106,24 @@ def get_fake_ics(self, inputs, test_pt_z): return ics - def 
test_lowres_perturb(self, inputs_low, test_pt_z): + @pytest.mark.parametrize("inputs", ["inputs_low", "inputs_zel"]) + def test_lowres_perturb(self, inputs, test_pt_z, request): """Tests low-resolution perturbation.""" - ics = self.get_fake_ics(inputs_low, test_pt_z) - expected_dens = np.roll(ics.get("lowres_density"), (0, 1, -1), (0, 1, 2)) - d_z_i = cf.get_growth_factor( - inputs=inputs_low, redshift=inputs_low.simulation_options.INITIAL_REDSHIFT - ) - expected_dens *= d_z_i - pt = perturb_field( - initial_conditions=ics, - redshift=test_pt_z, - regenerate=True, - write=False, - ) - np.testing.assert_allclose(pt.get("density"), expected_dens, atol=1e-3) - - def test_zel_perturb(self, inputs_zel, test_pt_z): - """Tests Zeldovich perturbation.""" - ics = self.get_fake_ics(inputs_zel, test_pt_z) - expected_dens = np.roll(ics.get("lowres_density"), (0, 1, 0), (0, 1, 2)) - d_z_i = cf.get_growth_factor( - inputs=inputs_zel, redshift=inputs_zel.simulation_options.INITIAL_REDSHIFT - ) - expected_dens *= d_z_i - pt = perturb_field( - initial_conditions=ics, - redshift=test_pt_z, - regenerate=True, - write=False, - ) - np.testing.assert_allclose(pt.get("density"), expected_dens, atol=1e-3) - - def test_linear_perturb(self, inputs_linear, test_pt_z): - """Tests linear perturbation.""" - ics = self.get_fake_ics(inputs_linear, test_pt_z) - expected_dens = ics.get("lowres_density").copy() - d_z = cf.get_growth_factor(inputs=inputs_linear, redshift=test_pt_z) + inputs = request.getfixturevalue(inputs) + ics = self.get_fake_ics(inputs, test_pt_z) + z_d = ( + test_pt_z + if inputs.matter_options.PERTURB_ALGORITHM == "LINEAR" + else inputs.simulation_options.INITIAL_REDSHIFT + ) + roll_var = { + "LINEAR": (0, 0, 0), + "ZELDOVICH": (0, 1, 0), + "2LPT": (0, 1, -1), + }[inputs.matter_options.PERTURB_ALGORITHM] + d_z = cf.get_growth_factor(inputs=inputs, redshift=z_d) + + expected_dens = np.roll(ics.get("lowres_density"), roll_var, (0, 1, 2)) expected_dens *= d_z pt = perturb_field( 
initial_conditions=ics, @@ -170,45 +151,52 @@ def test_hires_perturb(self, inputs_hi, test_pt_z): np.testing.assert_allclose(pt.get("density"), expected_dens, atol=1e-3) # TODO: include minihalo properties - def test_hb_perturb(self, inputs_low, test_pt_z): + # TODO: include linear (for some reason) + @pytest.mark.parametrize("inputs", ["inputs_low", "inputs_zel"]) + def test_hb_perturb(self, inputs, test_pt_z, request): """Tests the halo property perturbation.""" - ics = self.get_fake_ics(inputs_low, test_pt_z) + inputs = request.getfixturevalue(inputs) + ics = self.get_fake_ics(inputs, test_pt_z) hbox = compute_halo_grid( redshift=test_pt_z, initial_conditions=ics, - inputs=inputs_low, + inputs=inputs, ) cell_radius = 0.620350491 * ( - inputs_low.simulation_options.BOX_LEN - / inputs_low.simulation_options.HII_DIM + inputs.simulation_options.BOX_LEN / inputs.simulation_options.HII_DIM ) d_z = cf.get_growth_factor( - inputs=inputs_low, + inputs=inputs, redshift=test_pt_z, ) - dens = np.roll(ics.get("lowres_density"), (0, 1, -1), (0, 1, 2)) * d_z - mt_grid = np.full_like(dens, inputs_low.astro_params.M_TURN) + roll_var = { + "LINEAR": (0, 0, 0), + "ZELDOVICH": (0, 1, 0), + "2LPT": (0, 1, -1), + }[inputs.matter_options.PERTURB_ALGORITHM] + dens = np.roll(ics.get("lowres_density"), roll_var, (0, 1, 2)) * d_z + mt_grid = np.full_like(dens, inputs.astro_params.M_TURN) prefac_sfr = ( - inputs_low.cosmo_params.cosmo.critical_density(0).to("Msun Mpc-3").value - * inputs_low.astro_params.cdict["F_STAR10"] - * inputs_low.cosmo_params.OMb - * inputs_low.cosmo_params.cosmo.H(test_pt_z).to("s-1").value - / inputs_low.astro_params.t_STAR + inputs.cosmo_params.cosmo.critical_density(0).to("Msun Mpc-3").value + * inputs.astro_params.cdict["F_STAR10"] + * inputs.cosmo_params.OMb + * inputs.cosmo_params.cosmo.H(test_pt_z).to("s-1").value + / inputs.astro_params.t_STAR ) prefac_nion = ( - inputs_low.cosmo_params.cosmo.critical_density(0).to("Msun Mpc-3").value - * 
inputs_low.astro_params.cdict["F_STAR10"] - * inputs_low.cosmo_params.OMb - * inputs_low.astro_params.cdict["F_ESC10"] - * inputs_low.astro_params.cdict["POP2_ION"] + inputs.cosmo_params.cosmo.critical_density(0).to("Msun Mpc-3").value + * inputs.astro_params.cdict["F_STAR10"] + * inputs.cosmo_params.OMb + * inputs.astro_params.cdict["F_ESC10"] + * inputs.astro_params.cdict["POP2_ION"] ) prefac_xray = ( - inputs_low.cosmo_params.cosmo.critical_density(0).to("Msun Mpc-3").value - * inputs_low.cosmo_params.OMm + inputs.cosmo_params.cosmo.critical_density(0).to("Msun Mpc-3").value + * inputs.cosmo_params.OMm ) integral_sfrd, _ = cf.evaluate_SFRD_cond( - inputs=inputs_low, + inputs=inputs, redshift=test_pt_z, radius=cell_radius, densities=dens, @@ -217,7 +205,7 @@ def test_hb_perturb(self, inputs_low, test_pt_z): integral_sfrd *= prefac_sfr integral_nion, _ = cf.evaluate_Nion_cond( - inputs=inputs_low, + inputs=inputs, redshift=test_pt_z, radius=cell_radius, densities=dens, @@ -227,7 +215,7 @@ def test_hb_perturb(self, inputs_low, test_pt_z): integral_nion *= prefac_nion integral_xray = cf.evaluate_Xray_cond( - inputs=inputs_low, + inputs=inputs, redshift=test_pt_z, radius=cell_radius, densities=dens, From 43593b4a29787fd3535081dbec0f2c5f86ac32ad Mon Sep 17 00:00:00 2001 From: James Davies Date: Sun, 24 Aug 2025 12:46:17 +1000 Subject: [PATCH 139/145] fix bugs from removing integral count field --- src/py21cmfast/src/HaloBox.c | 2 +- src/py21cmfast/wrapper/outputs.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/py21cmfast/src/HaloBox.c b/src/py21cmfast/src/HaloBox.c index b8f6586bb..6ef8c7c0b 100644 --- a/src/py21cmfast/src/HaloBox.c +++ b/src/py21cmfast/src/HaloBox.c @@ -178,7 +178,7 @@ HaloProperties get_halobox_averages(HaloBox *grids) { } HaloProperties averages = { - .count = mean_count / HII_TOT_NUM_PIXELS, + .count = (double)mean_count / HII_TOT_NUM_PIXELS, .halo_mass = mean_mass / HII_TOT_NUM_PIXELS, .stellar_mass = mean_stars / 
HII_TOT_NUM_PIXELS, .stellar_mass_mini = mean_stars_mini / HII_TOT_NUM_PIXELS, diff --git a/src/py21cmfast/wrapper/outputs.py b/src/py21cmfast/wrapper/outputs.py index 4185f29e0..1cb7f0a62 100644 --- a/src/py21cmfast/wrapper/outputs.py +++ b/src/py21cmfast/wrapper/outputs.py @@ -890,6 +890,7 @@ class HaloBox(OutputStructZ): _meta = False _c_compute_function = lib.ComputeHaloBox + count = _arrayfield(optional=True) halo_mass = _arrayfield(optional=True) halo_stars = _arrayfield(optional=True) halo_stars_mini = _arrayfield(optional=True) From b992c20874445024f878716153d797f861dbc899 Mon Sep 17 00:00:00 2001 From: James Davies Date: Sun, 24 Aug 2025 22:41:23 +1000 Subject: [PATCH 140/145] add tests for optional fields --- tests/test_output_structs.py | 156 +++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) diff --git a/tests/test_output_structs.py b/tests/test_output_structs.py index 812db02ad..1c897a743 100644 --- a/tests/test_output_structs.py +++ b/tests/test_output_structs.py @@ -9,8 +9,10 @@ from py21cmfast import ( InitialConditions, # An example of an output struct InputParameters, + config, ) from py21cmfast.wrapper import outputs as ox +from py21cmfast.wrapper.arrays import Array @pytest.fixture @@ -76,3 +78,157 @@ def test_all_fields_exist(struct: ox.OutputStruct): for name in cstruct.primitive_fields: assert name in this + + +# NOTE: These do not test every field, but does test every conditional in the +# OutputStruct constructors, a better approach would probably be to have a +# comprehensive list of {"field_name": {"flag": value}} conditions for the fields +# in the output module which is checked in the constructors +def test_optional_field_ic(default_input_struct_lc: InputParameters): + """Ensure that the correct InitialConditions fields are set based on the parameters.""" + ic = ox.InitialConditions.new(inputs=default_input_struct_lc) + assert isinstance(ic.lowres_vx, Array) + assert isinstance(ic.lowres_vx_2LPT, Array) + assert 
ic.hires_vx is None + assert isinstance(ic.hires_vx_2LPT, Array) # Python requires it, check the C + assert ic.lowres_vcb is None + + ic = ox.InitialConditions.new( + inputs=default_input_struct_lc.evolve_input_structs( + PERTURB_ALGORITHM="ZELDOVICH" + ) + ) + assert isinstance(ic.lowres_vy, Array) + assert ic.lowres_vy_2LPT is None + assert ic.hires_vy is None + assert ic.hires_vy_2LPT is None + + ic = ox.InitialConditions.new( + inputs=default_input_struct_lc.evolve_input_structs(PERTURB_ON_HIGH_RES=True) + ) + assert ic.lowres_vz is None + assert ic.lowres_vz_2LPT is None + assert isinstance(ic.hires_vz, Array) + assert isinstance(ic.hires_vz_2LPT, Array) + + ic = ox.InitialConditions.new( + inputs=default_input_struct_lc.evolve_input_structs( + USE_RELATIVE_VELOCITIES=True, + POWER_SPECTRUM="CLASS", + ) + ) + assert isinstance(ic.lowres_vx, Array) + assert isinstance(ic.lowres_vx_2LPT, Array) + assert ic.hires_vx is None + assert isinstance(ic.hires_vx_2LPT, Array) + assert isinstance(ic.lowres_vcb, Array) + + +def test_optional_field_perturb(default_input_struct_lc: InputParameters): + """Ensure that the correct PerturbedField fields are set based on the parameters.""" + pt = ox.PerturbedField.new(redshift=0.0, inputs=default_input_struct_lc) + assert isinstance(pt.density, Array) + assert isinstance(pt.velocity_z, Array) + assert isinstance(pt.velocity_x, Array) + assert isinstance(pt.velocity_y, Array) + + pt = ox.PerturbedField.new( + redshift=0.0, + inputs=default_input_struct_lc.evolve_input_structs(KEEP_3D_VELOCITIES=False), + ) + assert isinstance(pt.density, Array) + assert isinstance(pt.velocity_z, Array) + assert pt.velocity_x is None + assert pt.velocity_y is None + + +def test_optional_field_halobox(default_input_struct_lc: InputParameters): + """Ensure that the correct HaloBox fields are set based on the parameters.""" + hb = ox.HaloBox.new(redshift=0.0, inputs=default_input_struct_lc) + assert hb.halo_mass is None + assert 
isinstance(hb.halo_sfr, Array) + assert isinstance(hb.n_ion, Array) + assert hb.halo_sfr_mini is None + assert hb.halo_xray is None + assert hb.whalo_sfr is None + + with config.use(EXTRA_HALOBOX_FIELDS=True): + hb = ox.HaloBox.new(redshift=0.0, inputs=default_input_struct_lc) + assert isinstance(hb.halo_mass, Array) + + inputs = default_input_struct_lc.evolve_input_structs(INHOMO_RECO=True) + hb = ox.HaloBox.new(redshift=0.0, inputs=inputs) + assert isinstance(hb.whalo_sfr, Array) + + inputs = inputs.evolve_input_structs(USE_TS_FLUCT=True) + hb = ox.HaloBox.new(redshift=0.0, inputs=inputs) + assert isinstance(hb.halo_xray, Array) + + inputs = inputs.evolve_input_structs(USE_MINI_HALOS=True) + hb = ox.HaloBox.new(redshift=0.0, inputs=inputs) + assert isinstance(hb.halo_sfr_mini, Array) + + +def test_optional_field_xrs(default_input_struct_lc: InputParameters): + """Ensure that the correct XraySourceBox fields are set based on the parameters.""" + xr = ox.XraySourceBox.new(redshift=0.0, inputs=default_input_struct_lc) + assert isinstance(xr.filtered_sfr, Array) + assert isinstance(xr.filtered_xray, Array) + assert xr.filtered_sfr_mini is None + + inputs = default_input_struct_lc.evolve_input_structs( + USE_TS_FLUCT=True, + USE_MINI_HALOS=True, + INHOMO_RECO=True, + ) + xr = ox.XraySourceBox.new(redshift=0.0, inputs=inputs) + assert isinstance(xr.filtered_sfr_mini, Array) + + +def test_optional_field_ts(default_input_struct_lc: InputParameters): + """Ensure that the correct TsBox fields are set based on the parameters.""" + ts = ox.TsBox.new(redshift=0.0, inputs=default_input_struct_lc) + assert isinstance(ts.spin_temperature, Array) + assert isinstance(ts.xray_ionised_fraction, Array) + assert isinstance(ts.kinetic_temp_neutral, Array) + assert ts.J_21_LW is None + + inputs = default_input_struct_lc.evolve_input_structs( + USE_TS_FLUCT=True, + INHOMO_RECO=True, + USE_MINI_HALOS=True, + ) + ts = ox.TsBox.new(redshift=0.0, inputs=inputs) + assert 
isinstance(ts.J_21_LW, Array) + + +def test_optional_field_ion(default_input_struct_lc: InputParameters): + """Ensure that the correct IonizedBox fields are set based on the parameters.""" + ion = ox.IonizedBox.new(redshift=0.0, inputs=default_input_struct_lc) + assert isinstance(ion.neutral_fraction, Array) + assert ion.unnormalised_nion_mini is None + assert ion.cumulative_recombinations is None + + inputs = default_input_struct_lc.evolve_input_structs( + INHOMO_RECO=True, + ) + ion = ox.IonizedBox.new(redshift=0.0, inputs=inputs) + assert isinstance(ion.cumulative_recombinations, Array) + + inputs = inputs.evolve_input_structs( + USE_TS_FLUCT=True, + USE_MINI_HALOS=True, + ) + ion = ox.IonizedBox.new(redshift=0.0, inputs=inputs) + assert isinstance(ion.unnormalised_nion_mini, Array) + + +def test_optional_field_bt(default_input_struct_lc: InputParameters): + """Ensure that the correct BrightnessTemp fields are set based on the parameters.""" + bt = ox.BrightnessTemp.new(redshift=0.0, inputs=default_input_struct_lc) + assert isinstance(bt.brightness_temp, Array) + assert bt.tau_21 is None + + inputs = default_input_struct_lc.evolve_input_structs(USE_TS_FLUCT=True) + bt = ox.BrightnessTemp.new(redshift=0.0, inputs=inputs) + assert isinstance(bt.tau_21, Array) From f6c640bb687e569b6ead67d73691c7e87076d16b Mon Sep 17 00:00:00 2001 From: James Davies Date: Mon, 25 Aug 2025 10:54:52 +1000 Subject: [PATCH 141/145] cover last case --- tests/test_output_structs.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_output_structs.py b/tests/test_output_structs.py index 1c897a743..bdd6726cb 100644 --- a/tests/test_output_structs.py +++ b/tests/test_output_structs.py @@ -156,17 +156,17 @@ def test_optional_field_halobox(default_input_struct_lc: InputParameters): hb = ox.HaloBox.new(redshift=0.0, inputs=default_input_struct_lc) assert isinstance(hb.halo_mass, Array) - inputs = 
default_input_struct_lc.evolve_input_structs(INHOMO_RECO=True) - hb = ox.HaloBox.new(redshift=0.0, inputs=inputs) - assert isinstance(hb.whalo_sfr, Array) + inputs = default_input_struct_lc.evolve_input_structs(INHOMO_RECO=True) + hb = ox.HaloBox.new(redshift=0.0, inputs=inputs) + assert isinstance(hb.whalo_sfr, Array) - inputs = inputs.evolve_input_structs(USE_TS_FLUCT=True) - hb = ox.HaloBox.new(redshift=0.0, inputs=inputs) - assert isinstance(hb.halo_xray, Array) + inputs = inputs.evolve_input_structs(USE_TS_FLUCT=True) + hb = ox.HaloBox.new(redshift=0.0, inputs=inputs) + assert isinstance(hb.halo_xray, Array) - inputs = inputs.evolve_input_structs(USE_MINI_HALOS=True) - hb = ox.HaloBox.new(redshift=0.0, inputs=inputs) - assert isinstance(hb.halo_sfr_mini, Array) + inputs = inputs.evolve_input_structs(USE_MINI_HALOS=True) + hb = ox.HaloBox.new(redshift=0.0, inputs=inputs) + assert isinstance(hb.halo_sfr_mini, Array) def test_optional_field_xrs(default_input_struct_lc: InputParameters): From 5cb18988f2bdc0b98dd723b669faac172754cba6 Mon Sep 17 00:00:00 2001 From: James Davies Date: Mon, 25 Aug 2025 12:31:55 +1000 Subject: [PATCH 142/145] make cosmo a cached property --- src/py21cmfast/wrapper/inputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py21cmfast/wrapper/inputs.py b/src/py21cmfast/wrapper/inputs.py index e6b7a59a7..0039653b0 100644 --- a/src/py21cmfast/wrapper/inputs.py +++ b/src/py21cmfast/wrapper/inputs.py @@ -349,7 +349,7 @@ def OMl(self): """Omega lambda, dark energy density.""" return 1 - self.OMm - @property + @cached_property def cosmo(self): """An astropy cosmology object for this cosmology.""" return self._base_cosmo.clone( From 78786258ece83f0dbc4964b2f3d2baaa7dc56a73 Mon Sep 17 00:00:00 2001 From: James Davies Date: Mon, 25 Aug 2025 13:07:39 +1000 Subject: [PATCH 143/145] regenerate integration test data --- tests/test_data/power_spectra_dexm.h5 | Bin 14296 -> 14296 bytes .../power_spectra_fixed_halogrids.h5 | Bin 
14392 -> 14392 bytes tests/test_data/power_spectra_sampler.h5 | Bin 14392 -> 14392 bytes tests/test_data/power_spectra_sampler_ir.h5 | Bin 17328 -> 17328 bytes tests/test_data/power_spectra_sampler_mini.h5 | Bin 22096 -> 22096 bytes .../power_spectra_sampler_noncubic.h5 | Bin 14472 -> 14472 bytes tests/test_data/power_spectra_sampler_ts.h5 | Bin 20216 -> 20216 bytes .../test_data/power_spectra_sampler_ts_ir.h5 | Bin 21072 -> 21072 bytes .../power_spectra_sampler_ts_ir_onethread.h5 | Bin 21136 -> 21136 bytes 9 files changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/test_data/power_spectra_dexm.h5 b/tests/test_data/power_spectra_dexm.h5 index 1c830080bd33dd297ff9b883c6e2d57830152568..b87c7e6db630564d4de8274120a940dc6ba3a045 100644 GIT binary patch delta 872 zcmV-u1DE{RZ`g0JC=P#2n4nJtZ2>_SjE-c(t4%+_BOa$V>Ons}Wes5cLN!0K0>eU+ z|1CcS?fkkkUL-##gdAK+;2=L}6ezSO@EJeB_56fU^%g&)DD|xqf)YR3=Z06C!V5p5 z_g(9Sp$b1;j?YKh{RKaoiO>-ey8=H;sC%_1bN@cIbA$fQi25)-=sN5>ca!x#9N1bA zbLR3s6zWl-oD|4O$&h10XWp!2Ih3j2%p0T7`;t-TH5kOi4Q z**Ht4xXnQnYkH(7rdZO z8OU5foW$GdL*P>{K;bemZYQuxK9Mq-2*=vD!eDFMF2n7EbumSD*HZULE1)jB=tVFJgic@YxF*1 zopOxDI`BT}So%oHo9I6O>9NB!!{$CA;S(4<2IW3Q7Wp_w5aL)qs${(!tQp@v_kX2H zr+V8ydV;|kP1ERL#TJgd<@BVtRteihI@OC)sEL5a&gT&$6zT=mF4 zV$m`i`hdnhk#OdV|J=hql0e?tpohYfZz$P+2XACFll#d*EK_upo=F8kI<---;C8gqDjyWhR!*<++4FSfl#57EgUZ{Te%^!$x*l zKul?*{kHmWK-k0I$N@fRKw*e9?eF?wK<#F@S-n?TKyK6>$yAL}Ko-}@v@P~ZK+w?J yQWrKvK+T50nRuc-K;BvFVb~WoK&42~pzumCK$e!u(?a$qK(iq#?*g+AEy)V02ckCs delta 872 zcmV-u1DE{RZ`g0JC=P!Ed^-jKZ2>_5f6yqn#Y#W1;Af1z4M0ElJJw@!LNh;cioX0lR&m94<^%gaL=zhD6$bh~kPD-#WLx-?*AB6N!EGE1skWyB{9D67h>bUQ>QT2qnVJ0A_xP(onA)5McN3vN3Oe?+zt@F84jd1OkcfRiCEvl=YO{1efh*b<2vlr9(W?mrSBqpoc|(kB zJiS~%wp~?*W7<_RY3JKx_lk05Z-wK!$|ImV(SNK!Kuq2tgGo zKn2`quqslMHW1x0K&t=mKL7v#;6(rLKL7v#0DJ!LKL7v#0R8;$KL7v#01wJTKmY&$ zfM91sKmY&$K(j;dNsO4Bb<)-e7jJo4KhqI)O z_xa&IAy0(l^jF|M@5WTfL4({r3OIo3`w`ndk%3ZJ8xGn&$}J44RI1lL>T$O$xrx?3 zlx!+_$O_gzYi*+(fWFi|IESD;w;I%wZz$P+h$^I`ko(C&NnYR3&q@VB&vq;$v~BD_ 
zm1ogfc!u0S-oX*N+2YYa)`GZBFDb%6PdtNW@>RV+RJDRy5&f}1hQLry!(*#JU=C6Q zXcD17W31M*YuTGX1YJH`z1fpMiJaKf;FpU)*wEHFvblsnDw<{GwoQFNGzFMT4MlcY zKs8=i;%VG@)E~zz5Dwhgj4%HGRN~d8bb~7^D+EeH3KL32qbH3;M{PCQpF|jdG!du3% zB60D$-)qt5_RZu-uYZM24tIv_Qqi%{8HO+4-w9KM1trTYsMvi`w%KrkEmlUpzSn-5 zh((dj$pr>h7(RGtp~b`kThBTlbnmPg*3rIcZU1XS?1bN{HJNP&nC5vAwU3C$nmaeS z?iZ@K{7HYg7p|*#s7M=^G#p z$omDKIRSK9CDYtjW5A9nv}Go0(Q*G%l?V}DRbNA`YnzZe)}6@IXl!)Ka2~p}h3OOT zUW3d&DNLqqNkjhoCX$d%IXK+$&@V!qHh7>#&)u|bY%wG$-fO1bs6Ig~C`#O!@;gI>1Y%BrZZplhbuwPPt` zaCKaS%RN^GbbMbWsSdgg>v|`Bl->=n4E)H*mLOql}RZy%AT0d&^rrP^p{v# zJLN-EVUiZrp9PE9*|(UybVvv^^meREg0~M}>2+Ym!LwC1@_0cc)W?0da_(LT%sQ#p zX#30`2H+0fHI{h82mIyvBh79wjTe6?WDJE~<^6(*K^kA?!5g5GoCq;9rvH zjpSb=wr6g}rq5j!J*3)Vucdf;a^6bJV%qHJ%SQz4c>d7~laKINoa7zr!wVg(w&FwP zejgmB-KuEGaaIHF<9`30)EV&SsqT!po&b8oPx-R0mw?Lf zY(8noyG^1V(RMSTzO zb7`^*Aa;XC;>4kC=Q_ci<}+R=@$Eo!NhYut+y#;M``j6mw?VJ&R<71*ISBkm<0QJE zW|iaYa)G?dd<^}xqo^qt#e`Ch7E+w{c@l>{PhrD(iupLLkY}7Xcv!Af+BxF+G4Eu^=nTfb7g~y)Vn3W_GZv;o<5#JTajZ zYLczTj}H{V5T6$rj)Njd>fY#?S&$2hJm3B`(!_;bmyU)S`=vus#F}q^^GJcM2Qui= zxkTvqBq&h5Jq8|gk5s;zV?(c$RSVaGUA}>|Qr8y0pZ35>@2n)dS#Ovp?>9De-T@P^ s12^I?ZiiAMSN$#vXIQL%|G}j9CP+D$sI_f&9h7ELl-kmSjqL`11Mt-cIsgCw delta 1271 zcmXBPdobrBRfkNec3oC- zP{ItcEYaU_h!+npK6+Gb*&hSJT_?Bg9f*KHL+s|dDJ{X=iRsI?YLGaUg zmw|UH1EK0^ZAHPetuNV11nf%q*)gb|RZ6Ap~K`n)m za79^68yBR$iS-`l910#?TxT6s<_5j2!(V9<*s%CQ#JW_T6Vxo~^m)s6fV}*q^y0r* zVEjYu8~rm|Fj2V=4JXbJTUum-Qmcj?B_ZClf_@94{pK(Mon=PpLfY>%_m|Q}Y}!NY z_tSDlA1#kq#vPq`Bu zRt**jRqm@g%5jC8-SYY&4W5b^w+bIB#1+HZ^SkHd;-I$G_+4!#N<(@}AaftCU3Q~i zcw`r{zy70#<+lT0hpb$u&e?=ffm^7v_hK+Z5}f;p@hwi3#RR$c1)+BDPMPU*f1J{? 
zvs1W2%s3u-)1t$Lhn<&SImzz2;xp}g+p>8MO7*-xq2{c96c`LfbLb}r3O|w@haZRx z)0@k2dVhaxJh3@s77>4|K4Z4pit3rL%(5M_7dIFfnyFR6AbHG=NF6~?AB=vho4pL` zT=eNUB>?ZtQu*x#JSZ%R9Fu+I!qfQ~IZIhfV4-4WOInLNL}gi?cGR+=oY_|zea8ig zwz^ebG;ju8jVM$F4)F6;ef>n29n6;Gu--MYARyWJT!M;8!5bf2hkyAtV82PIi+eI3 zN|TBQ_t{!P<#COCbekF2h_V)Tte6XrjUF`d?-|3(ur5z!-sf;B&r~Y6HUt~r^-iza z3?R&_fSMaR6N*%`lPs^yfcuh-=d!0imU8rAN7|EY;LG<`)1JMqxI%mK!mh@&Csa1r z^=@dQ3vHN=vqYbnt0`iF-B0lM(0yrdh7PpZLd%FDU$b4WX zQPJN+Y?Wzr`{oB%!bJ)Tvc(8C)Ii@t)cv1z1C8%6BhSwU3vD+>*QFlv;J|b1!vz#Lsm*H^a zN38;E+&w5g&h+AqCgXMC2J;%vt?0kG&T(RVJqFx5rTQ`=3Y{%< zOsPtWv5MT}!LcAjMqWP%0&w2=4f(+e5r!^FE1k6B<7=wQgtbw&1dFY`5BvMLVD$mF M(a)-rJlc)^2WjU*od5s; diff --git a/tests/test_data/power_spectra_sampler.h5 b/tests/test_data/power_spectra_sampler.h5 index 328320c5fdb19482bea68c20b8f7f37ad1ead956..a1ea2c15e4f3d9e598167f6a1f7aa824c7a8e60d 100644 GIT binary patch delta 880 zcmV-$1CRW;aJX==hz@@oTysqjZ2>`###vI{Jy1W2>Of(v=R!Xl>PS>79XLOr&>8xc z&@?}Lh{hb`l`B7F=ayz^gDF49oH%!QUL`+k8N9`MlOjLv+$97W%^*J^iuhMi792mC z-vjyW&l^7vyJ{De3mHG?bL39<1sFf&-jDxe$re9Zr%E~Y1{N?srae0)OY{^!;%{zf zXeAUs*dbPjPOlR`*dxy@S1=PlShoeJ3_cP+N*WQdSMCv$HxIFYZ3m;nzP6`9h0{^6 z1uEr0I%sxp_dnJ^o8Pd~8Ck8;c-wmNaW{N5owFQ|# z;YC(0mP3y~3&uCyvetz_G*AuqT9AD}2Dc4qd$4ps6qylFW?pPS+gkbJkCkbOJxyzgZzJ{{B8H3ZY?-j{ZJwgYhVCUi?0o%Rv$mbo*F7Zj4x= z80Pst9c4ea^t$;z^~v8oK`Qw^_T~E0(*5^7aK~Ge_FMQqIFt22#L4zP*G96fF-`V9 zrIxVM&35%Z3muqh*fRA#30ZN-5z_QNI;VZPGV1e_&nVe{6U&tt>EF>ncsHb!ye7gx62tAcM^?Q+NpU%Dt@*J)YOx9Y zLt(2x64dj75)q+51RTb#1lpTGW*8Emi`tVwJv^8hnw5({Rks-7=(vPH73iBp=}mn= z&nNNzc|vwsK(v!yP!0QVKts5$Y_2(IKs1outnc|@K#7>et8ZLcKwA?BS-gr;KtL~} z0>S)BKzcV)Gi5qNK+hr!akHsCKu26$4MiR{Ku7yUwyj+-K>s%X2i6iOK(m1=?*g-L GEy)TJY=`~; delta 880 zcmV-$1CRW;aJX==hz@^Cgv}@oZ2>{fDb=`M1WrFSt#JH~b3s1=wfTF5Z#F-*gu}2j zhciE>rwyOk+$ulSg_12ZrYJvzZ7TZhk|aO)kw`_N7$QHg1L7|PnIJzkVDTKzKO8^5 zG4Y?dcN{-pqu+b}%^5!vVC(4T+!#OqP^g!Vlovn1FQ#<-Hy1EJ8kJ5=-9Q#UeaQIp zR(=&fLhpt)tqm1F-@?$fCD;@{n4kdWqSq5YML~YC5^NKbHxIFYzQ7@vzqY4A(T;S^ 
z>Mi9!95hs1h(y*v$=q60QC-78!j20_ZcDd7?pXDtWcsT>Vqd`2I&rJ-#n^>Fa8nO7tB`#_s>l41DX?@v@2pX{pNKtix=a%#OwKsZE7z){FTKp(zFX z=@~y$OoZa-sun+M{nQ%~BN0Dg8iS`gYz;qn#QNvd7z{rTnYj|wdAw7P;O^8P;7NkjvMHT^#2OdfpZZv0q2<1EQH z(CGR;cVU{K^Thf-SmNE*#ccXMzQMT~037)~>Y$In3ikOvK>)ot^d4e|Tv;g-$QF3;apzHQN^j4^gp^ElCb~WaFNpkg*&nVe{$5P!uiu=hygDjGr;Ytd@wsGz6Mv=N~|x^#sYK(m1=?*g-L GEy)Vp-JK8s diff --git a/tests/test_data/power_spectra_sampler_ir.h5 b/tests/test_data/power_spectra_sampler_ir.h5 index 3c6952e7eabe2346c0ba38b745a8a35067eef396..a70e356f77ed7612e4fd8e26b4314d9dbdaf59f0 100644 GIT binary patch delta 1250 zcmV<81ReXZhXJsM0kGH)e;izMO%QDXL662+QruR4N@fKcLVV z`j*f%KYNJA9ORWNKV;{YW@&>dKgXOncX(bUKWiDh#d?z>KknQm1RBjCKOu_vS5X!m zKbqeI`R&geKM%WV7nKVcKj?GhPWJ^EKjq$!|76J)KUt?rIrat?Fh8a}J0(l>6hGo` zZfIyF6hGJ@R)!$1Ui_2(x>w?I!cA7U@|t3V|AeBj>=p+IJeDi^f{ znLy!1RxOr8k3b8?H{G(~~W=Vffl^D#hjZ3!}t zf+;{nzo$Cklp&J=EgzA?}Bc^7#&@je3xAQmQV z?LO|0MK&^K>OP2Rr6)r<d1{!mSoqZwr8d((K`kKvvtH6Z%FLuh z3iQ!Fmg0I#5d6_TnH+Akg{RU!7A{;oeRR=2Ic680`7zNxFwPrVb^y^nz1K@A`+Cqm z`+~vmG(pfldId6#sdUgjBCw4kKF`lS*J*2=?;+4Wj~r+Ig+|XlzM{5jx(d(FKK)jsieB<4&OrPF!wjj?!$4VCmFZH@w?M<*wuKcee>g=zRxsdbyJ6`t6b=KlqVIMWPrYKd=MhF9ew&KQ&YTgeL$+m{E;cJbU^Q{QLtcHY(N}&9QW^&WI!ro zm9mn*TtJ$F?41SVQ!qe6uxxT_y-GkhL`uL>$U;CLzJNzb`#C_MYfwGr=P^LnjIgva zZYe+@hc_oKh#`{!EgzAC9ul?LI0<<)=ph@IH|=wVz<9 z>psHDGmPa?={|PpDZ#6+%o?m%z9M?X~qT{jfn%6$uOSHJT=hZ%P*&tF;PS!qT2FA<}xzs)dj*C+C@6|r+ zlIlopEz~~o(UTM4q0~Mdk&WmlSbx+$tJ6`CsbJGSBd=j910U2rNqznvURl#V#@q6r zrmxaIq@(93S_;!XfZQK)S>V$?ix^E07kAP=#02{!NhH!fA(9EY82QmY1*3Q?%#YDN znwkl>K4Q^6Q|=bX8l2HSciyPvrjgO0e?)-tt17y+ClAA6oAELH$i4HXb_H(@Ib_xdo;;A?m&ptGDKVF?LhrR zgv!Tk;z0Gt4}|`Z(LiTD9CGbk%s?^yoQ8fW&Ontj7sC+K!$9fI+o8PBw?L{Zs^%R; zwm?B)>x*Mkwm`5Z2%voGtUzVaIuXI)ra+{trGG%aJP9aQM4~`C-BybKH<&=x zX@6ImqLn}m*2|&|8>6S%N?|zT^s` zcz{4I^XykU%z8iqcXEbq-gQ96$^RoAKXO3k@B7|;ac@9ckJ2_8c5FZgLL>X0A{`ykbMf zS4B!dT_Nj$EH_0!#Vm8Du4qF*n=F4ucNITCECnOWHMBWE3J6L_0UkC$_Rki?aCb96 zfinCbq82bfaV0d$BcLfj!U~Arkhdm4Ek*9f!U^aiKpox=^#cNvUO9CdpsN4xKL7v# 
zpho}iKL7v#;Bo%%KL7v#APAG~IerviVrN4@0002sO9n$g0002suBJkh0WBZ1a49MR MlMpUzvok$?1`dEiLjV8( diff --git a/tests/test_data/power_spectra_sampler_mini.h5 b/tests/test_data/power_spectra_sampler_mini.h5 index 88cd31c8e52e1cc97e70179faf4ff0358c53d383..645a0d0b1ddbb286f756a0ac388900ccb61207ce 100644 GIT binary patch delta 3780 zcmXZeXCT$@`v-6YL$e3-CqmF zbLUa{a{8h1H&bY^Qf!WK<11qI?$ENA8%4FBf~k)-eMHLqnpfkT-l0#)>uBHb1=R8F zR!zz98C+=^uSx=AT)Qm8TIr7jZpW|gC-aj9mo9Z`?Dk#66w(`+q9-RYo|Tc8YStr| z=al_y!$3dA{2+{thKtmTv3Q;D;(GiHQ+6)-sAx&S*n?;~+jyI>F7d_!hl&SS9mF4T z-d~QztFU|=Pbk8gChlij#pGkdQyLz1Y&n>Radn-bZ#uRjI69SbHx(=Bp4zusnuKLH zJH`-Ju49LisVi#VM?m3puB5U32UOxh;+HXLLQ%sEH=9U7S?F4p=rhW6EF!mZ3)&Q2 zKrHo2uhK%i&>~}?e`d-FR9E|`|1QN6eWcc9I@M%?#i4pw%j>MF_mql%x@lYJRvo=pD1hjKnb!)0! z;6q=ZO5w!<-yoJ{cjHh%!veB3q;Wy!+#j=ukSQN(~X_N3R2 z20nP&W5zxa%MO=2T9{K4>0z}f(;$_sN(C9>^Xo%*mk@jLK9NiBr;&Jzk$=S9FX+jV zTY0zqAli7?N_3ofgTn2(R&V-rq5bxP?{V>VWT5E039Vs-8Rc{6NA|=$2W3BWZ6PMB;Jzo)EL#+R z@ZJ^G*c&S35S);L1r^w+6=sRRN9J@H9<@Y{y`r}x9W zDb4ZnSZR=&S&3~#qEIsZ^=P2tZb&^YQL8w~QDZAzOGnDn+n7OAR8$nze~JqHzu*6C z_oAu)q+{CiSc zxY%p5=y-eM z{ILNrG?h#HGS&fe#IQyBm1dAX%Va1rRRxk*ycmN<5o9qK(=IP%LUc91{obTha9P%{ zX{sW_r?SqqmmODO@dDGSM4SZ2Bz6??>0Knj%nwU{QgR@;YSfk8usjPF^4edNAM}KX zL+a|exVD|gcn}%azs$*vY%j^Ct+~=Q>m%< zAplvQ{|IMJX@I6}?ah=?H3)8O`B-Lf0EST8_ce+#STKz^4DV9{Ue$NY)7k{M@VKBb zNKgTy=HkP$Y6wvD{Z7*rqAaXtH<(G@mw`>|oB(qkX;=vGygQmG1<#)j{@21J38Bg{ z>UQoDP-*6VYK=522ETqZ+dlmz1|w7gS0#<}kpu;nO!nNyj|DH2c$;1Dr3#KKwM}OD zbkjswMvF2ox=s>%G9-y#GIA81e#wbHpXt80)06=hV{V*JNZZ6BN`%LH85b}fFH`!N z(?7BC_4dStM-!M}wPGvv%Q4JDAt_5leGt1wj5tPWEPju*@DENoUw?}!o{vmi=YNff zczOF0^t-T~H(H8c)^}hA8l7RK7VVg@^p~!tCJNS_#B3((^$;^0)?ckf}kf$=(q-e2=d#y-;v4h!_hV>D75j{|MV zn3h>tz@5rytnF*&#(CB#Z1hRg?hBp~Sp5=SKBOOxN$rg1wWeIg7^EKUTh3dOuGUwsuQHr137a3Sj#}lsAln5ETP_v@r0ZoHV{JST;~pe zC7X4VcPoOx6FL2+B(VXe1xvRM?VkJd7J5>oDMIXn0 z@+_f4==e!5j>?Prh$?kG;@B_|9Sr@DT2ZZuZZ2)4rF*NO#52b3L~$h)5>r$^l`M~p zo*hpc|FajR?T&hV+gJkGN~*|lr|&@wvQGW$q&>*saoF94?|dk|dL=`%nFq;#o~OT< 
z#)+hb8F+#Q*%3*CdnEj{Dk};V`P6Y(hXtK}GN5#CfdREO^-kK}U_`T451bPQXb_o= zuA#S{8eP+@V_jw0#6gSEn(6x=TrZ7i^VEC|FOt#KI;6dVQ~0N+HOH55hLXjusCU2d z=A!>yCH$VpKcCE?KB7C1J9qGKJ<*uO3;Y6c$kRP`x5dru7cOB z>gWB$8&^~r?FxS2dj!2*y0q>Qr)IBHbsd%=?li-qO3#0Vo4FS`AFRHC&bpsvdP%i# z&Ftr96MZS%5y>Y^%%sCZ@6%#Q{@0qwA}l}?yoxCq{siu{+v&O-mdXMDDc zznz~}bA^K6y%!RP9bt*HIm;x{24;D4BW5ivp*!l^EA9K{u#3K0(2v;&3i65^ z7kKp`t@5)~H7yZX?#>&=QgncyC)|UNs1DtW_0i7*R3MvX_J#2b0j8Xy`S61T7+t5_ z&+C+d>vKCkAFz`GiByqLKGG3!IQ{UFI`vLbNNcoxRXZvKDrD88|F!Q1Hz!$v;dUM% zG04zPKj49a&Z0JpWpvtVm3k9Cy!(zzs{X%z+EW|Wuzyt$0**~#X=9xS2BBdgq5 zwF}7Z99Ilo@jQAYQ}>>KZ5GuCOdE$r|25e#i6+J3OwAkcVI;+EaMYfAD@W|H9QeIF zNkI(ezOK!eQ;=`{r}pQ|(daEpeZ%WN(I~0yfuFv_Wpu)ezM;3-4^?zz+mR_=C|7Zs z;qj^mx+pWD&1C3;-d-6GT=hMUnBe4o*EcpOm%x0+hRPN-1qg+z_8mp17fjYh$fl@2 zRF|07YC=M7A1cKY&5RK%w=OXtK@V9}`6Yg3*F%M(Pb1dlbdi>Crc7P0!xGJO|Wi5Pk2*`(h3JcJaf#d$@I0bR)st0eVza5*ZS zFv<5EJX;^{=ZUU^R`K>;CbJ6AF$un?W>yTpj}a+3h56usKE%;KOMyZ*`R4N@2{7kG zNlq#vLsDCe-J^;~VAGdzS+Ea<3hh^^Jqdp&`BOnj4i=$6sPNbFek|_`uY(20T0FeK zJr@62(&!Evi$}HXDxG0){%kHSvm?Z>DCK<1J_c0BHx%OXEMdW3_v79L3%KXpV{lE_ z6exc#$SHOiK?D1N2Zizau$b8}Kdz$-2O3UiTG0aRY3I+!6SRT4PMlG~kfaVSDMp85 ziK<|kbMDcFU?uqS_Job(m?D^f{Ly_(GLTakctp5F8WJ0$HP z6GDWcr?zzEV4omV2h9tHs_=nu`Q?LO&hbD=v1^-aBPV<=4dCb(;Q*~b!J6`nLz&J#Gld%2Kc*SiSF$dr~Gx~7ZTm_)HzGREh%@BdAA%fujJg$ zK1OWY^7$}tnP(`=uq73bD*cwCT9S-gp6Ux@-I6MzhHFcT$HVRSMhd4Swz8 fOJ3eLmF?o2r}ma%`2<}`b+!4wJ^!&;3jO~9c0QXl delta 3780 zcmXBXcOcdM7YA@VBYVrt#UJ$i2^xIjJCaP24khutX}Sw7kaFX# zoEsEer;XzHZUMd_^ns~_vv6|k!IDyt4h%T$D=QnrfJ4zgPVJd0oV)T#pT0x^6z<`+ ztaN37dVmxdCL;mFlRG}@0Rr%xR`KRpeNOoK{-|wT2ouCRJd{*gpn<@m$rc9LZ8Caf zf_gkCt0>hdS#)`B33Vuq_{uJPMeZ6^r9WDyP=p%pox<@6G(_d|zYi}*(cz9oj_r{v zh?$vn$v@MX_~jsvN4mQf@qIOlY(FnT^t~3QFBL#Xq>p-+)O~LmyU`HuR8TmDwU8^E zrIm-Un=iOmk7;#d<$u==n|>g-Vfi2V@96Zt!F~vo#KhtoF^j%VlS6_v*k`^`@miU3 zY+8g~q6&(!&uLG+=`){VFaEnl;GxdOMhsmi%2qP4{*J#*Ee6ssI%5YK@1%#AIlpVp zm9b=OU6Qt5>+XGQcz*H0(T;Gq*6!z%Y|)MK&I~oQHdLU}Q;NBRfp?PI?q- zO*{X(@5^Pxu{5Kk&E1pq}_WeKsd>evzsYNvO_{BA= 
z?~_P97^$mQ4WqZ=qK%}>-Do!N>cx7!HYDA)sl)xX38@*};+NK{MWv4CQCJ zw>Zd5De}?;95pHT+?c0|TqAPgcl6Y?Eko))O+oXqnJBVwxM)f(1>tNB)vMUzQNwB0 ztRSOkWD!%CZ=@EErtf*5rP{cTbdrRO4}QOjR_MmHy+nLbyP+Ze6`v0ZYr0A68R3C` znl5j~Hn^e}PDXkLXPnRrjrJmm5(fe@-_BWHJ7kOKqzqWw=&jL+1A(g`(*i{XAG%@n z*bMzsI#0f=V2X0P)*|OMsx9-#6}n1He%Ea$ItQr`%k+f{uJcYj+V*QBgH2|B*`Q zj~O#Nzw0*=|JcazM{)f2?|Met)^F`AHvZ&@wckS3(r>Bazy8S#qknRK$8X0*YX0P3 zGk?9LT5gW=)I*~C$u5#}S3)-CNG6I(Pt{c|N$Q$XiyI(7I_4DRqw$=MBvnx$HhO9j z;yr|D-X0r(Vd|eFFw_hw&vZoPmS4ide;*9aj+Q{nIc_c%TrQM8j5@#l?IGMIMftej zP683BxbV#M+rTyFGh-+5l&@o4))F3n*)r-O=na0itYDnf0tGujd}2MX;7Bx1YcBK| zI79rdBu#ba8-ciHaBiK!U6OeHD_0(W87DE?^~qtwkMzVm4Ytboo?VPG;$a*fyMiq< zyGfXj&S9g!~X6N3`?#5bV`Nkf| zbz(2Q5?H3FK46=cRHn_b&DgyoXFr%YHDOX$S92;_8?k^i(@Kf+by!JJ8cU*GHRgSI z=`9L+feE}1E`MTEj(rNCvc4Txj7=PE5n90)V73P15?YiTEL=O-r{KpE%%SqN!H!EN z<`fe}R=pIHj`0oS7HircVrSl~%x{XNV9(UM+bkvTV?T0!W+_G_U|Ientv{vjVJ_Yw zO=)$vvAP)%qMt!Dwy*!Da{s@Pn15=jrYm~{=AH7zsbo70a|xh3Uo;wmVGmxNnhFTU zGTw~xsK*6jlmU0a!<_-x8_xUTZL?P}{(WSh&JZep>|2MWzohkLY+n^~rK^W8hL(IJ zWCi?zhwI{w(&_HMHGc@TS#c&=hO{Ep9;G4b10`sftv=qhHWQ5=exAjBFCI-! 
z`1M~(ia_e4IP<$qfk-RzhDA2f7d3;{s*i^g+L}2e^nJhvF^TpFU(ql_Psy}{i$jNu zQ1HO8&^ZM?RQ!Zyiat{dh5kEwV)h^j+4eIm<9yVR=5<%*zEnjdJ`*IT=S)D`OXk+^ zLJy&=7A7V+Q5huXJM?)c=OC&Lb?Jze6GP#KGx%Mc7`lV8lY=7okwa5)XG{_=3QjmP zbMz4x!q?j@vGH*tvN9p$9A6YG(lKMq@V&}{*k-IXL_---f`HaX0yPuLepPs@)QT3p z&yMuiBGRBrZB_xMj}&5G0;jaefz7t1MI&&X4FO!KUuFn=)OfC`qiK#c!4VH*m zRP)u`$=`^=?>LDCc?(2^rNy*y^?Z`w-#^nP^zfv{ijdp{&ndXl_*v2XM;9c~H*RJ` zS3q)DL8OmsK zY%!Nd3jzlN@e@=e_@24ixZa=v&Vio2cubq_p_v!- zua`vBRPaKjBE`J4G;9mCU*;z*WU1HpIZY`togumix5{*2{yTFT)Y*!q5PPh9INsp_1 z-AuO;HM7^PUk-^!dH>md$hMC}2?T}6h!YX0+1SRHu`K}M6_#kJV=f{c=N6uYbZ6A6 z^P<_J_X2wV_(-3Fwmp&&xeAZu%+bzZ0>^Z*F;cUBUc`t@(7B=c?9o*N6pay?UYVal zvLC~Q3QhFLsNQ&jR#;9Kp)m3?n zKv78!LQCg`ke_i^YPh~2+B7Xz{h%m_&ZG~WO1UF|P7C$o8&miY`z{;fDl-osDmKmh zOM`{Cyya93HMvRi{kdaK>7?U+5^`!*M@cmTc8=0PD}brL5j(gt4uj%e0Y;y@z<;5$ zOaI+lpscnA)0tO5yZ5k?Z*mccv|sKvXwHG>j!rD*c^UADmCuk8l>m26i0kT_-34vU zrZ-9|u^`%Rkj{1~960h`e!Dmq3@mdh#9rlVWN@LC#dy~I;baA)Nmi;SB-CicZAiI+ zNO1ab?^tKZP}c6hIp+Yp!x@UBbyiSF#7_p?Hirq8uEb&?V?Yg86vOTsfk@7Z)a&E= z&~%BzGS*QK43g-`)UMhPH_wFX7XT?_58!s@+k`H9J3Q*S0mYLoq-3ZR|=n zq$@vWCzw(*<0L=cC7z2%03kn-V>=x-gdjfxcRG`sVIDs=RGx#_ia!@uV=16 z$VtP^j7g+G|H$CZJQ$ro0q-OIbRd;LGy{d=^wy3*?rp;74(f$KqXsSMm`H#?YC9L; zfLC=uij&T3q;+vX%wu4al3Hg#17tiYe?Vb>K$DA@c%{%=K=*8^v*%q?K&{x9)6#iO zKnArc2Qzm?K!$p=I&9xPK-FT_TUtFfK*X=7e(Q`dK--Kal1$(zKmY&$K&SujKL7v# zph^GlKL7v#pnm@EKL7v#zzO~DKL7v#01e7RKmY&$U}a}RKmY&$z*q)DKmY&$ptYs~ zLO_wRK$GAd2(u6pxD5PeJ@-CDU{(LYZS_7rB+XA9mh?WJbSkjISM)yi;>QuKi}P4MrQhSz!w>U5nrvNG zER69!mQoYTX8rI!W7$oQ0#@-pb_Y9lwtMhC^wp6dSTOKDvbQP&4~g$SE#*VccLMM} z*Nv4Oip$E zZgb-)_x;yEA7C~3dy>yU@oom=3Ua|fP-6)*2phRTd)~q+Cd;uvm(^&kN|vcWCqn2h zY_6d|fXI!cKo^=o2bu`eFshV5Vi%%SnBs~+-5dA*#^r=Sebn`DHgSGH@FH}Zv}ko% zKyX<~F>~i{K=2ubfcpz)Kse$Ftlno~Kn7`;vZ2CSK&;WomRLJdKx1*-cH&!1K$dP3 z40ZfNK%*>~({*b-KrDu_PGT=MK-YQbM`g`0Ku~)P`G?pjK(kRP7Xp*t9SO5=D;Nzm CcA;+o delta 928 zcmV;R17G}zaforSNDqG^@0Gg@Z2>{rV8zsfz*#@~;(&g(bvHkwy{-kOaW6mA0*0B= zktsiJKdM}#t0O;Z{p-*QTp&NGq_Vu+%O5|2+R`AZWF0>p5_i-gau`43wps%ML>50M 
zdh0UbwiG|6!uCl+W)nY9G+VLj3ll%JVjRRx6B0l0VH4BE?+`#g+>CSu7e^33?U+FX z$+ZtZj~))mcQOt?MoMxD^VbbO`uN1Tb^Z)L-HG!hrJoEx<{q-bDhUif{uVOFXvhnb z2N1!3M;s>^Ew!gX@~kW)E8O)!BGD*Azz)(tZyyEOsocdtO|pPwtzNo7mc|_$_-d{| z)tP=z;!LDK8jRnSz#E-F{V&`qha#0gAYt#;M%s=*7;B=JBI|`f`!!VaA4z~f8S)h= zc2;#jH+F;*R&;Sd9?LW1^H^s<$4vnvV?JSjKs0C|TH{Kov?LVI0dSKmY&$fT#cOKL7v# zph^GlKL7v#zhC&XhPc}aYTeVdSR~kQu^tvkucn&|cIC3uq z)CoUgYbxg^lmkD#@3{sU%L6~1>N3pXkpMsZ<*DJ1U;aLHI8CADs{KCMJ*dMIrTIR) z9H9fpV)s6I*ShoH+x0&7@RxM@X!Sl7`x_{2RrNle+M)hmaP(L{@*#?65We$1$0wWO zNqO=<$M663Nj|a*OsY3&h%5} zAg`f79zxE|!xx%B%Dxz9eX5i|Q1RR94dRMGsG~+ZVC95BgdLekj&OcJ>91UvnrC%c zK>G2(j|Ap#KyOi~G7}1CKq9InFk)t5K-(LCg~7gBKrHpKLNGT{Kqh5&^m)UAhvY!PKtw8##HLK#Wf z7in0L$SR_8`BlI3&--=8ALn)cIcqtDwH!j377z7uWoV(M_Cuma^^T>qxO?!>mziyL z?J`su53{wghQLkj=ARzBj{y_SGg0fi9b}SLIWqVy!P8&WainuMJhD2hoWHIGuJZl1 zryW#4|GgKzs6YzLnUnj-7llA*k+-n@6A##DKfdC2o(*cU9?hLdW`aSJV+$I+bSlzp z>=|#KT0^v{gy^;V%ZT|+InT?;CDa)7bWlWX0o~v2Jx(&7K|?_twW|8lC`f#EGnYg` zQgLiH_RCs$+qC2d?YoM2aK)2u+XI65mkYhonPzNws<=bqDGD7sWSeznv3Lp76qljB z^_{`AP}n`G`=c1UqwW`j^VC7iZff(tb(L<6$M1+p_s&+VJ>K(q(66VM0fW-DA0^e; z2w7y8TXiWWctERzv*r%=QM`2~d^#6fJ7dq*l#qdmub9;w)lA0DxKGYb%g11@GQu`t zSN_A|s-Dj8OO3$T$2ZxW9FGvcCmN?Q$e)8Mp?2BcPkwNmn5}S_+UNxMyW8|b-)umQ zIHiU=n1Wtf$<(ET`Y<)RJTEV+4Pu+$=DPTmAkCl2wY*3g)@n5wDMmtI{e=qr;zSybd_$6Peg#f0GswMPe`qOp49V^&jL&*CgnD%XjS~C2 zQB%{%Cm{{tHuUTk?Z%nj2Bc}O{2(W^3Z;~nQLC&Mb;5lKM6su2ZL+p4QZK%&i4U8g zt;{A8g8T-E)y7!4S`Ulu zkVAaW=*x3LQm9%e`GZfVBr@dN9=O3Njuzg0*SNzXh6=4JLao|G(5vDNWl=T}ZFFrP zJv3)o2wB~@7Sg69gsOv5dumGsQCXg_Zl{(YqH+7!CgB1|?7Kwz;5`5Dr33Q$1o){o zBc-p3e%~_pdC01xQ5kq=)VuhnUxm67{m3sdks#$*zRc-$2Ff`{G=-$dAYDiy(d9ir z_5q7Ttd%{iIAc33EUh8v?QBD3r8(SU**@cuY7AvwI~*R%=t1&z>_^gw7T7*43@DIM z2kV+xJAF4LkdRF;?S)0Epn#`QI0(J`EocT(XHKomN7B%=Vs8AJMzB!@>!& z_e@01WZA*aI)ZWjj)QQzu7Ct0 zG%@$3JTu!3X#)(*j4pa0eD1*U&Tw6@2`L&0)6<5x=W~=l3Twi>p8T%)Tn(T=>*jGG zsw!;OI5U$v)WCCQEK1m15k8g&ML!c#fRJ(7tV7H?k-Jlgf+_ z?ih;$l&}f@$P)o2bEl>DUU4|oynl;oydZqlyLr}cLjX8*Nb!L@{J=5G)V7$y2P^t^ 
z!3D2)U<8-PZ%lJTqpGD#D`yxNbl(4@dexIF?WV|uN_X{8PU_1u7RAM5H&7&PcepHG zi}MTHk%T-S_=_aQ|zE)TF0yoOq|G_D8`s{JrFLX7ofJ*PLA# zsP+}Zi(3lUZ1ec=j2x^;b(9G&VX512aAm-6$-i^a@Be{?PjEaZ3xC6|1opmut38Lc zwdOk%ElyLh!w=c(l_n;!y}QRa1`mv2*$w;rRJh+@*2kzMp*0$2wTrQ#&$1U|$n%!H zN$JEkrx-Y$o7=Ivm6u+b2U@ZJ!ow0PW|}ddq9+4s9e7SMs?eYq0EX z&Vfm-53v}9!9#2M6~z<$k6 zTE^W)RdYSfMd?CpC_i5i7?jNQp+uJg7B z6umEm3EBy^!N#j#_a>CO#UTM^Hkf5LOrxOY=yvm%M`wWHBJoCjQ~-SSmQO6V^8qED zs=F8CND$n4#bo828-y*Yf2UT>9|re5|J3{B*}+H4{zn-rw&1|nJ<%Cq1sd#O!7HAo zV5~ur6E`QqsX6~=>1fu3@PnRSVfH&nlC#=tswng zkUo_W{i*e2C#u@p{5I?k#$I;^y@+D$cZh z%gOD>#_{zg(WXOEAMjm9^w6yd@9{3oD~Hzd4o?bOFH`t2jGs<2l{>=u7EkAFyoeG9 z@z_*{!j6^!9F-W7z;qBF^>5i9?MlOa#0DZyRnqV;W*)T;D*bq-O3PBJEDevmr}y$& zL?7;<$Y?p$*@r8-?2GsGr}p7;yPnqlYVE~c+g>oI?&`zu1;3{zH-6glTrs(;3vZ@0 zSJ~kHkbp%jh57fx!;EdeNcR|zWD#YuEiA^E7VIPK1?Gs4e z@LBXI=d*F7ME0$p_%)jLuf++4M(2$M#PB delta 3056 zcmXBUcR1Dk8wYT+VatB3P*%uDD4*}=KFKOG$(|3Dkq3!*;!w$xvWvNK9m?L7nY z*&czoM9f9#6F$))a|n$JjI zHz6ed>>R4SD)kRr(R+07jvZ29dy0+#N2yU- zHQFTHc{?$_6m{I=)wa&PgC09shjO-Mp~pAUzlQjyB0SV?#K)e9Y_d5{#n5Ars{DD0 zn9%d6c3@WV#ffuhV6=wY-qwy(!;yJ^oj(FB=}l&KV}9^CQPd-hTxSoPidKFU(9A(% zOh~@jS`R*E#f|gqCxIE##>FE;E574G`@n?;$1m99^H|OgmpMG?d`f}yHWe?luY^;(2XSvgwmfyH z8;j0c2lC$#Ys1%vieHmPpW%+2GnC}WDx6eaM&2s)57${AMpEDVWrnhkrZ}LbLHE;f z61J1}@!R9E7c*$81=vrV2`9+4{S8 z&!la~_K41Sj1a@VcQx-Q1dHKD5BDy^*CKd33N0Dg7Cs`}O-KVHdPW|JA@!|xib1XFzZewFsmW~&sAH2lvE!2_}ku6kTgMFz167@<}~WP%zObVva5K;)6PO{Do4%h@{{nqg8r<% z$rZNUX%+}-I0{~4pOj@1O(2=S--uUF9|S*GEUIzq!tC?-qw-6dK=t*)_Zw7!dN76U z=b!>K-ZP2X<+KxI#HAjWc5PP$_FTpTX9+RT_*>LgQeOy^G9s=N$MZw^v_ozFEH8w< z{<`qtJ~wQ#KkxbJ7H8!%nVT$Rg)H9?F2zL>P59p{-joBR9WSq&8bvXY62c$(2yR{g z&*1~N>I^&K%EzatY4XoOZNZ#{o?Q-$Ar+l0o%v7{pdNI8@Fr*+u?cg*Ng(*T+f2Ol zBJ7(U?`yUr!-X8lxtP7dkj}iVY(^pgB8t;DST=dV?)demKchX!FlxRwu4nBIxo5bC zlL>nW=D94-mSzQ8cc}=URJMe*=Ik-%5EEE9Z5-69ZUhtM$@ADIdp=XFn{|gpx zKHchPAOhc|AGv=l7KGA)8^;AkMW8@5wtw^{9~3dqH&(Ip0sXX@vcD)V975feaTz?Y z);e}z+XxqgNObvK`OXR1zL#4h*y7kBSo{)ObO?J&u3%K9tKwNUvYYal+XHG2X7;e! 
zMW8xDRZig2F_%!{=192O>26QLRYQLhwcnU{ucSL6_kkezyC)C*U7$on%r#9vc_Ky7 zPwIy7_z4qJf~%yp93En3vG{E3u)?*HF^IyAgasUyx!*B%hcD z4cUy`b6{ASAtP$bp-Yn!Q>crzFjmpx9crEHaYvj($WQZF{l;oPGKk`83Nq?J@-bRT zsi!-U!NkTf^Og?eK~G?mHfu%h@BO)=W}DH9_o=U!yq=*1o}ILh?vGK@4?5SXNDX4m zWoasX{tz_}syzIvS%Hk^ZR1UUEuFOFGJn1|xfoHa9TVp~i&3tnYr+k`0@TH8#KeU1 zQ0v>kuLm7+P;F$h;;L2_f*IqPA;ViJtVoCTT3;G^spdB4>3TVxo~vj@S84o_nOo%QY+VwhE+tVWq`Piw}^Np=`z4*ay;+q_{R?^ty&Lu)clrJ?&T=@cB5dPw8ER^FH6FeIJE_OYlv%`xpJevE+eywv{)uTHH&D zjP-z~S5mo?5zcTu;8Zrb{L?XT^XRZ~$+iL~WAq^Tn*~(<`n%zCMsS!{&UxKk4`Kpb zLnZY|kP~bqLX$`E^N8NPj(jbs*;^#u@=^otWr)ld?o$Iw*p9H;>#Fc!;fyJVx&mZ& zg|&~??Su$dbin71G*r>QJU4XS4proUwW3qvz@IKm{%C6?0`c^VHk$hcfir)f`6?EK zJ#Um7A{=-@%$qG?MuHnUSR@ulhS(szxRJl4ehcJLkJ^1NV+GCF)0PEo%#f&}UV}F? z!F$O?YWge_jKv>#Q{cXigM~GiQp&#L9e0H}e1=!?<+*P$j&Up4#)xqJ=i)bPyFC4#&t`$$wOw5)nEyim;c8|mXga7YM$Cf5<9Y^5n?%_F|s=UEpd47;g+2fZwdP+S0_|02Z(-o z^8Ge}0isQBq=ezx0P&Mw0!$&TDdoV*p_d(*X-*mt^Oo<*XM=-D-GWN+6?d>QzcPA=^sq-HxE zYuS4V3n^NPF}0Uy34&$AkRIZMsDi)F&K}}c*1v~Y=@f!DyoiTLq!3|@QPl_BDa3Ju z{i3=V6e6YVnJ-y^LKu`K|HtPr2GXEoZe7`2E$lXT{4pm}3Vh`^hRJd1Fn!ysu5B~{>J1~i%uK_9 zr&O1(*!T>%DYQ!_3k8D18(vP;9bVwdp2}hN-W5bR@7P`wb_CAYq1H)bdniA5e4@Pa z2x!qVOfut5q28hHU69`)GB`G`@suAnfZF`=uqIvrYiG`PMw9!YE%|<;<%TxcdfJ^D zKB);yFH*BN6|2JAWqrMWVwK?fR_6wDLIJXdr49=tSkBmPg4U|H}e4$wa%G6Qh zSjJy4Vxna9Qcnm(iLCa=)BF(b`k+`QmKWBpHYmG&`+!a($*UE<~qT%CTE`}`;!bJ*w&I>)Tw{jv`S-|&9JjV_`51sh8^ zSo&{M&&)-$Mb`!Ct3GVBOO2>kKGw znSQ(7@WT4DUovdsS@~n)o6o;(phVCAHs{C7so%ohj`2TPO>E+iN>wwzZP4id`rF75 VS>}eyC_@|8l#MM?d8;N&{{u8jfM5Us diff --git a/tests/test_data/power_spectra_sampler_ts_ir.h5 b/tests/test_data/power_spectra_sampler_ts_ir.h5 index e0c67b4bfd5964e037eeed59bf2207201097aacd..c7418c7c1d64b92c87f9cd7c33a052795bea9c54 100644 GIT binary patch delta 3073 zcmXZdcQ};+7XWaVYiIAxm0dE*e&6?8yJ3_>))!snQpQCIbt@9GMU+v|B0CfcWh7-> zF47=!6In%6#+SbD{PTOx`0G5+c}`CuuBQ-J-os11QWaLDsr`uPS+ipyJ^nsC^24)F zuUvs@lVSFDwotgG-SXSh@EEYry^^$kIzSd_nKM(s3cLbTokzM1;j#50<$_f$a98NJ 
zKkcLf`X9X4iVLN|0-w@Hz9bC7^L#}eUwFYW=gC!%^XyQY{dnd?3JVOH9-Gsktx++} zrrxm@#tKGfB*v^fSj6yeD|lZ;EnrQ-&jv-+=CB8bK4T=4DQqa1vrbii5(}1?-pC_S zFzI-9JI6&Ww0%Aa2>$a7F{ z{OUy{zWUkh-n2-BV{C)n#rZJtXOc-elfpTu7VeOvees9m#2msQYLg3~@9(S~{9y-b zM1~sbWCnWarHsq_^?@KOYGAsF6dw9nHEi*eF}}z}3G4p- zpBZv00XxpU8XngvkMTcWTbvP=#%hF9KKgb^VMhGhgVwnuu(`KCHSV&CV@1}LVb&d@ z*z1yYWifV9E$sT(#jW~mW9A_c|yZ_n=jCJGz6v)dflgh49m zhr)FO0cZ}ke$DE~3vnNPQd16Wsp3<@vl5zI*XJ;%zp>+g%6F~(%bQkL{|VR9cd6^S z9|2a5rp>Y5VdhQcEu+>N|9YN=^sk*O4CI^oU^1 zVcrF5htxgom0y5QPM{!9ZUwjy9HnUpV!HFM=xlIr0wjq zbb*}R64I|{xI#fW!fh6pC z26f{z@Y~btjhb!6>3-D2-1bdkjBu4%&_&IgNY$Re0VaHpi$W;T1Y!y^FcA&x(dH<@?CpqI4k}3??qu zmJXzT>6LeuZ5whiB0Q;bss-^aemam&ZbZmopXpwtdW1u9seoIm7RlM>8kF4j2#F;O z9$e9{L=p%K)aBrmGK52F$tuIY7@^d9>ahA2BbSe7k^U2O2VoZu>utdb5SJuIO{LpS zgj7*Bt8Sc!ykLl%9^P{u!8=VX9UZua*k5Hi>lB=U)Xy${xZsnC{F$DxioczTBn(I5 zR{7_$7<+yT)BY zc$n?#k~d*4z(#&L{KUZL5YGd>w(sABtd{eu90l z=^8k^4Wn*xN`$F(yzIJJG}Ip5ZV~(V3@}|H-fV~tgzr8INfi#hprliM?@~MoLa1F= zO_$DjK={1+&+6GjaAfy^2H$)K_-xhxICIG!oS1vYyCSVYgCjg-$;%8(G$`^C7DPBT z;~3koiol)0)g#S?`tW2Y_h@UUE?BA}sf9+`pi){)D$dXXTusNDC2ci$zIEtiOPvz9 zjhcvf+$BJ8Sot>-4|$+Q*^SXMPs+fBMb21DYY7mQgxktSqOfNFe)m^Q6fSA$FP?T4 z0DbZh?|>{HXwL-Azj@0A@iA;qavM0HtCW&6QO*jb#@yMd?JO`X(X(#A&J4jtp(*KK znL&15Bc#A<4f~|1b~mQ{4>nhtu5tX`3by)iNNG>vvOLywq2-E8^b)2|+sH_yEMivz z_kTs|zGFjHGQ%4d-#4FBRiGh_C7s{auWB5bV59C(dfMAEp+WqbJMc(!Q5oyo@7hw2WHoUN^%1)>e(T03 zD$25b%gOD>#?aMfvF3x)AJJXLYhhaxKcL-+cP_p4J(?W8T2A;ijGj(5lRwP$4$a_d zx`ZVSqH$?XMV+k!C{}7j0<%GMG@x}~j5{6m6(5K?RYga?nS0hbsq~{+Dy<7?a&$E6 zzTT_rk$tGABC{2vs}EIl+neAOK zfApe+uPQry*LqR5I~_i;_i5;Y?lkk$kY03se<5X9rx&F&zEzTNz36w_aE47p59(l( zS$O32;aXw#Z+jWeMKCDh4A&0BY)F9~)bSzNu{s7unxl%W%E zs_ZP|a8&Up!<_NYyou>a9S(VHIAZ1)1wYPH2iuWKo!zEo@Nn^}uMr2xus!2EP}{yqX^sWkMc zzLW+3HTqS9o06d4EhR)W6N7EI@LaBGVPM>DE!>C_fUV=_0|R2HywI}OLGidBH?W23 zye_-O4iAO|E#oNozfY6(_^G9vA&4y zd?{$fH})Mnnk;525H^oR*q?8fjGw`}1C8=Ox=v#unR!|7gD0_Tkf0+j&cNh{eVM*= ljAIH$&tpcpo{wQlWWR>-KcoLY76MCpxgc9r*S?Fa{{^W=Y5f2I delta 3073 
zcmXZdcRben`v-8lGH=<(3Yj4zp}aru>rPggN%lN+GIEfJ6E{ju$}TE0iWJHUkqVio zvW1MgjjZlSW!xp->i7HW`FK4Z*Pqw*y1MfjyYm^#y16OROJ=k|wTI+C{u3C}DJg_K z;V$yYTP{KD#5e67kAp#{PmfvcmKXdf*3vjNXARMj46P!~hEU=zpD21-7wQ9&7JTZ| z;DT=lnIb6$-lK#`!FT-Nw;3h8^EMA;d2iBu__jhq)R6^y%mRmd%-<}; zY*H}){-R*c^K^Xcb;R)j@(PyPO1B&o{fwpPJ8cZCKH<7Us+!)XX*j6f{qhW6!0o+* z3pt)axJ1lV=)@j%;?A1om5bvt#M|2-OgY6zXuRZ9iuc|^JYDA&?T*_(?NiI$y)~bZ zfnHK*{+W4Hds*sl_M&&_-W?TN!Dz}LnwmKt5G37=0`)=yZIoM41J>=nHU1PG0nSq6 zjB3Ot+<7ZGu@rUO!D{1E6fYh5AE z36>HPwNtJ#SY=Zc%iM8RY!^N-2W6y9$?&+S?sH!w8b))izc*e@I?Up3UHle?{&-gNu``@iF zlzlM6fh`SsACHr|Njq`e zrsZ0?pfr}NzkBy|#t!U&=#0k*G3<9&>yBcG7;g0R=rVdGf_I=WGs>ti4&)Q#bh;vh z`JX1U^6Chw;Se2ao&s4AA6Zo0{UA~Rx5UO;@p22`mw#DT-x%V@t2bBJWrq0h+XidF zG(WzyQkil}cKw5WZaRwOl=860U-y=Rq$%?5t4@b-=P{oZ=5tVyQ$?D{I0NzNoHeh? zPr&mE`m^>XH`sosSs=LKD0q*5RFO$Gg;f3lV_tm&5PWaBq|T!UbI%ixDlBUO)z2H> zZ%_m3!8GLEzfe0iR$j)~t{U2O8V z%2l(!Y|tF|^2?}DIp&6xa-E7@?b|4d*?ytiN=sy#@IRM)dJd3xzPM^;9L+>dig@HJ zxMdN%Mh@JlGwOs(AD*6~DLe=DMGF>sPB|=vR&=s-=0j1SM)3Wi>!5kWHry4bfZ(fc zbMeyiuy1CfzuBGw=W-?ID@65Og`43p)l3mhT`LHEkTMw@Yf z5f-}$m1Hu^xL+ti@Uy52m96s~_JV#l$*AD0CTQ*Ub^iycL!XGx`jD6^2;TIbv@O^L z*xR#PN|K@g@hgQHeLafMY46RRZ6*V5%-UMA64LOxB1k3pt0XkhR@-%}#6jD!cbH25 z1D38o-R5s70^g(`d3-1qgwnxl#|6ejpg=T!VC*^{6frL}R&nqF{gk;%fG95ZiKB2scdwHtK*0@lIK9QvN+b@OMug`m;cph@5YldGbVxpr6nSCHV^z z(}HW{^;{A$yHxz9n958@6-qAXxG)e|9Q6M01M4W|;N3_*q0dMuG>VTRrb$D#qxT#c zmS+*Q<}TalORn(e`xGN;wNvuES24=7a!b1AUx2!JjhUFxZPfZE=*vOJ zTvQv?thA<`jbPSfcG&0!3NO-Sz0#k7UZ}gzdj(uYjlRxbSH02^t848O%bXMxP3j9= zPf12wX<2I3tqF+X-?OAGJ1!%^@o`0^|1P2sZA!7ciS)myY~b0gT^bjV4Xdr9eOxTE z&GnAyB;6xNU94GEuu~-G3o9?(w0sX4St{17jr|}!MSk-{xeCaCOrylHB5-QFORsCl z0GnGU-qDUF0H3eZ#>r zO+~uE)xeWEb>wS9&E6vMmKU0EFH2;haGyH#gzpTmy{ZQ97f+jUYAC|ZuJHD;I$4Nx zLkE2CNJAC<^K&DY9YCp~1g;mI6bJrH;SYAkB9KTwZ>zOW5V-R9S*&3}*z;PYA<~f- z#C+J3W+ix_gGFL#beJ77iyQe%>bJse>QVb|WvrkTf6A($jTw?vHEQq{CU_^gM9rLI zg7L%yuM0djaEP!bQ(Dj+CO+ zE)eo!lTIX`IU;#&+X0q_DWd((eyf=2DZ-iUFIAnOX<|@kRc=UQf;g5oIre~`N+D9| 
zXKsHHm>_aW9*#F^jS-wg@xhAjQ6jV95vRW2D6taR7*(DBhB&w0k*`Cr*oVXF_cipXz*mtU8fkmR9=-oAA>|ozVd>;IVPATmr zq~c4!>J;a2oF+F9tn-G&T5Z%VUy@!C$8ifDX*?{IDMa&DFwdrYa^6|OqjW4Ue`931ocKy zUFK#HKq}SaD>gX|?uzY_sX{^E_?nkXZKpT5ainvazjFf-t~+*Dgq?sZezD`~_cqx75Cxo?*Gc7v4WTxFBD{$gz{Z8^t?|@;XiL4HY_+Ka zc3$=;M^0$L@_*?$Y{hEuX2n4N?|5any3M7*f>4C)5vjw%NDd5yBBBxxNkdN|Vm{X_ z0c9E}YCP)?7&TQkexWY}qC|H4;~9R4aC=ZJ6VD6lmm8GkvnbrK-sLy^!Jh+qR!Z8! zv$w#5?5SlEg#~!ligbOM8DTJX$v%0@Ca%uB%o8(4$DFqMLoRWvc)#4kq1U`$aieP( zf5GN54w3%T%Tty@iXtSrK!PwdHE?^ iKZaM&v-3>irB&T5k)zcAKNe}`w6g7Nm7S(Btp5X83xvG@ diff --git a/tests/test_data/power_spectra_sampler_ts_ir_onethread.h5 b/tests/test_data/power_spectra_sampler_ts_ir_onethread.h5 index 5722abbd9c292c95ea48970b0d38a011e2bc51a5..b8b00a7b35f393e94b0eacfb6e7e7cdc94a40616 100644 GIT binary patch delta 3066 zcmXZdc{tVi7YA_HmMyM*&vMCNtX*6Aem>_?WQ)2bMv+`C8bc(@$d$Bc5#1p~V<;)g zR#DlDBKsDFu61mM5M}$#G{3)I&-*;*uk$?b=fvG5#oZ<4lSP6EEC%uIWS&V?I-nHpIZ zAJhP&&}h1An+jayj7b;Ck^!Y9j@LHRq7c9674wEq2!3nK5WVNf19~kDjSglccj_feM!EdWwK!>w2$Z~~cr{>_ zKsPHa)tAPJWYWquMO!vO-2a+sP{HgXEZaV=R#93Amw1KUZtXS1fftg}XOt=k_18Js z?Qu^CDuW@Wfb$5X-g@t{$6aEtW_h!0a2nyX=c80-X#(M3EwZL+5KBY{xmYG-pC?vd z3HT2fMiS+nXs76O2*LC=GQH~JN>kjgspd0XrsIxG)sJ3-3pCCD)r~;9dr5+eL~8|!ayBn566t}R#t>GLsn8%q69n)?zzDK zMG$tDUpyEpzzr5@h2<;D-?4%1fZIX$IXum z(Jt4DS-Y0*h&%sUi`yR_h@~e~;6Egy=t(_KSn&qd1(lacMKfYfWM)I?lfmp0BpNkE ziN07v!@K`zT@Md5#>(r&oq0-72>u`f)trBi~ zYpGl4D37Ido+{Lxm&5AHXS^rXWbnB2nI!H%rSQS+AA8|&nGA<`{WtYE-;Ke5u%2u01FwOgL z*&mxkSksgOrZv~H^;#W;c31L6T7g&+e5l>w9!R+eZ^r|U&KCrNDOr4rJlh+tvIwry z`!29~L;2}$QAa4FI|4)BG7)r&t%Xma-+3pXtBi$Hav$cXrd|KFGi6#Y2zNZh1>T}gL2KvjDuwp zuzpV->E(Y3NuzlsJyr!kzLHbH{p1c*1w_QFeM^SrGp>C>Q*mIquAy^5=fltaE*=#um6JCc$YDNxuf(9dVL__iyaJRv| zsTp)YWi8Kmhn*m0;#iGUiXC)!`q%q^w1&+OPsXf`Si0+|Gz_;Xtnp~t=!SKpBX%Yh;3?0FgB zQXX=qMoNL~K*US+OR})_-9AC-P;pqxs^|*m6^EAL*qo+OGKj`6JNOaN4DS z?{LraoUghzL5PU@WbbxGa30>1onLd^`QlY^}eJSa)+4 zA8)E9B0h*F+ia*IUjOAbqnTDg?4Fe0EM)hB=*n-tHauTUIG%IZP04>uR7=Y!jW-k! 
zgNLci+46mPgvZ8;P0~?0#GZ#$kIGhZh?~i)DY+AOh)5}W>Fm!LL<9Xq5=VbJ5j1HW zal!jGA^y&GLBA!1@H37umt!Om%KGbxSTBL-@)bGYkd;WBZ(loNeJh?AF;WlwB{Pn& zaee>Hmp7KU#M``oQ0^*mzb|)8M)xu?T*?!RjG88jUQC$z_^{!s_a&b?XbSbGfh-Tmo43J(C=&9Q!+OayJN z&BB4b*TCGVVXBN24U0SVJvmE*fo<{Do4>#x>`T%pf9v=_NY*Z!j1UG~W=@%8hhFu7 zkHW#;HZIQK-5c#ORZWMj!nf2-o;m?v$Kd<^?>pgou-PNuK1<+D{8hCe3t*NiAdjf# zz)n(aI(x+g2AcC2rzlkD3gJlCyJP@Q`ku?G|D_N5u3>*12+@R-Cl`u2^3~zCP^6xz zGX-jFg7f!yDg#rm;>G<(-3nm*O^;88EDJLRZ*rtPq+rzJfEXiI3bHm$nU)!e!pk|Y z@wIAUpi6C!Y5!Re^e>(cx6k7P`AsZP>*N7lt;EPtE^c@(*YMNVUJ^WP@k`ps&jpN< z55AShR`EXlSE{A?%UHmmXi@pY5mQ5mxuuE?`97qCUpaJkdrdAut7YiVWe z7o1;DiDkHd`S&{&<(su}v7C0DD59VdUZ%>e0NuI~ZJIwtvBS1o5no)s?oV}HM(hl? z@4u{kN83zySaeh@pvT8uGX9pIMZI0FM$Zi=P$u4$7?D1Tbl%=b=(sV0^qlR{?k|JL zF|}s8Uz3gM^ITpPZe}A!|GhB7#SiGg6-6%{p7$s}$07PkVmHb_i`T*xiUM>qKe+?y0BryHNL?gGpIw9jNqm75S=12l||p zf9geU;qNT65yq}W0$@{(^8_n2u#Sz9a_!wzkz zt?YyZms%?->=SDJ*@)SKGA=|rl9F1`Rvy`+lTyv-I(cpLMducjzpp{<<&kF8rBNgF zl+}c~7xp$C+0l%ie3(3Q^?nnwUJhz7(P~Eh?yQWG^hV^h^#ZS`Z4(loa28Ix(}*_c z%6r!KHX@aU5HG>PM)X@1@9Ae#4XF0p!z=B_8jzVw^!^Z}&3hp540^Gh+FK&|V3IyvVK=@C%}9JadwDPRNN{lhL`{(GgY zT=8z$^4Hg)l13YF%>R@{KC+Vuen*ez+BR1UMWE7 zvzoJAY+2|KveJGYC<%O)8&>`n6$eSHDvxj{5%4U(Ibwd6DFmMcT=fHN1t4|Ws!msm z2ex<$2Ra&YLNkfvy~T_JR7G5uO)soq*Y`H=ilIxm^m?Ph>h%SD>D!1#f#MuCd~)Xt zQ9X;V#qVv&Jo_21cfO00zBYmL#6~40UXEdlh)<3Kw?{F{!~ea6?=ZF)cU_PO9>i}V gqN7d<_zqyY1ql^hW&gh{s$8rYG2S9!hbW%^0eazRYybcN delta 3066 zcmXZdc{tVi`v!2wcF4X{wk#Eg$(AKPpZ9Y{AzMV4Q*?yMph6;wV~~(eq{xI23Qfow zLQF`q9I}+G*;?$1j^#Ja@B7#5y07d0^M0-;A(xPlODGl;psi`Gj@=9>Bdhi4wuIRv z!tS1tEUt}E&{WtZp*8FV+wUl8Rnm@dZ}!;*m7^vQuIh0s;G-doW|rI6AJT$R@05TQ z`@QhWi!M3VzZ;H)+C~(&Ny3pCnT9qFVNjFOu3`1&1>M^4(T(>UP!!2qW>rG~7N0u7 zExAn^;?r+At?##vIJoP7XOUk;ag>*Q2k1*kv3`I0)52q^y%)AkmS{L4%Otq)s1sAQO<(1mHt``9NkUSgPbZM&pcJvP|L z#~Bq}j#;XXFKcNOVYP+FPIn*A#rV$b|Kj77j>Xdr#P$^3!Q3p)Z1oD$G5Pk@+)wFO zF;BmvDFm%>%*oG%ow;%e)9Nq}y5aO2c{{WwEtwbwe=-9k5{1qH>0lq*TiOH#Sdgzb zrOq6zPlpv+%N+u-l+tO|5?y!{(Hk_nqyZ7k+euuS%Ha6S($hj-2JX&XPv4s&3U3yP 
zGx1A2@HB(mk-@_bs(tzF2e*Epe5%?lHR>|@C=kPy;Qt9loQld6Pai`u6yOByVL`5VJ0XRH;4tI}?P^(2&WA$Y3GleZ$%zR8I(HFzwWO1_aT= z?XAt47lKLiiX}9QS^e3;mx|=R0MUD!pOPWq*_TQRB^sRAyq|20`NN)5FYkO-^?>8^ z@gLfEI>B>cuiLVG-b7C11SNAqZ0=@7)M- zd|=y48|B{I0epjxLzu$ z<)+moa>rgFEs(nm1gmOK|4KH#&YS&&la-ubH9bM2%!7Q; z`f~N*4#>X05#q|z1j&j%WG@CqmgiX z|M^A=gh7RR=4%4&BG`;nE*Nk7!j0A#y_z6T@We@#6XNbP*zL$zJ}~797yruk4;G~W zW8X!6?_4{WyqX<3HDv>$SM$_rQZ2wS7|WPo+XK>1EssyfL- zWs`yf0bRvBbaC)A9f<3@B?T!uxmMdW5!loyem5;70(|dmrH#J|Lg^JwiN!wzA%B}c zyu6hk*pIwIzdYoF8`VQ~&uOka5I(r2)9=CqMJZCDEn}I8czZ#Fu3C-LgPe4SEran> zR^%$V2oL;K+QN|7A4l-LW4}jO3}Sdq_?q!}BnfBo8DB0rsDMAWWGJi;OW;k~1wQ{+ z@Z&5}RbwW)?D%VW(;phfEV%lu*x<>Ib!?7jr%A%lGDeuVnn$~5{slW!b@1K7iCK*L zn`P^m`y{qteP~FIcNCk8@e8s~8pPPIc}m5u_F{Z>NjDnOyD`rpo&Aw|9aw9Op@_MQ}1HqtC0oVYIm@?z&n#J50bI=vwmV>sfn0u=<9l7 zRswdZ+x&Yn2OV2hZM>H+6^~7BNpXH7#bM>EG=t$zs~9Y>-{^$n;b`pAbq8Ip?5h}e z`Hz<-LiuF-VA3^ddqY1-e^>oE74KOA$y__$pfEi}_L$FvGTF#OK zO&t`WTmbd{<|+@g1lJ{>R|C?JKO=|X_+%hJ#9R9bAnU`w*G_AXV#Cpl@NtnS^dlO(cRisQGe4H*>6!)Q9*xQ??_ zKBiX(uj4Mk1M2i$EBK4(!x>*x7w~fzvUSlT2XULJ?9htX0lbZ3m20*09SzT1 zq(3?8*^g^nYYw0-_T!!MTq;b2AwATMKS?{R@|nuOgH465Uv^{Qff1yp z{z?Y^SF)h*o~~{@t<1Yhu#47(x6+-hWXavQ9d-OemSh($cUwMrMW7ph)}A+Ikko~n zzo$D&DRP{>w*MsHmpgG*rD-1b_nml??)=G; zbDj7cdB8X9-(018lXteU6aPX!B=KNhC%$qwS^KMJ2mV3Fx?Ft6zs2Ui1B)+kca`2W zi6zkDebs9prFxK`z2;StA106|^;=I7y{jQ`+w~Vq^Fk=PEH}Scn*zeiGY8|AV&JQ( z?5`{(K_D@&JIo> zmvH;ZdVwuyvsE>DzB&R0e21|#iWv*=hg$U?*AtFbW4FO5mtDWH0 zh33eWDT6*O`20<>+K{pz*jv&oG)zel6wPAPIky*%5%P#Kn@TXi17*#}cEc#MvsIu) z8VF_5O>EOraA$^oj20*c>TfsMcS(wXY)nFKhr0l%DSgs|J2YPSs_f}?ma+rd$v5v^ z+Q$LmY~LGX%vhmxF2OI Date: Mon, 25 Aug 2025 15:21:57 +1000 Subject: [PATCH 144/145] fix compilation --- src/py21cmfast/meson.build | 2 +- src/py21cmfast/src/meson.build | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py21cmfast/meson.build b/src/py21cmfast/meson.build index 08087425d..0eccccc13 100644 --- a/src/py21cmfast/meson.build +++ b/src/py21cmfast/meson.build @@ -6,7 +6,7 @@ source_files = [ 
'lightconers.py', 'plotting.py', 'rsds.py', - 'run_templates.py', + '_templates.py', 'utils.py', 'yaml.py', ] diff --git a/src/py21cmfast/src/meson.build b/src/py21cmfast/src/meson.build index 5484a45f7..9eb82ed63 100644 --- a/src/py21cmfast/src/meson.build +++ b/src/py21cmfast/src/meson.build @@ -28,7 +28,7 @@ c_source_files = files([ 'rng.c', 'scaling_relations.c', 'thermochem.c', - 'MapMass_cpu.c', + 'map_mass.c', ]) # Define the CUDA source files From 6e749b58771aff7b7d5046f1b3f1c773b1c25dd1 Mon Sep 17 00:00:00 2001 From: James Davies Date: Mon, 25 Aug 2025 15:34:26 +1000 Subject: [PATCH 145/145] fix import --- src/py21cmfast/meson.build | 1 + 1 file changed, 1 insertion(+) diff --git a/src/py21cmfast/meson.build b/src/py21cmfast/meson.build index 0eccccc13..c91874ade 100644 --- a/src/py21cmfast/meson.build +++ b/src/py21cmfast/meson.build @@ -3,6 +3,7 @@ source_files = [ '_cfg.py', '_logging.py', 'cli.py', + 'input_serialization.py', 'lightconers.py', 'plotting.py', 'rsds.py',