From 8ead94031fbe54d032ccf8cca22d940d46d0870e Mon Sep 17 00:00:00 2001 From: John-Paul Robinson Date: Tue, 9 Apr 2024 23:03:42 -0500 Subject: [PATCH 01/11] Start data distribution out as variant of normal distribution Copy h5bench_write_normal_dist.c as baseline. --- h5bench_patterns/h5bench_write_data_dist.c | 1268 ++++++++++++++++++++ 1 file changed, 1268 insertions(+) create mode 100644 h5bench_patterns/h5bench_write_data_dist.c diff --git a/h5bench_patterns/h5bench_write_data_dist.c b/h5bench_patterns/h5bench_write_data_dist.c new file mode 100644 index 00000000..46deeb59 --- /dev/null +++ b/h5bench_patterns/h5bench_write_data_dist.c @@ -0,0 +1,1268 @@ +/****** Copyright Notice *** + * + * PIOK - Parallel I/O Kernels - VPIC-IO, VORPAL-IO, and GCRM-IO, Copyright + * (c) 2015, The Regents of the University of California, through Lawrence + * Berkeley National Laboratory (subject to receipt of any required + * approvals from the U.S. Dept. of Energy). All rights reserved. + * + * If you have questions about your rights to use or distribute this + * software, please contact Berkeley Lab's Innovation & Partnerships Office + * at IPO@lbl.gov. + * + * NOTICE. This Software was developed under funding from the U.S. + * Department of Energy and the U.S. Government consequently retains + * certain rights. As such, the U.S. Government has been granted for itself + * and others acting on its behalf a paid-up, nonexclusive, irrevocable, + * worldwide license in the Software to reproduce, distribute copies to the + * public, prepare derivative works, and perform publicly and display + * publicly, and to permit other to do so. 
+ * + ****************************/ + +/** + * + * Email questions to SByna@lbl.gov + * Scientific Data Management Research Group + * Lawrence Berkeley National Laboratory + * + */ + +// Description: This is a simple benchmark based on VPIC's I/O interface +// Each process writes a specified number of particles into +// a hdf5 output file using only HDF5 calls +// Author: Suren Byna +// Lawrence Berkeley National Laboratory, Berkeley, CA +// Created: in 2011 +// Modified: 01/06/2014 --> Removed all H5Part calls and using HDF5 calls +// 02/19/2019 --> Add option to write multiple timesteps of data - Tang +// + +// This file is modified by kaushik.v@anl.gov (ALCF) +// Currently, in h5bench_write, each process writes the same amount of local data. +// This program h5bench_write_var_normal_dist.c demonstrates a prototype for each process writing a varying +// size local data buffer which follows a normal distribution based on the given mean number of particles +// provided from DIM1 and standard deviation STDEV_DIM1 in the config file. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../commons/h5bench_util.h" +#include "../commons/async_adaptor.h" +#ifdef HAVE_SUBFILING +#include "H5FDsubfiling.h" +#include "H5FDioc.h" +#endif +#define DIM_MAX 3 + +herr_t ierr; + +typedef struct compress_info { + int USE_COMPRESS; + hid_t dcpl_id; + hsize_t chunk_dims[DIM_MAX]; +} compress_info; + +// Global Variables and dimensions +async_mode ASYNC_MODE; +compress_info COMPRESS_INFO; // Using parallel compressing: need to set chunk dimensions for dcpl. 
+long long NUM_PARTICLES = 0, FILE_OFFSET; // 8 meg particles per process +long long TOTAL_PARTICLES; +int NUM_RANKS, MY_RANK, NUM_TIMESTEPS; +int X_DIM = 64; +int Y_DIM = 64; +int Z_DIM = 64; +hid_t ES_ID, ES_META_CREATE, ES_META_CLOSE, ES_DATA; + +// Factors for filling data +const int X_RAND = 191; +const int Y_RAND = 1009; +const int Z_RAND = 3701; + +hid_t PARTICLE_COMPOUND_TYPE; +hid_t PARTICLE_COMPOUND_TYPE_SEPARATES[8]; + +// Optimization globals +int ALIGN = 0; +unsigned long ALIGN_THRESHOLD = 0; +unsigned long ALIGN_LEN = 0; // 16777216 +int COLL_METADATA = 0; +int DEFER_METADATA_FLUSH = 1; +unsigned long STDEV_DIM_1 = 1; + +long long *holder = NULL; + +typedef struct Particle { + float x, y, z; + float px, py, pz; + int id_1; + float id_2; +} particle; + +int subfiling = 0; + +double +normal_dist_particle_giver(double mu, double sigma) +{ + double U1, U2, W, mult; + static double X1, X2; + static int call = 0; + + if (call == 1) { + call = !call; + // printf("\n Kaushik A %lf call %d",(mu + sigma * (double) X1), call ); + + return (mu + sigma * (double)X2); + } + + do { + U1 = -1 + ((double)rand() / RAND_MAX) * 2; + U2 = -1 + ((double)rand() / RAND_MAX) * 2; + W = pow(U1, 2) + pow(U2, 2); + } while (W >= 1 || W == 0); + + mult = sqrt((-2 * log(W)) / W); + X1 = U1 * mult; + X2 = U2 * mult; + + call = !call; + // printf("\n Kaushik B %lf call %d",(mu + sigma * (double) X1), call ); + return (mu + sigma * (double)X1); +} + +void +timestep_es_id_set() +{ + ES_META_CREATE = es_id_set(ASYNC_MODE); + ES_DATA = es_id_set(ASYNC_MODE); + ES_META_CLOSE = es_id_set(ASYNC_MODE); +} + +mem_monitor *MEM_MONITOR; + +/** + * Create a compound HDF5 type to represent the particle. + * @return The compound HDF5 type. 
+ */ +hid_t +make_compound_type() +{ + PARTICLE_COMPOUND_TYPE = H5Tcreate(H5T_COMPOUND, sizeof(particle)); + H5Tinsert(PARTICLE_COMPOUND_TYPE, "x", HOFFSET(particle, x), H5T_NATIVE_FLOAT); + H5Tinsert(PARTICLE_COMPOUND_TYPE, "y", HOFFSET(particle, y), H5T_NATIVE_FLOAT); + H5Tinsert(PARTICLE_COMPOUND_TYPE, "z", HOFFSET(particle, z), H5T_NATIVE_FLOAT); + H5Tinsert(PARTICLE_COMPOUND_TYPE, "px", HOFFSET(particle, px), H5T_NATIVE_FLOAT); + H5Tinsert(PARTICLE_COMPOUND_TYPE, "py", HOFFSET(particle, py), H5T_NATIVE_FLOAT); + H5Tinsert(PARTICLE_COMPOUND_TYPE, "pz", HOFFSET(particle, pz), H5T_NATIVE_FLOAT); + H5Tinsert(PARTICLE_COMPOUND_TYPE, "id_1", HOFFSET(particle, id_1), H5T_NATIVE_INT); + H5Tinsert(PARTICLE_COMPOUND_TYPE, "id_2", HOFFSET(particle, id_2), H5T_NATIVE_FLOAT); + return PARTICLE_COMPOUND_TYPE; +} + +hid_t * +make_compound_type_separates() +{ + PARTICLE_COMPOUND_TYPE_SEPARATES[0] = H5Tcreate(H5T_COMPOUND, sizeof(float)); + H5Tinsert(PARTICLE_COMPOUND_TYPE_SEPARATES[0], "x", 0, H5T_NATIVE_FLOAT); + + PARTICLE_COMPOUND_TYPE_SEPARATES[1] = H5Tcreate(H5T_COMPOUND, sizeof(float)); + H5Tinsert(PARTICLE_COMPOUND_TYPE_SEPARATES[1], "y", 0, H5T_NATIVE_FLOAT); + + PARTICLE_COMPOUND_TYPE_SEPARATES[2] = H5Tcreate(H5T_COMPOUND, sizeof(float)); + H5Tinsert(PARTICLE_COMPOUND_TYPE_SEPARATES[2], "z", 0, H5T_NATIVE_FLOAT); + + PARTICLE_COMPOUND_TYPE_SEPARATES[3] = H5Tcreate(H5T_COMPOUND, sizeof(float)); + H5Tinsert(PARTICLE_COMPOUND_TYPE_SEPARATES[3], "px", 0, H5T_NATIVE_FLOAT); + + PARTICLE_COMPOUND_TYPE_SEPARATES[4] = H5Tcreate(H5T_COMPOUND, sizeof(float)); + H5Tinsert(PARTICLE_COMPOUND_TYPE_SEPARATES[4], "py", 0, H5T_NATIVE_FLOAT); + + PARTICLE_COMPOUND_TYPE_SEPARATES[5] = H5Tcreate(H5T_COMPOUND, sizeof(float)); + H5Tinsert(PARTICLE_COMPOUND_TYPE_SEPARATES[5], "pz", 0, H5T_NATIVE_FLOAT); + + PARTICLE_COMPOUND_TYPE_SEPARATES[6] = H5Tcreate(H5T_COMPOUND, sizeof(int)); + H5Tinsert(PARTICLE_COMPOUND_TYPE_SEPARATES[6], "id_1", 0, H5T_NATIVE_INT); + + 
PARTICLE_COMPOUND_TYPE_SEPARATES[7] = H5Tcreate(H5T_COMPOUND, sizeof(float)); + H5Tinsert(PARTICLE_COMPOUND_TYPE_SEPARATES[7], "id_2", 0, H5T_NATIVE_FLOAT); + + return PARTICLE_COMPOUND_TYPE_SEPARATES; +} + +// returns prepared local data volume, used to calculate bandwidth +particle * +prepare_data_interleaved(long particle_cnt, unsigned long *data_size_out) +{ + particle *data_out = (particle *)malloc(particle_cnt * sizeof(particle)); + + for (long i = 0; i < particle_cnt; i++) { + data_out[i].id_1 = i; + data_out[i].id_2 = (float)(2 * i); + data_out[i].x = uniform_random_number() * X_DIM; + data_out[i].y = uniform_random_number() * Y_DIM; + data_out[i].z = ((float)i / particle_cnt) * Z_DIM; + data_out[i].px = uniform_random_number() * X_DIM; + data_out[i].py = uniform_random_number() * Y_DIM; + data_out[i].pz = ((float)2 * i / particle_cnt) * Z_DIM; + } + *data_size_out = particle_cnt * sizeof(particle); + return data_out; +} + +data_contig_md * +prepare_data_contig_1D(unsigned long long particle_cnt, unsigned long *data_size_out) +{ + data_contig_md *data_out = (data_contig_md *)malloc(sizeof(data_contig_md)); + data_out->particle_cnt = particle_cnt; + + data_out->x = (float *)malloc(particle_cnt * sizeof(float)); + data_out->y = (float *)malloc(particle_cnt * sizeof(float)); + data_out->z = (float *)malloc(particle_cnt * sizeof(float)); + data_out->px = (float *)malloc(particle_cnt * sizeof(float)); + data_out->py = (float *)malloc(particle_cnt * sizeof(float)); + data_out->pz = (float *)malloc(particle_cnt * sizeof(float)); + data_out->id_1 = (int *)malloc(particle_cnt * sizeof(int)); + data_out->id_2 = (float *)malloc(particle_cnt * sizeof(float)); + data_out->dim_1 = particle_cnt; + data_out->dim_2 = 1; + data_out->dim_3 = 1; + + for (long i = 0; i < particle_cnt; i++) { + data_out->id_1[i] = i; + data_out->id_2[i] = (float)(i * 2); + data_out->x[i] = uniform_random_number() * X_DIM; + data_out->y[i] = uniform_random_number() * Y_DIM; + data_out->px[i] = 
uniform_random_number() * X_DIM; + data_out->py[i] = uniform_random_number() * Y_DIM; + data_out->z[i] = ((float)data_out->id_1[i] / NUM_PARTICLES) * Z_DIM; + data_out->pz[i] = (data_out->id_2[i] / NUM_PARTICLES) * Z_DIM; + } + *data_size_out = particle_cnt * (7 * sizeof(float) + sizeof(int)); + + return data_out; +} + +data_contig_md * +prepare_data_contig_2D(unsigned long long particle_cnt, long dim_1, long dim_2, unsigned long *data_size_out) +{ + if (particle_cnt != dim_1 * dim_2) { + if (MY_RANK == 0) + printf("Invalid dimension definition: dim_1(%ld) * dim_2(%ld) = %ld, must equal num_particles " + "(%llu) per rank.\n", + dim_1, dim_2, dim_1 * dim_2, particle_cnt); + return NULL; + } + assert(particle_cnt == dim_1 * dim_2); + data_contig_md *data_out = (data_contig_md *)malloc(sizeof(data_contig_md)); + data_out->particle_cnt = particle_cnt; + data_out->dim_1 = dim_1; + data_out->dim_2 = dim_2; + data_out->dim_3 = 1; + + data_out->x = (float *)malloc(particle_cnt * sizeof(float)); + data_out->y = (float *)malloc(particle_cnt * sizeof(float)); + data_out->z = (float *)malloc(particle_cnt * sizeof(float)); + data_out->px = (float *)malloc(particle_cnt * sizeof(float)); + data_out->py = (float *)malloc(particle_cnt * sizeof(float)); + data_out->pz = (float *)malloc(particle_cnt * sizeof(float)); + data_out->id_1 = (int *)malloc(particle_cnt * sizeof(int)); + data_out->id_2 = (float *)malloc(particle_cnt * sizeof(float)); + + long idx = 0; + for (long i1 = 0; i1 < dim_1; i1++) { + for (long i2 = 0; i2 < dim_2; i2++) { + data_out->id_1[idx] = i1; + data_out->id_2[idx] = (float)(i1 * 2); + data_out->x[idx] = uniform_random_number() * X_DIM; + data_out->y[idx] = uniform_random_number() * Y_DIM; + data_out->px[idx] = uniform_random_number() * X_DIM; + data_out->py[idx] = uniform_random_number() * Y_DIM; + data_out->z[idx] = ((float)data_out->id_1[idx] / NUM_PARTICLES) * Z_DIM; + data_out->pz[idx] = (data_out->id_2[idx] / NUM_PARTICLES) * Z_DIM; + idx++; + } + } + 
*data_size_out = particle_cnt * (7 * sizeof(float) + sizeof(int)); + + return data_out; +} + +data_contig_md * +prepare_data_contig_3D(unsigned long long particle_cnt, long dim_1, long dim_2, long dim_3, + unsigned long *data_size_out) +{ + if (particle_cnt != dim_1 * dim_2 * dim_3) { + if (MY_RANK == 0) + printf("Invalid dimension definition: dim_1(%ld) * dim_2(%ld) * dim_3(%ld) = %ld," + " must equal num_particles (%llu) per rank.\n", + dim_1, dim_2, dim_3, dim_1 * dim_2 * dim_3, particle_cnt); + return NULL; + } + + assert(particle_cnt == dim_1 * dim_2 * dim_3); + data_contig_md *data_out = (data_contig_md *)malloc(sizeof(data_contig_md)); + data_out->particle_cnt = particle_cnt; + data_out->dim_1 = dim_1; + data_out->dim_2 = dim_2; + data_out->dim_3 = dim_3; + data_out->x = (float *)malloc(particle_cnt * sizeof(float)); + data_out->y = (float *)malloc(particle_cnt * sizeof(float)); + data_out->z = (float *)malloc(particle_cnt * sizeof(float)); + data_out->px = (float *)malloc(particle_cnt * sizeof(float)); + data_out->py = (float *)malloc(particle_cnt * sizeof(float)); + data_out->pz = (float *)malloc(particle_cnt * sizeof(float)); + data_out->id_1 = (int *)malloc(particle_cnt * sizeof(int)); + data_out->id_2 = (float *)malloc(particle_cnt * sizeof(float)); + long idx = 0; + for (long i1 = 0; i1 < dim_1; i1++) { + for (long i2 = 0; i2 < dim_2; i2++) { + for (long i3 = 0; i3 < dim_3; i3++) { + data_out->x[idx] = uniform_random_number() * X_DIM; + data_out->id_1[idx] = i1; + data_out->id_2[idx] = (float)(i1 * 2); + data_out->x[idx] = uniform_random_number() * X_DIM; + data_out->y[idx] = uniform_random_number() * Y_DIM; + data_out->px[idx] = uniform_random_number() * X_DIM; + data_out->py[idx] = uniform_random_number() * Y_DIM; + data_out->z[idx] = ((float)data_out->id_1[idx] / NUM_PARTICLES) * Z_DIM; + data_out->pz[idx] = (data_out->id_2[idx] / NUM_PARTICLES) * Z_DIM; + idx++; + } + } + } + *data_size_out = particle_cnt * (7 * sizeof(float) + sizeof(int)); + 
return data_out; +} + +void +data_free(write_pattern mode, void *data) +{ + assert(data); + switch (mode) { + case CONTIG_CONTIG_1D: + case CONTIG_COMPOUND_1D: + case CONTIG_COMPOUND_2D: + case CONTIG_CONTIG_2D: + case CONTIG_CONTIG_3D: + free(((data_contig_md *)data)->x); + free(((data_contig_md *)data)->y); + free(((data_contig_md *)data)->z); + free(((data_contig_md *)data)->px); + free(((data_contig_md *)data)->py); + free(((data_contig_md *)data)->pz); + free(((data_contig_md *)data)->id_1); + free(((data_contig_md *)data)->id_2); + free(((data_contig_md *)data)); + break; + case COMPOUND_CONTIG_1D: + case COMPOUND_CONTIG_2D: + case COMPOUND_COMPOUND_1D: + case COMPOUND_COMPOUND_2D: + free(data); + break; + default: + break; + } +} + +void +set_dspace_plist(hid_t *plist_id_out, int data_collective) +{ + *plist_id_out = H5Pcreate(H5P_DATASET_XFER); + if (data_collective == 1) + H5Pset_dxpl_mpio(*plist_id_out, H5FD_MPIO_COLLECTIVE); + else + H5Pset_dxpl_mpio(*plist_id_out, H5FD_MPIO_INDEPENDENT); +} + +int +set_select_spaces_default(hid_t *filespace_out, hid_t *memspace_out) +{ + hsize_t count[1] = {1}; + *filespace_out = H5Screate_simple(1, (hsize_t *)&TOTAL_PARTICLES, NULL); + *memspace_out = H5Screate_simple(1, (hsize_t *)&NUM_PARTICLES, NULL); + H5Sselect_hyperslab(*filespace_out, H5S_SELECT_SET, (hsize_t *)&FILE_OFFSET, NULL, count, + (hsize_t *)&NUM_PARTICLES); + // printf("TOTAL_PARTICLES = %lld, NUM_PARTICLES = %lld \n", TOTAL_PARTICLES, NUM_PARTICLES); + return 0; +} + +unsigned long +set_select_spaces_strided(bench_params params, hid_t *filespace_out, hid_t *memspace_out) +{ + if (MY_RANK == 0) { + printf("Stride parameters: STRIDE_SIZE = %lu, BLOCK_SIZE = %lu, BLOCK_CNT = %lu\n", params.stride, + params.block_size, params.block_cnt); + } + if ((params.stride + params.block_size) * params.block_cnt > params.dim_1) { + printf("\n\nInvalid hyperslab setting: (STRIDE_SIZE + BLOCK_SIZE) * BLOCK_CNT" + "must be no greater than the number of available 
particles per rank(%lu).\n\n", + params.chunk_dim_1); + return 0; + } + + unsigned long actual_elem_cnt = params.block_size * params.block_cnt; + + *memspace_out = H5Screate_simple(1, (hsize_t *)&actual_elem_cnt, NULL); + *filespace_out = H5Screate_simple(1, (hsize_t *)&TOTAL_PARTICLES, NULL); + H5Sselect_hyperslab(*filespace_out, H5S_SELECT_SET, (hsize_t *)&FILE_OFFSET, // start-offset + (hsize_t *)&params.stride, // stride + (hsize_t *)&params.block_cnt, // block cnt + (hsize_t *)&params.block_size); // block size + return actual_elem_cnt; +} + +int +set_select_space_2D_array(hid_t *filespace_out, hid_t *memspace_out, unsigned long dim_1, unsigned long dim_2) +{ // dim_1 * dim_2 === NUM_PARTICLES + hsize_t mem_dims[2], file_dims[2]; + mem_dims[0] = (hsize_t)dim_1; + mem_dims[1] = (hsize_t)dim_2; + file_dims[0] = (hsize_t)dim_1 * NUM_RANKS; // total x length: dim_1 * world_size. + file_dims[1] = (hsize_t)dim_2; // always the same dim_2 + + hsize_t count[2] = {1, 1}; + hsize_t file_starts[2], block[2]; // select start point and range in each dimension. 
+ file_starts[0] = dim_1 * (MY_RANK); // file offset for each rank + file_starts[1] = 0; + block[0] = dim_1; + block[1] = dim_2; + + *filespace_out = H5Screate_simple(2, file_dims, NULL); + *memspace_out = H5Screate_simple(2, mem_dims, NULL); + if (MY_RANK == 0) + printf("%lu * %lu 2D array, my x_start = %lu, y_start = %lu, x_blk = %lu, y_blk = %lu\n", dim_1, + dim_2, file_starts[0], file_starts[1], block[0], block[1]); + H5Sselect_hyperslab(*filespace_out, H5S_SELECT_SET, file_starts, NULL, count, block); + return 0; +} + +int +set_select_space_multi_3D_array(hid_t *filespace_out, hid_t *memspace_out, unsigned long dim_1, + unsigned long dim_2, unsigned long dim_3) +{ + hsize_t mem_dims[3]; + hsize_t file_dims[3]; + mem_dims[0] = (hsize_t)dim_1; + mem_dims[1] = (hsize_t)dim_2; + mem_dims[2] = (hsize_t)dim_3; + file_dims[0] = (hsize_t)dim_1 * NUM_RANKS; + file_dims[1] = (hsize_t)dim_2; + file_dims[2] = (hsize_t)dim_3; + + hsize_t count[3] = {1, 1, 1}; + hsize_t file_starts[3], file_range[3]; // select start point and range in each dimension. 
+ file_starts[0] = dim_1 * (MY_RANK); + file_starts[1] = 0; + file_starts[2] = 0; + file_range[0] = dim_1; + file_range[1] = dim_2; + file_range[2] = dim_3; + + *filespace_out = H5Screate_simple(3, file_dims, NULL); + *memspace_out = H5Screate_simple(3, mem_dims, NULL); + + H5Sselect_hyperslab(*filespace_out, H5S_SELECT_SET, file_starts, NULL, count, file_range); + return 0; +} + +/* + * write file: create m-D array as the dataset type, now linear-linear is 8 datasets of 1D array + */ +void +data_write_contig_contig_MD_array(time_step *ts, hid_t loc, hid_t *dset_ids, hid_t filespace, hid_t memspace, + hid_t plist_id, data_contig_md *data_in, unsigned long *metadata_time, + unsigned long *data_time) +{ + assert(data_in && data_in->x); + hid_t dcpl; + if (COMPRESS_INFO.USE_COMPRESS) + dcpl = COMPRESS_INFO.dcpl_id; + else + dcpl = H5P_DEFAULT; + + if (MY_RANK == 0) { + if (COMPRESS_INFO.USE_COMPRESS) + printf("Parallel compressed: chunk_dim1 = %lu, chunk_dim2 = %lu\n", COMPRESS_INFO.chunk_dims[0], + COMPRESS_INFO.chunk_dims[1]); + } + + unsigned t1 = get_time_usec(); + + dset_ids[0] = H5Dcreate_async(loc, "x", H5T_NATIVE_FLOAT, filespace, H5P_DEFAULT, dcpl, H5P_DEFAULT, + ts->es_meta_create); + dset_ids[1] = H5Dcreate_async(loc, "y", H5T_NATIVE_FLOAT, filespace, H5P_DEFAULT, dcpl, H5P_DEFAULT, + ts->es_meta_create); + dset_ids[2] = H5Dcreate_async(loc, "z", H5T_NATIVE_FLOAT, filespace, H5P_DEFAULT, dcpl, H5P_DEFAULT, + ts->es_meta_create); + dset_ids[3] = H5Dcreate_async(loc, "px", H5T_NATIVE_FLOAT, filespace, H5P_DEFAULT, dcpl, H5P_DEFAULT, + ts->es_meta_create); + dset_ids[4] = H5Dcreate_async(loc, "py", H5T_NATIVE_FLOAT, filespace, H5P_DEFAULT, dcpl, H5P_DEFAULT, + ts->es_meta_create); + dset_ids[5] = H5Dcreate_async(loc, "pz", H5T_NATIVE_FLOAT, filespace, H5P_DEFAULT, dcpl, H5P_DEFAULT, + ts->es_meta_create); + dset_ids[6] = H5Dcreate_async(loc, "id_1", H5T_NATIVE_INT, filespace, H5P_DEFAULT, dcpl, H5P_DEFAULT, + ts->es_meta_create); + dset_ids[7] = 
H5Dcreate_async(loc, "id_2", H5T_NATIVE_FLOAT, filespace, H5P_DEFAULT, dcpl, H5P_DEFAULT, + ts->es_meta_create); + + unsigned t2 = get_time_usec(); + + ierr = + H5Dwrite_async(dset_ids[0], H5T_NATIVE_FLOAT, memspace, filespace, plist_id, data_in->x, ts->es_data); + ierr = + H5Dwrite_async(dset_ids[1], H5T_NATIVE_FLOAT, memspace, filespace, plist_id, data_in->y, ts->es_data); + ierr = + H5Dwrite_async(dset_ids[2], H5T_NATIVE_FLOAT, memspace, filespace, plist_id, data_in->z, ts->es_data); + ierr = H5Dwrite_async(dset_ids[3], H5T_NATIVE_FLOAT, memspace, filespace, plist_id, data_in->px, + ts->es_data); + ierr = H5Dwrite_async(dset_ids[4], H5T_NATIVE_FLOAT, memspace, filespace, plist_id, data_in->py, + ts->es_data); + ierr = H5Dwrite_async(dset_ids[5], H5T_NATIVE_FLOAT, memspace, filespace, plist_id, data_in->pz, + ts->es_data); + ierr = H5Dwrite_async(dset_ids[6], H5T_NATIVE_INT, memspace, filespace, plist_id, data_in->id_1, + ts->es_data); + ierr = H5Dwrite_async(dset_ids[7], H5T_NATIVE_FLOAT, memspace, filespace, plist_id, data_in->id_2, + ts->es_data); + + unsigned t3 = get_time_usec(); + + *metadata_time = t2 - t1; + *data_time = t3 - t2; + + if (MY_RANK == 0) + printf(" %s: Finished writing time step \n", __func__); +} + +void +data_write_contig_to_interleaved(time_step *ts, hid_t loc, hid_t *dset_ids, hid_t filespace, hid_t memspace, + hid_t plist_id, data_contig_md *data_in, unsigned long *metadata_time, + unsigned long *data_time) +{ + assert(data_in && data_in->x); + hid_t dcpl; + if (COMPRESS_INFO.USE_COMPRESS) + dcpl = COMPRESS_INFO.dcpl_id; + else + dcpl = H5P_DEFAULT; + + unsigned t1 = get_time_usec(); + dset_ids[0] = H5Dcreate_async(loc, "particles", PARTICLE_COMPOUND_TYPE, filespace, H5P_DEFAULT, dcpl, + H5P_DEFAULT, ts->es_meta_create); + + unsigned t2 = get_time_usec(); + ierr = H5Dwrite_async(dset_ids[0], PARTICLE_COMPOUND_TYPE_SEPARATES[0], memspace, filespace, plist_id, + data_in->x, ts->es_data); + ierr = H5Dwrite_async(dset_ids[0], 
PARTICLE_COMPOUND_TYPE_SEPARATES[1], memspace, filespace, plist_id, + data_in->y, ts->es_data); + ierr = H5Dwrite_async(dset_ids[0], PARTICLE_COMPOUND_TYPE_SEPARATES[2], memspace, filespace, plist_id, + data_in->z, ts->es_data); + ierr = H5Dwrite_async(dset_ids[0], PARTICLE_COMPOUND_TYPE_SEPARATES[3], memspace, filespace, plist_id, + data_in->px, ts->es_data); + ierr = H5Dwrite_async(dset_ids[0], PARTICLE_COMPOUND_TYPE_SEPARATES[4], memspace, filespace, plist_id, + data_in->py, ts->es_data); + ierr = H5Dwrite_async(dset_ids[0], PARTICLE_COMPOUND_TYPE_SEPARATES[5], memspace, filespace, plist_id, + data_in->pz, ts->es_data); + ierr = H5Dwrite_async(dset_ids[0], PARTICLE_COMPOUND_TYPE_SEPARATES[6], memspace, filespace, plist_id, + data_in->id_1, ts->es_data); + ierr = H5Dwrite_async(dset_ids[0], PARTICLE_COMPOUND_TYPE_SEPARATES[7], memspace, filespace, plist_id, + data_in->id_2, ts->es_data); + + unsigned t3 = get_time_usec(); + *metadata_time = t2 - t1; + *data_time = t3 - t2; + if (MY_RANK == 0) + printf(" %s: Finished writing time step \n", __func__); +} + +void +data_write_interleaved_to_contig(time_step *ts, hid_t loc, hid_t *dset_ids, hid_t filespace, hid_t memspace, + hid_t plist_id, particle *data_in, unsigned long *metadata_time, + unsigned long *data_time) +{ + assert(data_in); + hid_t dcpl; + if (COMPRESS_INFO.USE_COMPRESS) + dcpl = COMPRESS_INFO.dcpl_id; + else + dcpl = H5P_DEFAULT; + + unsigned t1 = get_time_usec(); + dset_ids[0] = H5Dcreate_async(loc, "x", PARTICLE_COMPOUND_TYPE_SEPARATES[0], filespace, H5P_DEFAULT, dcpl, + H5P_DEFAULT, ts->es_meta_create); + dset_ids[1] = H5Dcreate_async(loc, "y", PARTICLE_COMPOUND_TYPE_SEPARATES[1], filespace, H5P_DEFAULT, dcpl, + H5P_DEFAULT, ts->es_meta_create); + dset_ids[2] = H5Dcreate_async(loc, "z", PARTICLE_COMPOUND_TYPE_SEPARATES[2], filespace, H5P_DEFAULT, dcpl, + H5P_DEFAULT, ts->es_meta_create); + dset_ids[3] = H5Dcreate_async(loc, "px", PARTICLE_COMPOUND_TYPE_SEPARATES[3], filespace, H5P_DEFAULT, + dcpl, 
H5P_DEFAULT, ts->es_meta_create); + dset_ids[4] = H5Dcreate_async(loc, "py", PARTICLE_COMPOUND_TYPE_SEPARATES[4], filespace, H5P_DEFAULT, + dcpl, H5P_DEFAULT, ts->es_meta_create); + dset_ids[5] = H5Dcreate_async(loc, "pz", PARTICLE_COMPOUND_TYPE_SEPARATES[5], filespace, H5P_DEFAULT, + dcpl, H5P_DEFAULT, ts->es_meta_create); + dset_ids[6] = H5Dcreate_async(loc, "id_1", PARTICLE_COMPOUND_TYPE_SEPARATES[6], filespace, H5P_DEFAULT, + dcpl, H5P_DEFAULT, ts->es_meta_create); + dset_ids[7] = H5Dcreate_async(loc, "id_2", PARTICLE_COMPOUND_TYPE_SEPARATES[7], filespace, H5P_DEFAULT, + dcpl, H5P_DEFAULT, ts->es_meta_create); + + unsigned t2 = get_time_usec(); + + ierr = H5Dwrite_async(dset_ids[0], PARTICLE_COMPOUND_TYPE, memspace, filespace, plist_id, data_in, + ts->es_data); + ierr = H5Dwrite_async(dset_ids[1], PARTICLE_COMPOUND_TYPE, memspace, filespace, plist_id, data_in, + ts->es_data); + ierr = H5Dwrite_async(dset_ids[2], PARTICLE_COMPOUND_TYPE, memspace, filespace, plist_id, data_in, + ts->es_data); + ierr = H5Dwrite_async(dset_ids[3], PARTICLE_COMPOUND_TYPE, memspace, filespace, plist_id, data_in, + ts->es_data); + ierr = H5Dwrite_async(dset_ids[4], PARTICLE_COMPOUND_TYPE, memspace, filespace, plist_id, data_in, + ts->es_data); + ierr = H5Dwrite_async(dset_ids[5], PARTICLE_COMPOUND_TYPE, memspace, filespace, plist_id, data_in, + ts->es_data); + ierr = H5Dwrite_async(dset_ids[6], PARTICLE_COMPOUND_TYPE, memspace, filespace, plist_id, data_in, + ts->es_data); + ierr = H5Dwrite_async(dset_ids[7], PARTICLE_COMPOUND_TYPE, memspace, filespace, plist_id, data_in, + ts->es_data); + + unsigned t3 = get_time_usec(); + *metadata_time = t2 - t1; + *data_time = t3 - t2; + if (MY_RANK == 0) + printf(" %s: Finished writing time step \n", __func__); +} + +void +data_write_interleaved_to_interleaved(time_step *ts, hid_t loc, hid_t *dset_ids, hid_t filespace, + hid_t memspace, hid_t plist_id, particle *data_in, + unsigned long *metadata_time, unsigned long *data_time) +{ + 
assert(data_in); + hid_t dcpl; + if (COMPRESS_INFO.USE_COMPRESS) + dcpl = COMPRESS_INFO.dcpl_id; + else + dcpl = H5P_DEFAULT; + unsigned t1 = get_time_usec(); + dset_ids[0] = H5Dcreate_async(loc, "particles", PARTICLE_COMPOUND_TYPE, filespace, H5P_DEFAULT, dcpl, + H5P_DEFAULT, ts->es_meta_create); + + unsigned t2 = get_time_usec(); + ierr = H5Dwrite_async(dset_ids[0], PARTICLE_COMPOUND_TYPE, memspace, filespace, plist_id, data_in, + ts->es_data); + + // should write all things in data_in + unsigned t3 = get_time_usec(); + *metadata_time = t2 - t1; + *data_time = t3 - t2; + if (MY_RANK == 0) + printf(" %s: Finished writing time step \n", __func__); +} + +void * +_prepare_data(bench_params params, hid_t *filespace_out, hid_t *memspace_out, + unsigned long *data_preparation_time, unsigned long *data_size) +{ + void *data = NULL; + + make_compound_type_separates(); + make_compound_type(); + hid_t filespace, memspace; + *data_preparation_time = 0; + + // unsigned long data_size; + unsigned long long particle_cnt = NUM_PARTICLES; + unsigned long actual_elem_cnt = 0; // only for set_select_spaces_strided() + int dset_cnt = 0; + unsigned long t_prep_start = get_time_usec(); + switch (params.access_pattern.pattern_write) { + case CONTIG_CONTIG_1D: + set_select_spaces_default(filespace_out, memspace_out); + data = (void *)prepare_data_contig_1D(particle_cnt, data_size); + dset_cnt = 8; + break; + + case CONTIG_CONTIG_2D: + set_select_space_2D_array(filespace_out, memspace_out, params.dim_1, params.dim_2); + data = (void *)prepare_data_contig_2D(particle_cnt, params.dim_1, params.dim_2, data_size); + dset_cnt = 8; + break; + + case CONTIG_CONTIG_STRIDED_1D: + actual_elem_cnt = set_select_spaces_strided(params, filespace_out, memspace_out); + if (actual_elem_cnt < 1) { + printf("Strided write setting error.\n"); + return NULL; + } + data = (void *)prepare_data_contig_1D(actual_elem_cnt, data_size); + dset_cnt = 8; + break; + + case CONTIG_COMPOUND_1D: + 
set_select_spaces_default(filespace_out, memspace_out); + data = (void *)prepare_data_contig_1D(particle_cnt, data_size); + dset_cnt = 1; + break; + + case CONTIG_COMPOUND_2D: + set_select_space_2D_array(filespace_out, memspace_out, params.dim_1, params.dim_2); + data = (void *)prepare_data_contig_2D(particle_cnt, params.dim_1, params.dim_2, data_size); + dset_cnt = 1; + break; + + case COMPOUND_CONTIG_1D: + set_select_spaces_default(filespace_out, memspace_out); + data = (void *)prepare_data_interleaved(particle_cnt, data_size); + dset_cnt = 8; + break; + + case COMPOUND_CONTIG_2D: + set_select_space_2D_array(filespace_out, memspace_out, params.dim_1, params.dim_2); + data = (void *)prepare_data_interleaved(particle_cnt, data_size); + dset_cnt = 8; + break; + + case COMPOUND_COMPOUND_1D: + set_select_spaces_default(filespace_out, memspace_out); + data = (void *)prepare_data_interleaved(particle_cnt, data_size); + dset_cnt = 1; + break; + + case COMPOUND_COMPOUND_2D: + set_select_space_2D_array(filespace_out, memspace_out, params.dim_1, params.dim_2); + data = (void *)prepare_data_interleaved(particle_cnt, data_size); + dset_cnt = 1; + break; + + case CONTIG_CONTIG_3D: + set_select_space_multi_3D_array(filespace_out, memspace_out, params.dim_1, params.dim_2, + params.dim_3); + data = (void *)prepare_data_contig_3D(particle_cnt, params.dim_1, params.dim_2, params.dim_3, + data_size); + dset_cnt = 8; + break; + default: + assert(0 && "this mode is not yet implemented"); + break; + } + *data_preparation_time = get_time_usec() - t_prep_start; + return data; +} + +int +_run_benchmark_write(bench_params params, hid_t file_id, hid_t fapl, hid_t filespace, hid_t memspace, + void *data, unsigned long data_size, unsigned long *total_data_size_out, + unsigned long *data_time_total, unsigned long *metadata_time_total) +{ + unsigned long long data_preparation_time; + + write_pattern pattern = params.access_pattern.pattern_write; + int timestep_cnt = params.cnt_time_step; + 
*metadata_time_total = 0; + *data_time_total = 0; + char grp_name[128]; + int grp_cnt = 0, dset_cnt = 0; + hid_t plist_id; //, filespace, memspace; + + if (params.file_per_proc) { + plist_id = H5Pcreate(H5P_DATASET_XFER); + } + else { + set_dspace_plist(&plist_id, params.data_coll); + } + + if (!data) { + if (MY_RANK == 0) + printf("Failed to generate data for writing, " + "please check dimension settings in the config file.\n"); + return -1; + } + + MEM_MONITOR = mem_monitor_new(timestep_cnt, ASYNC_MODE, data_size, params.io_mem_limit); + + if (!MEM_MONITOR) { + printf("Invalid MEM_MONITOR returned: NULL\n"); + return -1; + } + + timestep_es_id_set(); + + unsigned long metadata_time_exp = 0, data_time_exp = 0, t0, t1, t2, t3, t4; + unsigned long metadata_time_imp = 0, data_time_imp = 0; + unsigned long meta_time1 = 0, meta_time2 = 0, meta_time3 = 0, meta_time4 = 0, meta_time5 = 0; + for (int ts_index = 0; ts_index < timestep_cnt; ts_index++) { + meta_time1 = 0, meta_time2 = 0, meta_time3 = 0, meta_time4 = 0, meta_time5 = 0; + time_step *ts = &(MEM_MONITOR->time_steps[ts_index]); + MEM_MONITOR->mem_used += ts->mem_size; + // print_mem_bound(MEM_MONITOR); + sprintf(grp_name, "Timestep_%d", ts_index); + assert(ts); + + if (params.cnt_time_step_delay > 0) { + if (ts_index > params.cnt_time_step_delay - 1) // delayed close on all ids of the previous ts + ts_delayed_close(MEM_MONITOR, &meta_time1, dset_cnt); + } + + mem_monitor_check_run(MEM_MONITOR, &meta_time2, &data_time_imp); + + t0 = get_time_usec(); + ts->grp_id = + H5Gcreate_async(file_id, grp_name, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT, ts->es_meta_create); + + t1 = get_time_usec(); + meta_time3 = (t1 - t0); + + if (MY_RANK == 0) + printf("Writing %s ... 
\n", grp_name); + + switch (pattern) { + case CONTIG_CONTIG_1D: + case CONTIG_CONTIG_2D: + case CONTIG_CONTIG_3D: + case CONTIG_CONTIG_STRIDED_1D: + data_write_contig_contig_MD_array(ts, ts->grp_id, ts->dset_ids, filespace, memspace, plist_id, + (data_contig_md *)data, &meta_time4, &data_time_exp); + dset_cnt = 8; + break; + + case CONTIG_COMPOUND_1D: + case CONTIG_COMPOUND_2D: + data_write_contig_to_interleaved(ts, ts->grp_id, ts->dset_ids, filespace, memspace, plist_id, + (data_contig_md *)data, &meta_time4, &data_time_exp); + dset_cnt = 1; + break; + + case COMPOUND_CONTIG_1D: + case COMPOUND_CONTIG_2D: + data_write_interleaved_to_contig(ts, ts->grp_id, ts->dset_ids, filespace, memspace, plist_id, + (particle *)data, &meta_time4, &data_time_exp); + dset_cnt = 8; + break; + + case COMPOUND_COMPOUND_1D: + case COMPOUND_COMPOUND_2D: + data_write_interleaved_to_interleaved(ts, ts->grp_id, ts->dset_ids, filespace, memspace, + plist_id, (particle *)data, &meta_time4, + &data_time_exp); + dset_cnt = 1; + break; + + default: + break; + } + + ts->status = TS_DELAY; + + if (params.cnt_time_step_delay == 0) { + t3 = get_time_usec(); + + for (int j = 0; j < dset_cnt; j++) { + if (ts->dset_ids[j] != 0) { + H5Dclose_async(ts->dset_ids[j], ts->es_meta_close); + } + } + H5Gclose_async(ts->grp_id, ts->es_meta_close); + + ts->status = TS_READY; + t4 = get_time_usec(); + meta_time5 += (t4 - t3); + } + + if (ts_index != timestep_cnt - 1) { // no sleep after the last ts + if (params.compute_time.time_num >= 0) { + if (MY_RANK == 0) + printf("Computing...\n"); + async_sleep(ts->es_data, params.compute_time); + } + } + + *metadata_time_total += (meta_time1 + meta_time2 + meta_time3 + meta_time4); + *data_time_total += (data_time_exp + data_time_imp); + } // end for timestep_cnt + + // all done, check if any timesteps undone + + mem_monitor_final_run(MEM_MONITOR, &metadata_time_imp, &data_time_imp); + + *metadata_time_total += metadata_time_imp; + *data_time_total += data_time_imp; + + 
H5Tclose(PARTICLE_COMPOUND_TYPE); + for (int i = 0; i < 8; i++) + H5Tclose(PARTICLE_COMPOUND_TYPE_SEPARATES[i]); + + *total_data_size_out = timestep_cnt * data_size; + + data_free(pattern, data); + H5Sclose(memspace); + H5Sclose(filespace); + H5Pclose(plist_id); + return 0; +} + +void +set_globals(const bench_params *params) +{ + NUM_PARTICLES = params->num_particles; + NUM_TIMESTEPS = params->cnt_time_step; + // Following variables only used to generate data + X_DIM = X_RAND; + Y_DIM = Y_RAND; + Z_DIM = Z_RAND; + COMPRESS_INFO.USE_COMPRESS = params->useCompress; + COMPRESS_INFO.chunk_dims[0] = params->chunk_dim_1; + COMPRESS_INFO.chunk_dims[1] = params->chunk_dim_2; + COMPRESS_INFO.chunk_dims[2] = params->chunk_dim_3; + + if (COMPRESS_INFO.USE_COMPRESS) { // set DCPL + herr_t ret; + COMPRESS_INFO.dcpl_id = H5Pcreate(H5P_DATASET_CREATE); + assert(COMPRESS_INFO.dcpl_id > 0); + + /* Set chunked layout and chunk dimensions */ + ret = H5Pset_layout(COMPRESS_INFO.dcpl_id, H5D_CHUNKED); + assert(ret >= 0); + ret = + H5Pset_chunk(COMPRESS_INFO.dcpl_id, params->num_dims, (const hsize_t *)COMPRESS_INFO.chunk_dims); + assert(ret >= 0); + ret = H5Pset_deflate(COMPRESS_INFO.dcpl_id, 9); + assert(ret >= 0); + } + + ASYNC_MODE = params->asyncMode; +} + +hid_t +set_fapl() +{ + hid_t fapl = H5Pcreate(H5P_FILE_ACCESS); + return fapl; +} + +hid_t +set_metadata(hid_t fapl, int align, unsigned long threshold, unsigned long alignment_len, int meta_collective) +{ + hsize_t threshold_o, alignment_len_o; + herr_t ret; + if (align == 1) { + H5Pset_alignment(fapl, threshold, alignment_len); + + ret = H5Pget_alignment(fapl, &threshold_o, &alignment_len_o); + if (ret < 0) + if (MY_RANK == 0) + printf("H5Pget_alignment failed \n"); + + if (MY_RANK == 0) { + printf("GPFS alignment settings: ON\n"); + printf("Value of alignment length : %ld\n", alignment_len_o); + printf("Value of alignment threshold : %ld\n", threshold_o); + } + } + if (meta_collective == 1) { + if (MY_RANK == 0) + 
printf("Collective Metadata operations: ON\n"); +#if H5_VERSION_GE(1, 10, 0) + H5Pset_all_coll_metadata_ops(fapl, 1); + H5Pset_coll_metadata_write(fapl, 1); +#endif + } + else { + if (MY_RANK == 0) + printf("Collective Metadata operations: OFF\n"); + } + + // Defer metadata flush + if (DEFER_METADATA_FLUSH) { + H5AC_cache_config_t cache_config; + cache_config.version = H5AC__CURR_CACHE_CONFIG_VERSION; + H5Pget_mdc_config(fapl, &cache_config); + cache_config.set_initial_size = 1; + cache_config.initial_size = 16 * M_VAL; + cache_config.evictions_enabled = 0; + cache_config.incr_mode = H5C_incr__off; + cache_config.flash_incr_mode = H5C_flash_incr__off; + cache_config.decr_mode = H5C_decr__off; + H5Pset_mdc_config(fapl, &cache_config); + } + return fapl; +} + +void +print_usage(char *name) +{ + if (MY_RANK == 0) { + printf("=============== Usage: %s /path_to_config_file /path_to_output_data_file [CSV " + "csv_file_path]=============== \n", + name); + printf("- CSV is optional.\n"); + printf("- Only CC/CI/IC/II/CC2D/CC3D is used to set benchmark mode in the config file, stands for " + "CONTIG_CONTIG_1D, CONTIG_COMPOUND_1D, COMPOUND_CONTIG_1D, COMPOUND_COMPOUND_1D, 2D Array and " + "3D Array\n"); + printf("- For 2D/3D benchmarks, make sure the dimensions are set correctly and matches the per rank " + "particle number.\n"); + printf("- For example, when your PATTERN is CC3D, and PARTICLE_CNT_M is 1, setting DIM_1~3 to 64, " + "64, and 256 is valid, because 64*64*256 = 1,048,576 (1M); and 10*20*30 is invalid. 
\n"); + } +} + +int +main(int argc, char *argv[]) +{ + int mpi_thread_lvl_provided = -1; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &mpi_thread_lvl_provided); + assert(MPI_THREAD_MULTIPLE == mpi_thread_lvl_provided); + MPI_Comm_rank(MPI_COMM_WORLD, &MY_RANK); + MPI_Comm_size(MPI_COMM_WORLD, &NUM_RANKS); + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Info info = MPI_INFO_NULL; + char * num_str = "1024 Ks"; + unsigned long long num = 0; + + char buffer[200]; + + int rand_seed_value = time(NULL); + srand(rand_seed_value); + + if (MY_RANK == 0) { + if (argc != 3) { + print_usage(argv[0]); + return 0; + } + } + + char * output_file; + bench_params params; + + char *cfg_file_path = argv[1]; + output_file = argv[2]; + if (MY_RANK == 0) { + printf("Configuration file: %s\n", argv[1]); + printf("Output data file: %s\n", argv[2]); + } + int do_write = 1; + if (read_config(cfg_file_path, ¶ms, do_write) < 0) { + if (MY_RANK == 0) + printf("Configuration file read failed. Please, check %s\n", cfg_file_path); + return 0; + } + + if (params.io_op != IO_WRITE) { + if (MY_RANK == 0) + printf("Make sure the configuration file has IO_OPERATION=WRITE defined\n"); + return 0; + } + + if (params.useCompress) + params.data_coll = 1; + + if (params.subfiling) + subfiling = 1; + +#if H5_VERSION_GE(1, 13, 0) + if (H5VLis_connector_registered_by_name("async")) { + if (MY_RANK == 0) { + printf("Using 'async' VOL connector\n"); + } + } +#endif + + if (MY_RANK == 0) { + print_params(¶ms); + } + + set_globals(¶ms); + + NUM_TIMESTEPS = params.cnt_time_step; + + STDEV_DIM_1 = params.stdev_dim_1; + + holder = (long long *)malloc(NUM_RANKS * sizeof(long long)); + + if (MY_RANK == 0) { + printf("Start benchmark: h5bench_write\n"); + printf("Given mean particles : %llu \n", NUM_PARTICLES); + printf("Given standard deviation : %ld \n", STDEV_DIM_1); + printf("Total ranks %i \n", NUM_RANKS); + + for (int i = 0; i < NUM_RANKS; i++) { + holder[i] = (long long)normal_dist_particle_giver(NUM_PARTICLES, 
STDEV_DIM_1); + } + } + MPI_Barrier(MPI_COMM_WORLD); + + MPI_Scatter(&holder[0], 1, MPI_LONG_LONG, &NUM_PARTICLES, 1, MPI_LONG_LONG, 0, MPI_COMM_WORLD); + printf("\n Number of particles in rank %d = %lld \n", MY_RANK, NUM_PARTICLES); + + unsigned long total_write_size = + NUM_RANKS * NUM_TIMESTEPS * NUM_PARTICLES * (7 * sizeof(float) + sizeof(int)); + hid_t filespace = 0, memspace = 0; + unsigned long data_size = 0; + unsigned long data_preparation_time = 0; + + MPI_Barrier(MPI_COMM_WORLD); + + MPI_Allreduce(&NUM_PARTICLES, &TOTAL_PARTICLES, 1, MPI_LONG_LONG, MPI_SUM, comm); + MPI_Scan(&NUM_PARTICLES, &FILE_OFFSET, 1, MPI_LONG_LONG, MPI_SUM, comm); + + FILE_OFFSET -= NUM_PARTICLES; + + if (MY_RANK == 0) + printf("Total number of particles: %lldM\n", TOTAL_PARTICLES / (M_VAL)); + + hid_t fapl = set_fapl(); + ALIGN = params.align; + ALIGN_THRESHOLD = params.align_threshold; + ALIGN_LEN = params.align_len; + + if (params.file_per_proc) { + } + else { +#ifdef HAVE_SUBFILING + if (params.subfiling == 1) + H5Pset_fapl_subfiling(fapl, NULL); + else +#endif + H5Pset_fapl_mpio(fapl, comm, info); + set_metadata(fapl, ALIGN, ALIGN_THRESHOLD, ALIGN_LEN, params.meta_coll); + } + + void *data = _prepare_data(params, &filespace, &memspace, &data_preparation_time, &data_size); + + unsigned long t1 = get_time_usec(); + + hid_t file_id; + + unsigned long tfopen_start = get_time_usec(); + if (params.file_per_proc) { + char mpi_rank_output_file_path[4096]; + sprintf(mpi_rank_output_file_path, "%s/rank_%d_%s", get_dir_from_path(output_file), MY_RANK, + get_file_name_from_path(output_file)); + + file_id = H5Fcreate_async(mpi_rank_output_file_path, H5F_ACC_TRUNC, H5P_DEFAULT, fapl, 0); + } + else { + file_id = H5Fcreate_async(output_file, H5F_ACC_TRUNC, H5P_DEFAULT, fapl, 0); + } + unsigned long tfopen_end = get_time_usec(); + + if (MY_RANK == 0) + printf("Opened HDF5 file... 
\n"); + + MPI_Barrier(MPI_COMM_WORLD); + unsigned long t2 = get_time_usec(); // t2 - t1: metadata: creating/opening + + unsigned long raw_write_time, inner_metadata_time, local_data_size; + int stat = _run_benchmark_write(params, file_id, fapl, filespace, memspace, data, data_size, + &local_data_size, &raw_write_time, &inner_metadata_time); + + if (stat < 0) { + if (MY_RANK == 0) + printf("\n==================== Benchmark Failed ====================\n"); + assert(0); + } + + unsigned long t3 = get_time_usec(); // t3 - t2: writting data, including metadata + + H5Pclose(fapl); + unsigned long tflush_start = get_time_usec(); + H5Fflush(file_id, H5F_SCOPE_LOCAL); + MPI_Barrier(MPI_COMM_WORLD); + unsigned long tflush_end = get_time_usec(); + + unsigned long tfclose_start = get_time_usec(); + + H5Fclose_async(file_id, 0); + + unsigned long tfclose_end = get_time_usec(); + MPI_Barrier(MPI_COMM_WORLD); + unsigned long t4 = get_time_usec(); + + long final_mean = 0, final_std = 0, std_1 = 0; + final_mean = TOTAL_PARTICLES / NUM_RANKS; + std_1 = pow(NUM_PARTICLES - final_mean, 2); + MPI_Allreduce(&std_1, &final_std, 1, MPI_LONG_LONG, MPI_SUM, comm); + + if (MY_RANK == 0) { + human_readable value; + char * mode_str = NULL; + + if (has_vol_async) { + mode_str = "ASYNC"; + } + else { + mode_str = "SYNC"; + } + printf("\n=================== Performance Results ==================\n"); + + printf("Total number of ranks: %d\n", NUM_RANKS); + printf("Total number of particles: %lldM\n", TOTAL_PARTICLES / (M_VAL)); + printf("Final mean particles: %ld \n", final_mean); + printf("Final standard deviation: %f \n", sqrt(final_std / NUM_RANKS)); + unsigned long long total_sleep_time_us = + read_time_val(params.compute_time, TIME_US) * (params.cnt_time_step - 1); + printf("Total emulated compute time: %.3lf s\n", total_sleep_time_us / (1000.0 * 1000.0)); + + double total_size_bytes = NUM_RANKS * local_data_size; + value = format_human_readable(total_size_bytes); + printf("Total write size: 
%.3lf %cB\n", value.value, value.unit); + + float rwt_s = (float)raw_write_time / (1000.0 * 1000.0); + float raw_rate = (float)total_size_bytes / rwt_s; + printf("Raw write time: %.3f s\n", rwt_s); + + float meta_time_s = (float)inner_metadata_time / (1000.0 * 1000.0); + printf("Metadata time: %.3f s\n", meta_time_s); + + float fcreate_time_s = (float)(tfopen_end - tfopen_start) / (1000.0 * 1000.0); + printf("H5Fcreate() time: %.3f s\n", fcreate_time_s); + + float flush_time_s = (float)(tflush_end - tflush_start) / (1000.0 * 1000.0); + printf("H5Fflush() time: %.3f s\n", flush_time_s); + + float fclose_time_s = (float)(tfclose_end - tfclose_start) / (1000.0 * 1000.0); + printf("H5Fclose() time: %.3f s\n", fclose_time_s); + + float oct_s = (float)(t4 - t1) / (1000.0 * 1000.0); + printf("Observed completion time: %.3f s\n", oct_s); + + value = format_human_readable(raw_rate); + printf("%s Raw write rate: %.3f %cB/s \n", mode_str, value.value, value.unit); + + float or_bs = (float)total_size_bytes / ((float)(t4 - t1 - total_sleep_time_us) / (1000.0 * 1000.0)); + value = format_human_readable(or_bs); + printf("%s Observed write rate: %.3f %cB/s\n", mode_str, value.value, value.unit); + + printf("===========================================================\n"); + + if (params.useCSV) { + fprintf(params.csv_fs, "metric, value, unit\n"); + fprintf(params.csv_fs, "operation, %s, %s\n", "write_var_normal_dist", ""); + fprintf(params.csv_fs, "ranks, %d, %s\n", NUM_RANKS, ""); + fprintf(params.csv_fs, "Total number of particles, %lldM, %s\n", TOTAL_PARTICLES / (M_VAL), ""); + fprintf(params.csv_fs, "Final mean particles, %ld, %s\n", final_mean, ""); + fprintf(params.csv_fs, "Final standard deviation, %f, %s\n", sqrt(final_std / NUM_RANKS), ""); + fprintf(params.csv_fs, "collective data, %s, %s\n", params.data_coll == 1 ? "YES" : "NO", ""); + fprintf(params.csv_fs, "collective meta, %s, %s\n", params.meta_coll == 1 ? 
"YES" : "NO", ""); + fprintf(params.csv_fs, "subfiling, %s, %s\n", params.subfiling == 1 ? "YES" : "NO", ""); + fprintf(params.csv_fs, "total compute time, %.3lf, %s\n", total_sleep_time_us / (1000.0 * 1000.0), + "seconds"); + value = format_human_readable(total_size_bytes); + fprintf(params.csv_fs, "total size, %.3lf, %cB\n", value.value, value.unit); + fprintf(params.csv_fs, "raw time, %.3f, %s\n", rwt_s, "seconds"); + value = format_human_readable(raw_rate); + fprintf(params.csv_fs, "raw rate, %.3lf, %cB/s\n", value.value, value.unit); + fprintf(params.csv_fs, "metadata time, %.3f, %s\n", meta_time_s, "seconds"); + value = format_human_readable(or_bs); + fprintf(params.csv_fs, "observed rate, %.3f, %cB/s\n", value.value, value.unit); + fprintf(params.csv_fs, "observed time, %.3f, %s\n", oct_s, "seconds"); + fclose(params.csv_fs); + } + } + + MPI_Finalize(); + return 0; +} From bddc4c9d04978c09931fa932c354d9e5bf8b205b Mon Sep 17 00:00:00 2001 From: John-Paul Robinson Date: Wed, 10 Apr 2024 00:36:24 -0500 Subject: [PATCH 02/11] Add write_var_data_dist benchmark support to h5bench.py Created a new benchmark test name and added selection support into h5bench.py test wrapper. 
--- src/h5bench.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/h5bench.py b/src/h5bench.py index b50b7850..e8efb372 100755 --- a/src/h5bench.py +++ b/src/h5bench.py @@ -23,6 +23,7 @@ class H5bench: H5BENCH_PATTERNS_WRITE = 'h5bench_write' H5BENCH_PATTERNS_WRITE_UNLIMITED = 'h5bench_write_unlimited' H5BENCH_PATTERNS_WRITE_VAR_NORMAL_DIST = 'h5bench_write_var_normal_dist' + H5BENCH_PATTERNS_WRITE_VAR_DATA_DIST = 'h5bench_write_var_data_dist' H5BENCH_PATTERNS_APPEND = 'h5bench_append' H5BENCH_PATTERNS_OVERWRITE = 'h5bench_overwrite' H5BENCH_PATTERNS_READ = 'h5bench_read' @@ -231,7 +232,7 @@ def run(self): self.prepare_parallel(setup['mpi']) - if name in ['write', 'write-unlimited', 'overwrite', 'append', 'read', 'write_var_normal_dist']: + if name in ['write', 'write-unlimited', 'overwrite', 'append', 'read', 'write_var_normal_dist', 'write_var_data_dist']: self.run_pattern(id, name, benchmark, setup['vol']) elif name == 'exerciser': self.run_exerciser(id, benchmark) @@ -389,6 +390,9 @@ def run_pattern(self, id, operation, setup, vol): if operation == 'write_var_normal_dist': benchmark_path = self.H5BENCH_PATTERNS_WRITE_VAR_NORMAL_DIST + if operation == 'write_var_data_dist': + benchmark_path = self.H5BENCH_PATTERNS_WRITE_VAR_DATA_DIST + if operation == 'overwrite': benchmark_path = self.H5BENCH_PATTERNS_OVERWRITE From 776cc517b7497af36ef4cd95c60c4c49aae5b458 Mon Sep 17 00:00:00 2001 From: John-Paul Robinson Date: Wed, 10 Apr 2024 00:38:56 -0500 Subject: [PATCH 03/11] Add DATA_DIST_PATH support to params struct and config file Added parsing of DATA_DIST_PATH into params struct to record the data distribution based on an input file. 
--- commons/h5bench_util.c | 6 +++++- commons/h5bench_util.h | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/commons/h5bench_util.c b/commons/h5bench_util.c index 5f898c86..906a3a0d 100644 --- a/commons/h5bench_util.c +++ b/commons/h5bench_util.c @@ -978,7 +978,11 @@ _set_params(char *key, char *val_in, bench_params *params_in_out, int do_write) } else if (strcmp(key, "CSV_FILE") == 0) { (*params_in_out).useCSV = 1; - (*params_in_out).csv_path = strdup(val); + (*params_in_out).csv_path = strdup(val);\ + } + else if (strcmp(key, "DATA_DIST_PATH") == 0) { + (*params_in_out).useDataDist = 1; + (*params_in_out).data_dist_path = strdup(val); } else if (strcmp(key, "ENV_METADATA_FILE") == 0) { (*params_in_out).env_meta_path = strdup(val); diff --git a/commons/h5bench_util.h b/commons/h5bench_util.h index 5fc8adb4..0400572b 100644 --- a/commons/h5bench_util.h +++ b/commons/h5bench_util.h @@ -107,6 +107,7 @@ typedef struct bench_params { read_option read_option; int useCompress; int useCSV; + int useDataDist; async_mode asyncMode; int subfiling; union access_pattern { @@ -143,6 +144,7 @@ typedef struct bench_params { char * csv_path; char * env_meta_path; FILE * csv_fs; + char * data_dist_path; int file_per_proc; int align; unsigned long align_threshold; From 2088710d6d8b2c24d6f70248bb49438b3fadd6e6 Mon Sep 17 00:00:00 2001 From: John-Paul Robinson Date: Wed, 10 Apr 2024 00:40:47 -0500 Subject: [PATCH 04/11] Add data dist file parsing to populate memory allocation pattern Takes the provided DATA_DIST_PATH file and reads the data sizes per rank from the file feeding into holder array. 
--- h5bench_patterns/h5bench_write_data_dist.c | 38 ++++++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/h5bench_patterns/h5bench_write_data_dist.c b/h5bench_patterns/h5bench_write_data_dist.c index 46deeb59..8fe8f8be 100644 --- a/h5bench_patterns/h5bench_write_data_dist.c +++ b/h5bench_patterns/h5bench_write_data_dist.c @@ -1088,10 +1088,42 @@ main(int argc, char *argv[]) printf("Given standard deviation : %ld \n", STDEV_DIM_1); printf("Total ranks %i \n", NUM_RANKS); - for (int i = 0; i < NUM_RANKS; i++) { - holder[i] = (long long)normal_dist_particle_giver(NUM_PARTICLES, STDEV_DIM_1); - } + + if (params.useDataDist) { + // read data file listed in config file + char size_line[256] = ""; + + printf("Begin data dist processing\n"); + printf("Read data file %s\n", params.data_dist_path); + + FILE *file = fopen(params.data_dist_path, "r"); + + while (fgets(size_line, 256, file)) { + + printf("Read line: %s\n", size_line); + char *tokens[2]; + int index; + long long size; + char *tok = strtok(size_line, " "); + if (tok) { + index = atoi(tok); + tok = strtok(NULL, " "); + if (tok) { + holder[index] = strtoll(tok, NULL, 10); + } + } else { + return -1; + } + } + } + + else { + for (int i = 0; i < NUM_RANKS; i++) { + holder[i] = (long long)normal_dist_particle_giver(NUM_PARTICLES, STDEV_DIM_1); + } + } } + MPI_Barrier(MPI_COMM_WORLD); MPI_Scatter(&holder[0], 1, MPI_LONG_LONG, &NUM_PARTICLES, 1, MPI_LONG_LONG, 0, MPI_COMM_WORLD); From d8991165d8fbfab0979101106d1691986d3c57dd Mon Sep 17 00:00:00 2001 From: John-Paul Robinson Date: Wed, 10 Apr 2024 00:43:25 -0500 Subject: [PATCH 05/11] Add build and install targets for new data distribution test Create make targets and add file to install list. 
--- CMakeLists.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index cf22a019..b936635a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,6 +73,7 @@ message(STATUS "Using HDF5 version: ${HDF5_VERSION}") include_directories(${HDF5_HOME}/include) link_directories(${HDF5_HOME}/lib) +link_directories(/usr/lib/x86_64-linux-gnu/hdf5/openmpi) # Check if HDF5 has subfiling VFD set(HAVE_subfiling 0) @@ -128,6 +129,14 @@ set(h5bench_write_normal_dist_src h5bench_patterns/h5bench_write_normal_dist.c) add_executable(h5bench_write_var_normal_dist ${h5bench_write_normal_dist_src}) target_link_libraries(h5bench_write_var_normal_dist h5bench_util hdf5 z m ${CMAKE_DL_LIBS} MPI::MPI_C) +# h5bench WRITE varying particle based on data distribution ###################### +# + +set(h5bench_write_data_dist_src h5bench_patterns/h5bench_write_data_dist.c) + +add_executable(h5bench_write_data_dist ${h5bench_write_data_dist_src}) +target_link_libraries(h5bench_write_data_dist h5bench_util hdf5 z m ${CMAKE_DL_LIBS} MPI::MPI_C) + # h5bench WRITE ############################################################### # @@ -356,6 +365,7 @@ install( h5bench_write h5bench_write_unlimited h5bench_write_var_normal_dist + h5bench_write_data_dist h5bench_overwrite h5bench_append h5bench_read From e05120350f11d6575230bcd73214bf55f16a5e08 Mon Sep 17 00:00:00 2001 From: John-Paul Robinson Date: Sun, 14 Apr 2024 12:46:41 -0500 Subject: [PATCH 06/11] Update per process accounting Remove rank 0 output summary limit so all processes report performance rather than just accepting rank 0 results. This is especially important for non-uniform data distribution but should be considered for uniform since no summary stats are computed. Recommend --tag-output to track per-rank stdout. Remove naive total size compute and limit it to a per rank size value. 
--- h5bench_patterns/h5bench_write_data_dist.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/h5bench_patterns/h5bench_write_data_dist.c b/h5bench_patterns/h5bench_write_data_dist.c index 8fe8f8be..ffa8555d 100644 --- a/h5bench_patterns/h5bench_write_data_dist.c +++ b/h5bench_patterns/h5bench_write_data_dist.c @@ -1218,7 +1218,7 @@ main(int argc, char *argv[]) std_1 = pow(NUM_PARTICLES - final_mean, 2); MPI_Allreduce(&std_1, &final_std, 1, MPI_LONG_LONG, MPI_SUM, comm); - if (MY_RANK == 0) { + //if (MY_RANK == 0) { human_readable value; char * mode_str = NULL; @@ -1238,7 +1238,8 @@ main(int argc, char *argv[]) read_time_val(params.compute_time, TIME_US) * (params.cnt_time_step - 1); printf("Total emulated compute time: %.3lf s\n", total_sleep_time_us / (1000.0 * 1000.0)); - double total_size_bytes = NUM_RANKS * local_data_size; + //double total_size_bytes = NUM_RANKS * local_data_size; + double total_size_bytes = local_data_size; value = format_human_readable(total_size_bytes); printf("Total write size: %.3lf %cB\n", value.value, value.unit); @@ -1293,7 +1294,7 @@ main(int argc, char *argv[]) fprintf(params.csv_fs, "observed time, %.3f, %s\n", oct_s, "seconds"); fclose(params.csv_fs); } - } + //} MPI_Finalize(); return 0; From 9cf8232076ba061376fb8e54cba65c571d03f7fe Mon Sep 17 00:00:00 2001 From: John-Paul Robinson Date: Tue, 30 Apr 2024 02:55:30 -0500 Subject: [PATCH 07/11] Add scaling factor for data distribution particle counts Add scaling parameter to tests to scale particle count in order to create memory footprints that more accurately reflect a data distribution. Data distribution inputs are in particle counts. Particles are 32-byte structures so a data distribution measured in bytes needs to be scaled down so the particles instantiated match the actual data footprint, in multiples of 32-byte particles. 
--- commons/h5bench_util.c | 12 ++++++++++++ commons/h5bench_util.h | 1 + h5bench_patterns/h5bench_write_data_dist.c | 6 +++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/commons/h5bench_util.c b/commons/h5bench_util.c index 906a3a0d..0141c2ec 100644 --- a/commons/h5bench_util.c +++ b/commons/h5bench_util.c @@ -984,6 +984,15 @@ _set_params(char *key, char *val_in, bench_params *params_in_out, int do_write) (*params_in_out).useDataDist = 1; (*params_in_out).data_dist_path = strdup(val); } + else if (strcmp(key, "DATA_DIST_SCALE") == 0) { + float num = 0.0; + char *tok; + tok = strtok(val, "/"); + num = strtof(tok, NULL); + if (tok = strtok(NULL, "/")) + num = num / strtof(tok, NULL); // two terms with / delim is fraction + (*params_in_out).data_dist_scale = num; + } else if (strcmp(key, "ENV_METADATA_FILE") == 0) { (*params_in_out).env_meta_path = strdup(val); } @@ -1070,6 +1079,7 @@ bench_params_init(bench_params *params_out) (*params_out).data_coll = 0; (*params_out).asyncMode = MODE_SYNC; (*params_out).subfiling = 0; + (*params_out).useDataDist = 0; (*params_out).cnt_time_step = 0; (*params_out).cnt_time_step_delay = 0; @@ -1098,6 +1108,8 @@ bench_params_init(bench_params *params_out) (*params_out).csv_path = NULL; (*params_out).csv_fs = NULL; + (*params_out).data_dist_path = NULL; + (*params_out).data_dist_scale = 1.0; (*params_out).env_meta_path = NULL; (*params_out).file_per_proc = 0; (*params_out).align = 0; diff --git a/commons/h5bench_util.h b/commons/h5bench_util.h index 0400572b..ed028545 100644 --- a/commons/h5bench_util.h +++ b/commons/h5bench_util.h @@ -145,6 +145,7 @@ typedef struct bench_params { char * env_meta_path; FILE * csv_fs; char * data_dist_path; + float data_dist_scale; int file_per_proc; int align; unsigned long align_threshold; diff --git a/h5bench_patterns/h5bench_write_data_dist.c b/h5bench_patterns/h5bench_write_data_dist.c index ffa8555d..0fb0475d 100644 --- a/h5bench_patterns/h5bench_write_data_dist.c +++ 
b/h5bench_patterns/h5bench_write_data_dist.c @@ -1109,7 +1109,11 @@ main(int argc, char *argv[]) index = atoi(tok); tok = strtok(NULL, " "); if (tok) { - holder[index] = strtoll(tok, NULL, 10); + /* don't compute scale if factor is 1, identity */ + if (params.data_dist_scale == 1.0) + holder[index] = strtoll(tok, NULL, 10); + else + holder[index] = (long long) (params.data_dist_scale * strtoll(tok, NULL, 10)); } } else { return -1; From de1d7872f03e699b4fc88b1b8833d4d9a3161352 Mon Sep 17 00:00:00 2001 From: John-Paul Robinson Date: Sat, 22 Jun 2024 14:33:36 -0500 Subject: [PATCH 08/11] Fix the data dist benchmark binary name to use "var" naming convention Change the deployed binary to "write_var_data_dist" so it matches the configured test reference in h5bench. The binary name needs to match the test name so that it can be called by the h5bench wrapper. This follows convention of "write_var_normal_dist". Update code to log the correct benchmark name. --- CMakeLists.txt | 6 +++--- h5bench_patterns/h5bench_write_data_dist.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b936635a..532b2e65 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -134,8 +134,8 @@ target_link_libraries(h5bench_write_var_normal_dist h5bench_util hdf5 z m ${CMAK set(h5bench_write_data_dist_src h5bench_patterns/h5bench_write_data_dist.c) -add_executable(h5bench_write_data_dist ${h5bench_write_data_dist_src}) -target_link_libraries(h5bench_write_data_dist h5bench_util hdf5 z m ${CMAKE_DL_LIBS} MPI::MPI_C) +add_executable(h5bench_write_var_data_dist ${h5bench_write_data_dist_src}) +target_link_libraries(h5bench_write_var_data_dist h5bench_util hdf5 z m ${CMAKE_DL_LIBS} MPI::MPI_C) # h5bench WRITE ############################################################### # @@ -365,7 +365,7 @@ install( h5bench_write h5bench_write_unlimited h5bench_write_var_normal_dist - h5bench_write_data_dist + h5bench_write_var_data_dist h5bench_overwrite 
h5bench_append h5bench_read diff --git a/h5bench_patterns/h5bench_write_data_dist.c b/h5bench_patterns/h5bench_write_data_dist.c index 0fb0475d..8f6f1bdb 100644 --- a/h5bench_patterns/h5bench_write_data_dist.c +++ b/h5bench_patterns/h5bench_write_data_dist.c @@ -1277,7 +1277,7 @@ main(int argc, char *argv[]) if (params.useCSV) { fprintf(params.csv_fs, "metric, value, unit\n"); - fprintf(params.csv_fs, "operation, %s, %s\n", "write_var_normal_dist", ""); + fprintf(params.csv_fs, "operation, %s, %s\n", "write_var_data_dist", ""); fprintf(params.csv_fs, "ranks, %d, %s\n", NUM_RANKS, ""); fprintf(params.csv_fs, "Total number of particles, %lldM, %s\n", TOTAL_PARTICLES / (M_VAL), ""); fprintf(params.csv_fs, "Final mean particles, %ld, %s\n", final_mean, ""); From 241fee7306d14366cb9239aaa6986fe74b29e3df Mon Sep 17 00:00:00 2001 From: John-Paul Robinson Date: Mon, 9 Sep 2024 13:50:11 -0500 Subject: [PATCH 09/11] Update the github action/artifacts-upload version Change action/artifacts-upload from v2 to v4 to remove dependence on deprecated v2. The v4 syntax for the artifact-upload remains the same so a simple update of the version number should be sufficient. 
See blog post for details: https://github.blog/changelog/2024-02-13-deprecation-notice-v1-and-v2-of-the-artifact-actions/ --- .github/workflows/h5bench-hdf5-1.10.4.yml | 2 +- .github/workflows/h5bench-hdf5-1.10.7.yml | 2 +- .github/workflows/h5bench-hdf5-1.10.8.yml | 2 +- .github/workflows/h5bench-hdf5-1.12.0.yml | 2 +- .github/workflows/h5bench-hdf5-1.14.0.yml | 2 +- .github/workflows/h5bench-hdf5-1.14.1.yml | 2 +- .github/workflows/h5bench-hdf5-develop-test.yml | 2 +- .github/workflows/h5bench-hdf5-develop.yml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/h5bench-hdf5-1.10.4.yml b/.github/workflows/h5bench-hdf5-1.10.4.yml index 712eb4e9..e239116c 100644 --- a/.github/workflows/h5bench-hdf5-1.10.4.yml +++ b/.github/workflows/h5bench-hdf5-1.10.4.yml @@ -223,7 +223,7 @@ jobs: - name: Upload artifact if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: test path: build/storage/**/std* diff --git a/.github/workflows/h5bench-hdf5-1.10.7.yml b/.github/workflows/h5bench-hdf5-1.10.7.yml index 8834b81b..56dded8b 100644 --- a/.github/workflows/h5bench-hdf5-1.10.7.yml +++ b/.github/workflows/h5bench-hdf5-1.10.7.yml @@ -223,7 +223,7 @@ jobs: - name: Upload artifact if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: test path: build/h5bench_e3sm-prefix/src/h5bench_e3sm-stamp/* diff --git a/.github/workflows/h5bench-hdf5-1.10.8.yml b/.github/workflows/h5bench-hdf5-1.10.8.yml index 3c2fc5f1..af65f61e 100644 --- a/.github/workflows/h5bench-hdf5-1.10.8.yml +++ b/.github/workflows/h5bench-hdf5-1.10.8.yml @@ -223,7 +223,7 @@ jobs: - name: Upload artifact if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: test path: build/storage/**/std* diff --git a/.github/workflows/h5bench-hdf5-1.12.0.yml b/.github/workflows/h5bench-hdf5-1.12.0.yml index d194ae41..b02d7ba3 100644 --- a/.github/workflows/h5bench-hdf5-1.12.0.yml 
+++ b/.github/workflows/h5bench-hdf5-1.12.0.yml @@ -258,7 +258,7 @@ jobs: - name: Upload artifact if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: test path: build/storage/**/std* diff --git a/.github/workflows/h5bench-hdf5-1.14.0.yml b/.github/workflows/h5bench-hdf5-1.14.0.yml index b3be7393..58d82558 100644 --- a/.github/workflows/h5bench-hdf5-1.14.0.yml +++ b/.github/workflows/h5bench-hdf5-1.14.0.yml @@ -513,7 +513,7 @@ jobs: - name: Upload artifact if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: test path: build*/storage/**/std* diff --git a/.github/workflows/h5bench-hdf5-1.14.1.yml b/.github/workflows/h5bench-hdf5-1.14.1.yml index 2de7aceb..1d4f8316 100644 --- a/.github/workflows/h5bench-hdf5-1.14.1.yml +++ b/.github/workflows/h5bench-hdf5-1.14.1.yml @@ -513,7 +513,7 @@ jobs: - name: Upload artifact if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: test path: build*/storage/**/std* diff --git a/.github/workflows/h5bench-hdf5-develop-test.yml b/.github/workflows/h5bench-hdf5-develop-test.yml index e995e9e1..9b8545b3 100644 --- a/.github/workflows/h5bench-hdf5-develop-test.yml +++ b/.github/workflows/h5bench-hdf5-develop-test.yml @@ -152,7 +152,7 @@ jobs: - name: Upload artifact if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: test path: build*/storage/**/std* diff --git a/.github/workflows/h5bench-hdf5-develop.yml b/.github/workflows/h5bench-hdf5-develop.yml index b68c92e2..f796ec7f 100644 --- a/.github/workflows/h5bench-hdf5-develop.yml +++ b/.github/workflows/h5bench-hdf5-develop.yml @@ -645,7 +645,7 @@ jobs: - name: Upload artifact if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: test path: build*/storage/**/std* From 761037732e7385d3e3ec184f11cd7adbbb5e93ed Mon Sep 17 00:00:00 2001 From: John-Paul Robinson Date: Mon, 9 Sep 2024 
14:06:12 -0500 Subject: [PATCH 10/11] Update clang-format container version to 0.18.2 Update to latest version of container to see if it avoids the missing distutils dependency reported with the @0.11 version. --- .github/workflows/clang-format-check.yml | 2 +- .github/workflows/clang-format-fix.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/clang-format-check.yml b/.github/workflows/clang-format-check.yml index 52e4d061..9efa4c77 100644 --- a/.github/workflows/clang-format-check.yml +++ b/.github/workflows/clang-format-check.yml @@ -9,7 +9,7 @@ jobs: steps: - uses: actions/checkout@v2 - name: Run clang-format style check for C programs. - uses: DoozyX/clang-format-lint-action@v0.11 + uses: DoozyX/clang-format-lint-action@v0.18.2 with: source: '.' extensions: 'c,h,cpp,hpp' diff --git a/.github/workflows/clang-format-fix.yml b/.github/workflows/clang-format-fix.yml index 9145f65c..a77e955f 100644 --- a/.github/workflows/clang-format-fix.yml +++ b/.github/workflows/clang-format-fix.yml @@ -10,7 +10,7 @@ jobs: steps: - uses: actions/checkout@v2 - name: Run clang-format style check for C programs. - uses: DoozyX/clang-format-lint-action@v0.11 + uses: DoozyX/clang-format-lint-action@v0.18.2 with: source: '.' 
extensions: 'c,h,cpp,hpp' From fd64d7adad5004c5d5fc07e4ffecf8f94d15a696 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 9 Sep 2024 20:57:25 +0000 Subject: [PATCH 11/11] Committing clang-format changes --- commons/h5bench_util.c | 18 +- h5bench_patterns/h5bench_write_data_dist.c | 224 ++++++++++----------- 2 files changed, 121 insertions(+), 121 deletions(-) diff --git a/commons/h5bench_util.c b/commons/h5bench_util.c index 0141c2ec..40f9ed17 100644 --- a/commons/h5bench_util.c +++ b/commons/h5bench_util.c @@ -978,20 +978,20 @@ _set_params(char *key, char *val_in, bench_params *params_in_out, int do_write) } else if (strcmp(key, "CSV_FILE") == 0) { (*params_in_out).useCSV = 1; - (*params_in_out).csv_path = strdup(val);\ + (*params_in_out).csv_path = strdup(val); } else if (strcmp(key, "DATA_DIST_PATH") == 0) { - (*params_in_out).useDataDist = 1; + (*params_in_out).useDataDist = 1; (*params_in_out).data_dist_path = strdup(val); } else if (strcmp(key, "DATA_DIST_SCALE") == 0) { - float num = 0.0; - char *tok; - tok = strtok(val, "/"); - num = strtof(tok, NULL); - if (tok = strtok(NULL, "/")) - num = num / strtof(tok, NULL); // two terms with / delim is fraction - (*params_in_out).data_dist_scale = num; + float num = 0.0; + char *tok; + tok = strtok(val, "/"); + num = strtof(tok, NULL); + if (tok = strtok(NULL, "/")) + num = num / strtof(tok, NULL); // two terms with / delim is fraction + (*params_in_out).data_dist_scale = num; } else if (strcmp(key, "ENV_METADATA_FILE") == 0) { (*params_in_out).env_meta_path = strdup(val); diff --git a/h5bench_patterns/h5bench_write_data_dist.c b/h5bench_patterns/h5bench_write_data_dist.c index 8f6f1bdb..f2c1e839 100644 --- a/h5bench_patterns/h5bench_write_data_dist.c +++ b/h5bench_patterns/h5bench_write_data_dist.c @@ -1088,44 +1088,44 @@ main(int argc, char *argv[]) printf("Given standard deviation : %ld \n", STDEV_DIM_1); printf("Total ranks %i \n", NUM_RANKS); + if (params.useDataDist) { + // read data file listed in 
config file + char size_line[256] = ""; + + printf("Begin data dist processing\n"); + printf("Read data file %s\n", params.data_dist_path); + + FILE *file = fopen(params.data_dist_path, "r"); + + while (fgets(size_line, 256, file)) { + + printf("Read line: %s\n", size_line); + char * tokens[2]; + int index; + long long size; + char * tok = strtok(size_line, " "); + if (tok) { + index = atoi(tok); + tok = strtok(NULL, " "); + if (tok) { + /* don't compute scale if factor is 1, identity */ + if (params.data_dist_scale == 1.0) + holder[index] = strtoll(tok, NULL, 10); + else + holder[index] = (long long)(params.data_dist_scale * strtoll(tok, NULL, 10)); + } + } + else { + return -1; + } + } + } - if (params.useDataDist) { - // read data file listed in config file - char size_line[256] = ""; - - printf("Begin data dist processing\n"); - printf("Read data file %s\n", params.data_dist_path); - - FILE *file = fopen(params.data_dist_path, "r"); - - while (fgets(size_line, 256, file)) { - - printf("Read line: %s\n", size_line); - char *tokens[2]; - int index; - long long size; - char *tok = strtok(size_line, " "); - if (tok) { - index = atoi(tok); - tok = strtok(NULL, " "); - if (tok) { - /* don't compute scale if factor is 1, identity */ - if (params.data_dist_scale == 1.0) - holder[index] = strtoll(tok, NULL, 10); - else - holder[index] = (long long) (params.data_dist_scale * strtoll(tok, NULL, 10)); - } - } else { - return -1; - } - } - } - - else { - for (int i = 0; i < NUM_RANKS; i++) { - holder[i] = (long long)normal_dist_particle_giver(NUM_PARTICLES, STDEV_DIM_1); - } - } + else { + for (int i = 0; i < NUM_RANKS; i++) { + holder[i] = (long long)normal_dist_particle_giver(NUM_PARTICLES, STDEV_DIM_1); + } + } } MPI_Barrier(MPI_COMM_WORLD); @@ -1222,83 +1222,83 @@ main(int argc, char *argv[]) std_1 = pow(NUM_PARTICLES - final_mean, 2); MPI_Allreduce(&std_1, &final_std, 1, MPI_LONG_LONG, MPI_SUM, comm); - //if (MY_RANK == 0) { - human_readable value; - char * mode_str = 
NULL; - - if (has_vol_async) { - mode_str = "ASYNC"; - } - else { - mode_str = "SYNC"; - } - printf("\n=================== Performance Results ==================\n"); - - printf("Total number of ranks: %d\n", NUM_RANKS); - printf("Total number of particles: %lldM\n", TOTAL_PARTICLES / (M_VAL)); - printf("Final mean particles: %ld \n", final_mean); - printf("Final standard deviation: %f \n", sqrt(final_std / NUM_RANKS)); - unsigned long long total_sleep_time_us = - read_time_val(params.compute_time, TIME_US) * (params.cnt_time_step - 1); - printf("Total emulated compute time: %.3lf s\n", total_sleep_time_us / (1000.0 * 1000.0)); - - //double total_size_bytes = NUM_RANKS * local_data_size; - double total_size_bytes = local_data_size; - value = format_human_readable(total_size_bytes); - printf("Total write size: %.3lf %cB\n", value.value, value.unit); - - float rwt_s = (float)raw_write_time / (1000.0 * 1000.0); - float raw_rate = (float)total_size_bytes / rwt_s; - printf("Raw write time: %.3f s\n", rwt_s); - - float meta_time_s = (float)inner_metadata_time / (1000.0 * 1000.0); - printf("Metadata time: %.3f s\n", meta_time_s); - - float fcreate_time_s = (float)(tfopen_end - tfopen_start) / (1000.0 * 1000.0); - printf("H5Fcreate() time: %.3f s\n", fcreate_time_s); - - float flush_time_s = (float)(tflush_end - tflush_start) / (1000.0 * 1000.0); - printf("H5Fflush() time: %.3f s\n", flush_time_s); - - float fclose_time_s = (float)(tfclose_end - tfclose_start) / (1000.0 * 1000.0); - printf("H5Fclose() time: %.3f s\n", fclose_time_s); - - float oct_s = (float)(t4 - t1) / (1000.0 * 1000.0); - printf("Observed completion time: %.3f s\n", oct_s); + // if (MY_RANK == 0) { + human_readable value; + char * mode_str = NULL; + if (has_vol_async) { + mode_str = "ASYNC"; + } + else { + mode_str = "SYNC"; + } + printf("\n=================== Performance Results ==================\n"); + + printf("Total number of ranks: %d\n", NUM_RANKS); + printf("Total number of particles: %lldM\n", 
TOTAL_PARTICLES / (M_VAL)); + printf("Final mean particles: %ld \n", final_mean); + printf("Final standard deviation: %f \n", sqrt(final_std / NUM_RANKS)); + unsigned long long total_sleep_time_us = + read_time_val(params.compute_time, TIME_US) * (params.cnt_time_step - 1); + printf("Total emulated compute time: %.3lf s\n", total_sleep_time_us / (1000.0 * 1000.0)); + + // double total_size_bytes = NUM_RANKS * local_data_size; + double total_size_bytes = local_data_size; + value = format_human_readable(total_size_bytes); + printf("Total write size: %.3lf %cB\n", value.value, value.unit); + + float rwt_s = (float)raw_write_time / (1000.0 * 1000.0); + float raw_rate = (float)total_size_bytes / rwt_s; + printf("Raw write time: %.3f s\n", rwt_s); + + float meta_time_s = (float)inner_metadata_time / (1000.0 * 1000.0); + printf("Metadata time: %.3f s\n", meta_time_s); + + float fcreate_time_s = (float)(tfopen_end - tfopen_start) / (1000.0 * 1000.0); + printf("H5Fcreate() time: %.3f s\n", fcreate_time_s); + + float flush_time_s = (float)(tflush_end - tflush_start) / (1000.0 * 1000.0); + printf("H5Fflush() time: %.3f s\n", flush_time_s); + + float fclose_time_s = (float)(tfclose_end - tfclose_start) / (1000.0 * 1000.0); + printf("H5Fclose() time: %.3f s\n", fclose_time_s); + + float oct_s = (float)(t4 - t1) / (1000.0 * 1000.0); + printf("Observed completion time: %.3f s\n", oct_s); + + value = format_human_readable(raw_rate); + printf("%s Raw write rate: %.3f %cB/s \n", mode_str, value.value, value.unit); + + float or_bs = (float)total_size_bytes / ((float)(t4 - t1 - total_sleep_time_us) / (1000.0 * 1000.0)); + value = format_human_readable(or_bs); + printf("%s Observed write rate: %.3f %cB/s\n", mode_str, value.value, value.unit); + + printf("===========================================================\n"); + + if (params.useCSV) { + fprintf(params.csv_fs, "metric, value, unit\n"); + fprintf(params.csv_fs, "operation, %s, %s\n", "write_var_data_dist", ""); + 
fprintf(params.csv_fs, "ranks, %d, %s\n", NUM_RANKS, ""); + fprintf(params.csv_fs, "Total number of particles, %lldM, %s\n", TOTAL_PARTICLES / (M_VAL), ""); + fprintf(params.csv_fs, "Final mean particles, %ld, %s\n", final_mean, ""); + fprintf(params.csv_fs, "Final standard deviation, %f, %s\n", sqrt(final_std / NUM_RANKS), ""); + fprintf(params.csv_fs, "collective data, %s, %s\n", params.data_coll == 1 ? "YES" : "NO", ""); + fprintf(params.csv_fs, "collective meta, %s, %s\n", params.meta_coll == 1 ? "YES" : "NO", ""); + fprintf(params.csv_fs, "subfiling, %s, %s\n", params.subfiling == 1 ? "YES" : "NO", ""); + fprintf(params.csv_fs, "total compute time, %.3lf, %s\n", total_sleep_time_us / (1000.0 * 1000.0), + "seconds"); + value = format_human_readable(total_size_bytes); + fprintf(params.csv_fs, "total size, %.3lf, %cB\n", value.value, value.unit); + fprintf(params.csv_fs, "raw time, %.3f, %s\n", rwt_s, "seconds"); value = format_human_readable(raw_rate); - printf("%s Raw write rate: %.3f %cB/s \n", mode_str, value.value, value.unit); - - float or_bs = (float)total_size_bytes / ((float)(t4 - t1 - total_sleep_time_us) / (1000.0 * 1000.0)); - value = format_human_readable(or_bs); - printf("%s Observed write rate: %.3f %cB/s\n", mode_str, value.value, value.unit); - - printf("===========================================================\n"); - - if (params.useCSV) { - fprintf(params.csv_fs, "metric, value, unit\n"); - fprintf(params.csv_fs, "operation, %s, %s\n", "write_var_data_dist", ""); - fprintf(params.csv_fs, "ranks, %d, %s\n", NUM_RANKS, ""); - fprintf(params.csv_fs, "Total number of particles, %lldM, %s\n", TOTAL_PARTICLES / (M_VAL), ""); - fprintf(params.csv_fs, "Final mean particles, %ld, %s\n", final_mean, ""); - fprintf(params.csv_fs, "Final standard deviation, %f, %s\n", sqrt(final_std / NUM_RANKS), ""); - fprintf(params.csv_fs, "collective data, %s, %s\n", params.data_coll == 1 ? 
"YES" : "NO", ""); - fprintf(params.csv_fs, "collective meta, %s, %s\n", params.meta_coll == 1 ? "YES" : "NO", ""); - fprintf(params.csv_fs, "subfiling, %s, %s\n", params.subfiling == 1 ? "YES" : "NO", ""); - fprintf(params.csv_fs, "total compute time, %.3lf, %s\n", total_sleep_time_us / (1000.0 * 1000.0), - "seconds"); - value = format_human_readable(total_size_bytes); - fprintf(params.csv_fs, "total size, %.3lf, %cB\n", value.value, value.unit); - fprintf(params.csv_fs, "raw time, %.3f, %s\n", rwt_s, "seconds"); - value = format_human_readable(raw_rate); - fprintf(params.csv_fs, "raw rate, %.3lf, %cB/s\n", value.value, value.unit); - fprintf(params.csv_fs, "metadata time, %.3f, %s\n", meta_time_s, "seconds"); - value = format_human_readable(or_bs); - fprintf(params.csv_fs, "observed rate, %.3f, %cB/s\n", value.value, value.unit); - fprintf(params.csv_fs, "observed time, %.3f, %s\n", oct_s, "seconds"); - fclose(params.csv_fs); - } - //} + fprintf(params.csv_fs, "raw rate, %.3lf, %cB/s\n", value.value, value.unit); + fprintf(params.csv_fs, "metadata time, %.3f, %s\n", meta_time_s, "seconds"); + value = format_human_readable(or_bs); + fprintf(params.csv_fs, "observed rate, %.3f, %cB/s\n", value.value, value.unit); + fprintf(params.csv_fs, "observed time, %.3f, %s\n", oct_s, "seconds"); + fclose(params.csv_fs); + } + //} MPI_Finalize(); return 0;