From 3216c3a9e6f27879655b4fe0ba70c37e51171e57 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Tue, 29 Apr 2025 14:34:41 +0100 Subject: [PATCH 01/11] Script v1 --- misc/fof_prop.py | 253 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 253 insertions(+) create mode 100644 misc/fof_prop.py diff --git a/misc/fof_prop.py b/misc/fof_prop.py new file mode 100644 index 00000000..e3288f08 --- /dev/null +++ b/misc/fof_prop.py @@ -0,0 +1,253 @@ +#!/bin/env python + +""" +TODO: WRITE DOCSTRING +convert_eagle.py + +This script converts EAGLE GADGET snapshots into SWIFT snapshots. +Usage: + + mpirun -- python convert_eagle.py \ + --snapshot-basename=SNAPSHOT \ + --fof-basename=FOF \ + --output-basename=OUTPUT \ + --membership-basename=MEMBERSHIP + +where SNAPSHOT is the EAGLE snapshot (use the particledata_*** files, +since the normal snapshots don't store SubGroupNumber), and OUTPUT & +MEMBERSHIP are the names of the output files. You must run with +the same number of ranks as input files. +""" + +import argparse +import os + +from mpi4py import MPI + +comm = MPI.COMM_WORLD +comm_rank = comm.Get_rank() +comm_size = comm.Get_size() + +import h5py +import numpy as np +import unyt + +import virgo.mpi.parallel_sort as psort +import virgo.mpi.parallel_hdf5 as phdf5 +from virgo.mpi.gather_array import gather_array + +# Parse arguments +parser = argparse.ArgumentParser( + description=( + "Script to calculate extent of FoF groups." + ) +) +parser.add_argument( + "--snap-basename", + type=str, + required=True, + help=( + "The basename for the snapshot files (the snapshot " + "name without the .{file_nr}.hdf5 suffix)" + ), +) +parser.add_argument( + "--fof-basename", + type=str, + required=True, + help="The basename for the output files", +) +parser.add_argument( + "--output-basename", + type=str, + required=True, + help=( + "The basename for the output files" + ), +) +parser.add_argument( + "--null-fof-id", + type=int, + required=False, + default=2147483647, + help=( + "The FOFGroupIDs of particles not in a FOF group" + ), +) +args = parser.parse_args() +snap_filename = args.snap_basename + ".{file_nr}.hdf5" +fof_filename = args.fof_basename + ".{file_nr}.hdf5" +output_filename = args.output_basename + ".{file_nr}.hdf5" +os.makedirs(os.path.dirname(output_filename), exist_ok=True) + +if comm_rank == 0: + with h5py.File(snap_filename.format(file_nr=0), "r") as file: + header = file['Header'].attrs + boxsize = header['BoxSize'] + ptypes = np.where(header['TotalNumberOfParticles'] != 0)[0] + coordinate_unit_attrs = dict(file['PartType1/Coordinates'].attrs) + + + # TODO: Split across ranks + for i_file in range(4): + src_filename = fof_filename.format(file_nr=i_file) + dst_filename = output_filename.format(file_nr=i_file) + + def copy_attrs(src_obj, dst_obj): + for key, val in src_obj.attrs.items(): + dst_obj.attrs[key] = val + + with h5py.File(src_filename, "r") as src_file, h5py.File(dst_filename, "w") as dst_file: + + def create_virtual_datasets(src_group, dst_group, src_filename, prefix=""): + copy_attrs(src_group, dst_group) + + for name, item in src_group.items(): + if isinstance(item, h5py.Dataset): + shape = item.shape + dtype = item.dtype + layout = h5py.VirtualLayout(shape=shape, dtype=dtype) + vsource = h5py.VirtualSource(src_filename, prefix + name, shape=shape) + layout[...] = vsource + vds = dst_group.create_virtual_dataset(name, layout) + copy_attrs(item, vds) # Copy dataset attributes + elif isinstance(item, h5py.Group): + new_group = dst_group.create_group(name) + create_virtual_datasets(item, new_group, src_filename, prefix + name + "/") + + create_virtual_datasets(src_file, dst_file, src_filename) + + print("Virtual copy with attributes created successfully.") + + +else: + boxsize = None + ptypes = None + coordinate_unit_attrs = None +boxsize = comm.bcast(boxsize) +ptypes = comm.bcast(ptypes) +coordinate_unit_attrs = comm.bcast(coordinate_unit_attrs) + +# Load FOF catalogue +fof_file = phdf5.MultiFile( + fof_filename, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm +) +fof_group_ids = fof_file.read(f"Groups/GroupIDs") +fof_sizes = fof_file.read(f"Groups/Sizes") +fof_centres = fof_file.read(f"Groups/Centres") + +# Initialise arrays for storing results +fof_min_pos = np.inf * np.ones_like(fof_centres) +fof_max_pos = -np.inf * np.ones_like(fof_centres) +total_part_counts = np.zeros_like(fof_sizes) + +# Open snapshot file +snap_file = phdf5.MultiFile( + snap_filename, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm +) +for ptype in ptypes: + if comm_rank == 0: + print(f'Processing PartType{ptype}') + + # Load particle positions and their FOF IDs + part_pos = snap_file.read(f"PartType{ptype}/Coordinates") + part_fof_ids = snap_file.read(f"PartType{ptype}/FOFGroupIDs") + + # Ignore particles which aren't part of a FOF group + mask = part_fof_ids != args.null_fof_id + part_pos = part_pos[mask] + part_fof_ids = part_fof_ids[mask] + + # Get the centre of the FOF each particle is part of + idx = psort.parallel_match(part_fof_ids, fof_group_ids, comm=comm) + assert np.all(idx != -1), 'FOFs could not be found for some particles' + part_centre = psort.fetch_elements(fof_centres, idx, comm=comm) + + # Centre the particles + shift = (boxsize[None, :] / 2) - part_centre + part_pos = ((part_pos + shift) % boxsize[None, :]) - (boxsize[None, :] / 2) + + # Count the number of particles found for each FOF + unique_fof_ids, unique_counts = psort.parallel_unique( + part_fof_ids, + return_counts=True, + comm=comm, + ) + + # Keep track of number of particles in each FOF (to compare with "Groups/Sizes") + idx = psort.parallel_match(fof_group_ids, unique_fof_ids, comm=comm) + mask = idx != -1 + total_part_counts[mask] += psort.fetch_elements(unique_counts, idx[mask], comm=comm) + + # Sort based on unique_fof_ids + order = psort.parallel_sort(unique_fof_ids, return_index=True, comm=comm) + unique_counts = psort.fetch_elements(unique_counts, order, comm=comm) + + # Determine how to partition the particles, so groups will not span ranks + gathered_counts = gather_array(unique_counts) + if comm_rank == 0: + n_part_target = np.sum(gathered_counts) / comm_size + cumsum = np.cumsum(gathered_counts) + ranks = np.floor(cumsum / n_part_target).astype(int) + ranks = np.clip(ranks, 0, comm_size-1) + n_part_per_rank = np.bincount(ranks, weights=gathered_counts, minlength=comm_size) + assert np.sum(n_part_per_rank) == np.sum(gathered_counts) + else: + n_part_per_rank = None + del gathered_counts + n_part_per_rank = comm.bcast(n_part_per_rank) + + # Repartition particle data + part_fof_ids = psort.repartition(part_fof_ids, n_part_per_rank, comm=comm) + part_pos = psort.repartition(part_pos, n_part_per_rank, comm=comm) + + # Sort particles by FOF ID + order = psort.parallel_sort(part_fof_ids, return_index=True, comm=comm) + part_pos = psort.fetch_elements(part_pos, order, comm=comm) + + # Find boundaries where FOF ID changes + if part_fof_ids.shape[0] != 0: + change_idx = np.flatnonzero(np.diff(part_fof_ids)) + 1 + reduce_idx = np.concatenate([np.array([0]), change_idx]) + else: + reduce_idx = np.array([], dtype=int) + + # Determine link to arrays from original FOF catalogue + local_fof_ids = part_fof_ids[reduce_idx] + idx = psort.parallel_match(fof_group_ids, local_fof_ids, comm=comm) + mask = idx != -1 + + for i in range(3): + # Calculate max and minimum particle positions for each FOF ID + local_min_pos = np.minimum.reduceat(part_pos[:, i], reduce_idx) + local_max_pos = np.maximum.reduceat(part_pos[:, i], reduce_idx) + + # Compare with values from other particle types + min_pos = psort.fetch_elements(local_min_pos, idx[mask], comm=comm) + max_pos = psort.fetch_elements(local_max_pos, idx[mask], comm=comm) + fof_min_pos[mask, i] = np.minimum(fof_min_pos[mask, i], min_pos) + fof_max_pos[mask, i] = np.maximum(fof_max_pos[mask, i], max_pos) + +# Carry out some sanity checks +assert np.all(total_part_counts == fof_sizes), 'Not all particles found for some FOFs' +assert np.max(fof_min_pos) < 0 +assert np.min(fof_max_pos) > 0 + +# Write data to file +output_data = { + "MinParticlePosition": fof_min_pos, + "MaxParticlePosition": fof_max_pos, +} +elements_per_file = fof_file.get_elements_per_file("Groups/GroupIDs") +fof_file.write( + output_data, + elements_per_file, + filenames=output_filename, + mode='r+', + group="Groups", + attrs=coordinate_unit_attrs, +) + +if comm_rank == 0: + print('Done') + From 9e81ba5225d4aeb83e94ec633a01384a3dec043e Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Tue, 29 Apr 2025 15:00:40 +0100 Subject: [PATCH 02/11] Create files across ranks --- ...fof_prop.py => calculate_fof_positions.py} | 95 +++++++++++-------- 1 file changed, 54 insertions(+), 41 deletions(-) rename misc/{fof_prop.py => calculate_fof_positions.py} (75%) diff --git a/misc/fof_prop.py b/misc/calculate_fof_positions.py similarity index 75% rename from misc/fof_prop.py rename to misc/calculate_fof_positions.py index e3288f08..7eace260 100644 --- a/misc/fof_prop.py +++ b/misc/calculate_fof_positions.py @@ -1,6 +1,11 @@ #!/bin/env python """ + +mpirun -np 8 python -u misc/fof_prop.py \ + --snap-basename "${sim_dir}/snapshots/colibre_${snap_nr}/colibre_${snap_nr}" \ + --fof-basename "${sim_dir}/fof/fof_output_${snap_nr}/fof_output_${snap_nr}" \ + --output-basename "${output_dir}/${sim_name}/fof_extent_${snap_nr}/fof_extent_${snap_nr}" TODO: WRITE DOCSTRING convert_eagle.py @@ -82,52 +87,60 @@ if comm_rank == 0: with h5py.File(snap_filename.format(file_nr=0), "r") as file: - header = file['Header'].attrs - boxsize = header['BoxSize'] - ptypes = np.where(header['TotalNumberOfParticles'] != 0)[0] + header = dict(file['Header'].attrs) coordinate_unit_attrs = dict(file['PartType1/Coordinates'].attrs) - - - # TODO: Split across ranks - for i_file in range(4): - src_filename = fof_filename.format(file_nr=i_file) - dst_filename = output_filename.format(file_nr=i_file) - - def copy_attrs(src_obj, dst_obj): - for key, val in src_obj.attrs.items(): - dst_obj.attrs[key] = val - - with h5py.File(src_filename, "r") as src_file, h5py.File(dst_filename, "w") as dst_file: - - def create_virtual_datasets(src_group, dst_group, src_filename, prefix=""): - copy_attrs(src_group, dst_group) - - for name, item in src_group.items(): - if isinstance(item, h5py.Dataset): - shape = item.shape - dtype = item.dtype - layout = h5py.VirtualLayout(shape=shape, dtype=dtype) - vsource = h5py.VirtualSource(src_filename, prefix + name, shape=shape) - layout[...] = vsource - vds = dst_group.create_virtual_dataset(name, layout) - copy_attrs(item, vds) # Copy dataset attributes - elif isinstance(item, h5py.Group): - new_group = dst_group.create_group(name) - create_virtual_datasets(item, new_group, src_filename, prefix + name + "/") - - create_virtual_datasets(src_file, dst_file, src_filename) - - print("Virtual copy with attributes created successfully.") - - else: - boxsize = None - ptypes = None + header = None coordinate_unit_attrs = None -boxsize = comm.bcast(boxsize) -ptypes = comm.bcast(ptypes) +header = comm.bcast(header) coordinate_unit_attrs = comm.bcast(coordinate_unit_attrs) +boxsize = header['BoxSize'] +ptypes = np.where(header['TotalNumberOfParticles'] != 0)[0] +nr_files = header['NumFilesPerSnapshot'][0] + +# Assign files to ranks +files_per_rank = np.zeros(comm_size, dtype=int) +files_per_rank[:] = nr_files // comm_size +remainder = nr_files % comm_size +if remainder > 0: + step = max(nr_files // (remainder+1), 1) + for i in range(remainder): + files_per_rank[(i*step) % comm_size] += 1 +first_file = np.cumsum(files_per_rank) - files_per_rank +assert sum(files_per_rank) == nr_files + +# Create output files +for i_file in range( + first_file[comm_rank], first_file[comm_rank] + files_per_rank[comm_rank] +): + src_filename = fof_filename.format(file_nr=i_file) + dst_filename = output_filename.format(file_nr=i_file) + + def copy_attrs(src_obj, dst_obj): + for key, val in src_obj.attrs.items(): + dst_obj.attrs[key] = val + + with h5py.File(src_filename, "r") as src_file, h5py.File(dst_filename, "w") as dst_file: + + def create_virtual_datasets(src_group, dst_group, src_filename, prefix=""): + copy_attrs(src_group, dst_group) + + for name, item in src_group.items(): + if isinstance(item, h5py.Dataset): + shape = item.shape + dtype = item.dtype + layout = h5py.VirtualLayout(shape=shape, dtype=dtype) + vsource = h5py.VirtualSource(src_filename, prefix + name, shape=shape) + layout[...] = vsource + vds = dst_group.create_virtual_dataset(name, layout) + copy_attrs(item, vds) + elif isinstance(item, h5py.Group): + new_group = dst_group.create_group(name) + create_virtual_datasets(item, new_group, src_filename, prefix + name + "/") + + create_virtual_datasets(src_file, dst_file, src_filename) + # Load FOF catalogue fof_file = phdf5.MultiFile( fof_filename, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm From c0ba8cf186ce29d60a0cfaf2cd1f1ac49971b946 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Tue, 29 Apr 2025 15:05:10 +0100 Subject: [PATCH 03/11] Update docs --- misc/calculate_fof_positions.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py index 7eace260..19c90cb5 100644 --- a/misc/calculate_fof_positions.py +++ b/misc/calculate_fof_positions.py @@ -1,27 +1,19 @@ #!/bin/env python """ +calculate_fof_positions.py -mpirun -np 8 python -u misc/fof_prop.py \ - --snap-basename "${sim_dir}/snapshots/colibre_${snap_nr}/colibre_${snap_nr}" \ - --fof-basename "${sim_dir}/fof/fof_output_${snap_nr}/fof_output_${snap_nr}" \ - --output-basename "${output_dir}/${sim_name}/fof_extent_${snap_nr}/fof_extent_${snap_nr}" -TODO: WRITE DOCSTRING -convert_eagle.py - -This script converts EAGLE GADGET snapshots into SWIFT snapshots. +This script calculates the maximum and minimum particles positions for each FOF. Usage: - mpirun -- python convert_eagle.py \ + mpirun -- python misc/calculate_fof_positions.py \ --snapshot-basename=SNAPSHOT \ --fof-basename=FOF \ - --output-basename=OUTPUT \ - --membership-basename=MEMBERSHIP + --output-basename=OUTPUT -where SNAPSHOT is the EAGLE snapshot (use the particledata_*** files, -since the normal snapshots don't store SubGroupNumber), and OUTPUT & -MEMBERSHIP are the names of the output files. You must run with -the same number of ranks as input files. +where SNAPSHOT is the basename of the snapshot files (the snapshot +name without the .{file_nr}.hdf5 suffix), FOF is the basename of the +fof catalogues, and OUTPUT is the basename of the output fof catalogues. """ import argparse @@ -261,6 +253,8 @@ def create_virtual_datasets(src_group, dst_group, src_filename, prefix=""): attrs=coordinate_unit_attrs, ) +# TODO: Create virtual files + if comm_rank == 0: print('Done') From 9dca6d0e3a039ba1c7c634360b3ffd7de0ae1fa0 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 30 Apr 2025 12:03:02 +0100 Subject: [PATCH 04/11] Create virtual file --- misc/calculate_fof_positions.py | 86 +++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 25 deletions(-) diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py index 19c90cb5..a19bf195 100644 --- a/misc/calculate_fof_positions.py +++ b/misc/calculate_fof_positions.py @@ -91,6 +91,27 @@ ptypes = np.where(header['TotalNumberOfParticles'] != 0)[0] nr_files = header['NumFilesPerSnapshot'][0] +def copy_attrs(src_obj, dst_obj): + for key, val in src_obj.attrs.items(): + dst_obj.attrs[key] = val + +def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): + copy_attrs(src_obj, dst_obj) + for name, item in src_obj.items(): + if isinstance(item, h5py.Dataset): + if skip_datasets and (item.name != "/Header/PartTypeNames"): + continue + shape = item.shape + dtype = item.dtype + layout = h5py.VirtualLayout(shape=shape, dtype=dtype) + vsource = h5py.VirtualSource(src_filename, prefix + name, shape=shape) + layout[...] = vsource + vds = dst_obj.create_virtual_dataset(name, layout) + copy_attrs(item, vds) + elif isinstance(item, h5py.Group): + new_group = dst_obj.create_group(name) + copy_object(item, new_group, src_filename, prefix + name + "/", skip_datasets=skip_datasets) + # Assign files to ranks files_per_rank = np.zeros(comm_size, dtype=int) files_per_rank[:] = nr_files // comm_size @@ -108,30 +129,8 @@ ): src_filename = fof_filename.format(file_nr=i_file) dst_filename = output_filename.format(file_nr=i_file) - - def copy_attrs(src_obj, dst_obj): - for key, val in src_obj.attrs.items(): - dst_obj.attrs[key] = val - with h5py.File(src_filename, "r") as src_file, h5py.File(dst_filename, "w") as dst_file: - - def create_virtual_datasets(src_group, dst_group, src_filename, prefix=""): - copy_attrs(src_group, dst_group) - - for name, item in src_group.items(): - if isinstance(item, h5py.Dataset): - shape = item.shape - dtype = item.dtype - layout = h5py.VirtualLayout(shape=shape, dtype=dtype) - vsource = h5py.VirtualSource(src_filename, prefix + name, shape=shape) - layout[...] = vsource - vds = dst_group.create_virtual_dataset(name, layout) - copy_attrs(item, vds) - elif isinstance(item, h5py.Group): - new_group = dst_group.create_group(name) - create_virtual_datasets(item, new_group, src_filename, prefix + name + "/") - - create_virtual_datasets(src_file, dst_file, src_filename) + copy_object(src_file, dst_file, src_filename) # Load FOF catalogue fof_file = phdf5.MultiFile( @@ -253,8 +252,45 @@ def create_virtual_datasets(src_group, dst_group, src_filename, prefix=""): attrs=coordinate_unit_attrs, ) -# TODO: Create virtual files - if comm_rank == 0: + + # Create virtual file + dst_filename = args.output_basename + ".hdf5" + with h5py.File(dst_filename, "w") as dst_file: + + # Copy original virtual file, skip datasets + src_filename = args.fof_basename + ".hdf5" + with h5py.File(src_filename, "r") as src_file: + copy_object(src_file, dst_file, src_filename, skip_datasets=True) + nr_groups = dst_file['Header'].attrs['NumGroups_Total'][0] + + # Get number of groups in each chunk file + counts = [] + for i_file in range(nr_files): + src_filename = output_filename.format(file_nr=i_file) + with h5py.File(src_filename, 'r') as src_file: + if i_file == 0: + props = list(src_file['Groups'].keys()) + shapes = [src_file[f'Groups/{prop}'].shape for prop in props] + dtypes = [src_file[f'Groups/{prop}'].dtype for prop in props] + counts.append(src_file['Header'].attrs['NumGroups_ThisFile'][0]) + + # Create virtual datasets + for prop, shape, dtype in zip(props, shapes, dtypes): + full_shape = (nr_groups, *shape[1:]) + layout = h5py.VirtualLayout(shape=full_shape, dtype=dtype) + + offset = 0 + for i_file in range(nr_files): + src_filename = output_filename.format(file_nr=i_file) + count = counts[i_file] + layout[offset : offset + count] = h5py.VirtualSource( + src_filename, f"Groups/{prop}", shape=(count, *shape[1:]) + ) + offset += count + dst_file.create_virtual_dataset( + f"Groups/{prop}", layout, fillvalue=-999 + ) + print('Done') From d589feb383a1c596cd33fb4a1b2a4efbeba9334e Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 30 Apr 2025 12:23:16 +0100 Subject: [PATCH 05/11] Use relative paths --- misc/calculate_fof_positions.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py index a19bf195..3d2664c6 100644 --- a/misc/calculate_fof_positions.py +++ b/misc/calculate_fof_positions.py @@ -130,7 +130,8 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): src_filename = fof_filename.format(file_nr=i_file) dst_filename = output_filename.format(file_nr=i_file) with h5py.File(src_filename, "r") as src_file, h5py.File(dst_filename, "w") as dst_file: - copy_object(src_file, dst_file, src_filename) + rel_filename = os.path.relpath(src_filename, os.path.dirname(dst_filename)) + copy_object(src_file, dst_file, rel_filename) # Load FOF catalogue fof_file = phdf5.MultiFile( @@ -283,9 +284,10 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): offset = 0 for i_file in range(nr_files): src_filename = output_filename.format(file_nr=i_file) + rel_filename = os.path.relpath(src_filename, os.path.dirname(dst_filename)) count = counts[i_file] layout[offset : offset + count] = h5py.VirtualSource( - src_filename, f"Groups/{prop}", shape=(count, *shape[1:]) + rel_filename, f"Groups/{prop}", shape=(count, *shape[1:]) ) offset += count dst_file.create_virtual_dataset( From a45f9ef79933c8d6df6f10606e8147708775a1f5 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 30 Apr 2025 12:24:34 +0100 Subject: [PATCH 06/11] Format --- misc/calculate_fof_positions.py | 79 ++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py index 3d2664c6..18926d8a 100644 --- a/misc/calculate_fof_positions.py +++ b/misc/calculate_fof_positions.py @@ -35,9 +35,7 @@ # Parse arguments parser = argparse.ArgumentParser( - description=( - "Script to calculate extent of FoF groups." - ) + description=("Script to calculate extent of FoF groups.") ) parser.add_argument( "--snap-basename", @@ -58,18 +56,14 @@ "--output-basename", type=str, required=True, - help=( - "The basename for the output files" - ), + help=("The basename for the output files"), ) parser.add_argument( "--null-fof-id", type=int, required=False, default=2147483647, - help=( - "The FOFGroupIDs of particles not in a FOF group" - ), + help=("The FOFGroupIDs of particles not in a FOF group"), ) args = parser.parse_args() snap_filename = args.snap_basename + ".{file_nr}.hdf5" @@ -79,22 +73,24 @@ if comm_rank == 0: with h5py.File(snap_filename.format(file_nr=0), "r") as file: - header = dict(file['Header'].attrs) - coordinate_unit_attrs = dict(file['PartType1/Coordinates'].attrs) + header = dict(file["Header"].attrs) + coordinate_unit_attrs = dict(file["PartType1/Coordinates"].attrs) else: header = None coordinate_unit_attrs = None header = comm.bcast(header) coordinate_unit_attrs = comm.bcast(coordinate_unit_attrs) -boxsize = header['BoxSize'] -ptypes = np.where(header['TotalNumberOfParticles'] != 0)[0] -nr_files = header['NumFilesPerSnapshot'][0] +boxsize = header["BoxSize"] +ptypes = np.where(header["TotalNumberOfParticles"] != 0)[0] +nr_files = header["NumFilesPerSnapshot"][0] + def copy_attrs(src_obj, dst_obj): for key, val in src_obj.attrs.items(): dst_obj.attrs[key] = val + def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): copy_attrs(src_obj, dst_obj) for name, item in src_obj.items(): @@ -110,16 +106,23 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): copy_attrs(item, vds) elif isinstance(item, h5py.Group): new_group = dst_obj.create_group(name) - copy_object(item, new_group, src_filename, prefix + name + "/", skip_datasets=skip_datasets) + copy_object( + item, + new_group, + src_filename, + prefix + name + "/", + skip_datasets=skip_datasets, + ) + # Assign files to ranks files_per_rank = np.zeros(comm_size, dtype=int) files_per_rank[:] = nr_files // comm_size remainder = nr_files % comm_size if remainder > 0: - step = max(nr_files // (remainder+1), 1) + step = max(nr_files // (remainder + 1), 1) for i in range(remainder): - files_per_rank[(i*step) % comm_size] += 1 + files_per_rank[(i * step) % comm_size] += 1 first_file = np.cumsum(files_per_rank) - files_per_rank assert sum(files_per_rank) == nr_files @@ -129,7 +132,10 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): ): src_filename = fof_filename.format(file_nr=i_file) dst_filename = output_filename.format(file_nr=i_file) - with h5py.File(src_filename, "r") as src_file, h5py.File(dst_filename, "w") as dst_file: + with ( + h5py.File(src_filename, "r") as src_file, + h5py.File(dst_filename, "w") as dst_file, + ): rel_filename = os.path.relpath(src_filename, os.path.dirname(dst_filename)) copy_object(src_file, dst_file, rel_filename) @@ -152,7 +158,7 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): ) for ptype in ptypes: if comm_rank == 0: - print(f'Processing PartType{ptype}') + print(f"Processing PartType{ptype}") # Load particle positions and their FOF IDs part_pos = snap_file.read(f"PartType{ptype}/Coordinates") @@ -165,7 +171,7 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): # Get the centre of the FOF each particle is part of idx = psort.parallel_match(part_fof_ids, fof_group_ids, comm=comm) - assert np.all(idx != -1), 'FOFs could not be found for some particles' + assert np.all(idx != -1), "FOFs could not be found for some particles" part_centre = psort.fetch_elements(fof_centres, idx, comm=comm) # Centre the particles @@ -194,8 +200,10 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): n_part_target = np.sum(gathered_counts) / comm_size cumsum = np.cumsum(gathered_counts) ranks = np.floor(cumsum / n_part_target).astype(int) - ranks = np.clip(ranks, 0, comm_size-1) - n_part_per_rank = np.bincount(ranks, weights=gathered_counts, minlength=comm_size) + ranks = np.clip(ranks, 0, comm_size - 1) + n_part_per_rank = np.bincount( + ranks, weights=gathered_counts, minlength=comm_size + ) assert np.sum(n_part_per_rank) == np.sum(gathered_counts) else: n_part_per_rank = None @@ -234,7 +242,7 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): fof_max_pos[mask, i] = np.maximum(fof_max_pos[mask, i], max_pos) # Carry out some sanity checks -assert np.all(total_part_counts == fof_sizes), 'Not all particles found for some FOFs' +assert np.all(total_part_counts == fof_sizes), "Not all particles found for some FOFs" assert np.max(fof_min_pos) < 0 assert np.min(fof_max_pos) > 0 @@ -248,7 +256,7 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): output_data, elements_per_file, filenames=output_filename, - mode='r+', + mode="r+", group="Groups", attrs=coordinate_unit_attrs, ) @@ -263,18 +271,18 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): src_filename = args.fof_basename + ".hdf5" with h5py.File(src_filename, "r") as src_file: copy_object(src_file, dst_file, src_filename, skip_datasets=True) - nr_groups = dst_file['Header'].attrs['NumGroups_Total'][0] + nr_groups = dst_file["Header"].attrs["NumGroups_Total"][0] # Get number of groups in each chunk file counts = [] for i_file in range(nr_files): src_filename = output_filename.format(file_nr=i_file) - with h5py.File(src_filename, 'r') as src_file: + with h5py.File(src_filename, "r") as src_file: if i_file == 0: - props = list(src_file['Groups'].keys()) - shapes = [src_file[f'Groups/{prop}'].shape for prop in props] - dtypes = [src_file[f'Groups/{prop}'].dtype for prop in props] - counts.append(src_file['Header'].attrs['NumGroups_ThisFile'][0]) + props = list(src_file["Groups"].keys()) + shapes = [src_file[f"Groups/{prop}"].shape for prop in props] + dtypes = [src_file[f"Groups/{prop}"].dtype for prop in props] + counts.append(src_file["Header"].attrs["NumGroups_ThisFile"][0]) # Create virtual datasets for prop, shape, dtype in zip(props, shapes, dtypes): @@ -284,15 +292,14 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): offset = 0 for i_file in range(nr_files): src_filename = output_filename.format(file_nr=i_file) - rel_filename = os.path.relpath(src_filename, os.path.dirname(dst_filename)) + rel_filename = os.path.relpath( + src_filename, os.path.dirname(dst_filename) + ) count = counts[i_file] layout[offset : offset + count] = h5py.VirtualSource( rel_filename, f"Groups/{prop}", shape=(count, *shape[1:]) ) offset += count - dst_file.create_virtual_dataset( - f"Groups/{prop}", layout, fillvalue=-999 - ) - - print('Done') + dst_file.create_virtual_dataset(f"Groups/{prop}", layout, fillvalue=-999) + print("Done") From 0a04c431b5577b3162aeae031867ef0914437e77 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 30 Apr 2025 13:31:56 +0100 Subject: [PATCH 07/11] Add test --- misc/calculate_fof_positions.py | 22 +++++++--- misc/test_fof_positions.py | 72 +++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 misc/test_fof_positions.py diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py index 18926d8a..e5674744 100644 --- a/misc/calculate_fof_positions.py +++ b/misc/calculate_fof_positions.py @@ -7,7 +7,7 @@ Usage: mpirun -- python misc/calculate_fof_positions.py \ - --snapshot-basename=SNAPSHOT \ + --snap-basename=SNAPSHOT \ --fof-basename=FOF \ --output-basename=OUTPUT @@ -251,6 +251,10 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): "MinParticlePosition": fof_min_pos, "MaxParticlePosition": fof_max_pos, } +attrs = { + "MinParticlePosition": coordinate_unit_attrs, + "MaxParticlePosition": coordinate_unit_attrs, +} elements_per_file = fof_file.get_elements_per_file("Groups/GroupIDs") fof_file.write( output_data, @@ -258,7 +262,7 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): filenames=output_filename, mode="r+", group="Groups", - attrs=coordinate_unit_attrs, + attrs=attrs, ) if comm_rank == 0: @@ -274,14 +278,16 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): nr_groups = dst_file["Header"].attrs["NumGroups_Total"][0] # Get number of groups in each chunk file - counts = [] + counts, shapes, dtypes, attrs = [], [], [], {} for i_file in range(nr_files): src_filename = output_filename.format(file_nr=i_file) with h5py.File(src_filename, "r") as src_file: if i_file == 0: props = list(src_file["Groups"].keys()) - shapes = [src_file[f"Groups/{prop}"].shape for prop in props] - dtypes = [src_file[f"Groups/{prop}"].dtype for prop in props] + for prop in props: + shapes.append(src_file[f"Groups/{prop}"].shape) + dtypes.append(src_file[f"Groups/{prop}"].dtype) + attrs[prop] = dict(src_file[f"Groups/{prop}"].attrs) counts.append(src_file["Header"].attrs["NumGroups_ThisFile"][0]) # Create virtual datasets @@ -300,6 +306,10 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): rel_filename, f"Groups/{prop}", shape=(count, *shape[1:]) ) offset += count - dst_file.create_virtual_dataset(f"Groups/{prop}", layout, fillvalue=-999) + dset = dst_file.create_virtual_dataset( + f"Groups/{prop}", layout, fillvalue=-999 + ) + for k, v in attrs[prop].items(): + dset.attrs[k] = v print("Done") diff --git a/misc/test_fof_positions.py b/misc/test_fof_positions.py new file mode 100644 index 00000000..8fa6bb02 --- /dev/null +++ b/misc/test_fof_positions.py @@ -0,0 +1,72 @@ +#!/bin/env python + +""" +test_fof_positions.py + +This script tests the output of calculate_fof_positions.py. +Usage: + + python misc/calculate_fof_positions.py \ + --snap-filename=SNAPSHOT \ + --fof-filename=FOF + +where SNAPSHOT is the filename of the virtual snapshot file, +and FOF is the filename of the virtual FOF cataloge file. +""" + +import argparse + +import numpy as np +import swiftsimio as sw +import tqdm + +# Parse arguments +parser = argparse.ArgumentParser( + description="Script to calculate extent of FoF groups." +) +parser.add_argument( + "--snap-filename", + type=str, + required=True, + help="The filename of the virtual snapshot file", +) +parser.add_argument( + "--fof-filename", + type=str, + required=True, + help="The filename of the virtual FOF catalogue file", +) +parser.add_argument( + "--n-test", + type=int, + required=False, + default=-1, + help="The number of FOFs to check. If -1 all objects will be checked", +) +args = parser.parse_args() + +# Load the FOF file +fof = sw.load(args.fof_filename) +min_pos = fof.fof_groups.centres + fof.fof_groups.min_particle_position +max_pos = fof.fof_groups.centres + fof.fof_groups.max_particle_position + +n_test = args.n_test if args.n_test != -1 else fof.fof_groups.sizes.shape[0] +for i_fof in tqdm.tqdm(range(n_test)): + + # Create mask and load data + mask = sw.mask(args.snap_filename) + load_region = [ + [min_pos[i_fof, 0], max_pos[i_fof, 0]], + [min_pos[i_fof, 1], max_pos[i_fof, 1]], + [min_pos[i_fof, 2], max_pos[i_fof, 2]], + ] + mask.constrain_spatial(load_region) + snap = sw.load(args.snap_filename, mask=mask) + + # Check we loaded all the particles + n_total = 0 + for ptype in ["gas", "dark_matter", "stars", "black_holes"]: + fof_id = fof.fof_groups.group_ids[i_fof].value + part_fof_ids = getattr(snap, ptype).fofgroup_ids.value + n_total += np.sum(part_fof_ids == fof_id) + assert n_total == fof.fof_groups.sizes[i_fof].value, f"Failed for object {i_fof}" From ad1136f8862b4049b411cadc73f85054d99abc48 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 30 Apr 2025 16:36:36 +0100 Subject: [PATCH 08/11] Use absolute positions --- misc/calculate_fof_positions.py | 13 ++----------- misc/test_fof_positions.py | 4 ++-- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py index e5674744..d96a1e62 100644 --- a/misc/calculate_fof_positions.py +++ b/misc/calculate_fof_positions.py @@ -169,15 +169,6 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): part_pos = part_pos[mask] part_fof_ids = part_fof_ids[mask] - # Get the centre of the FOF each particle is part of - idx = psort.parallel_match(part_fof_ids, fof_group_ids, comm=comm) - assert np.all(idx != -1), "FOFs could not be found for some particles" - part_centre = psort.fetch_elements(fof_centres, idx, comm=comm) - - # Centre the particles - shift = (boxsize[None, :] / 2) - part_centre - part_pos = ((part_pos + shift) % boxsize[None, :]) - (boxsize[None, :] / 2) - # Count the number of particles found for each FOF unique_fof_ids, unique_counts = psort.parallel_unique( part_fof_ids, @@ -243,8 +234,8 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): # Carry out some sanity checks assert np.all(total_part_counts == fof_sizes), "Not all particles found for some FOFs" -assert np.max(fof_min_pos) < 0 -assert np.min(fof_max_pos) > 0 +assert np.max(fof_min_pos) < np.inf +assert np.min(fof_max_pos) > -np.inf # Write data to file output_data = { diff --git a/misc/test_fof_positions.py b/misc/test_fof_positions.py index 8fa6bb02..d853e096 100644 --- a/misc/test_fof_positions.py +++ b/misc/test_fof_positions.py @@ -47,8 +47,8 @@ # Load the FOF file fof = sw.load(args.fof_filename) -min_pos = fof.fof_groups.centres + fof.fof_groups.min_particle_position -max_pos = fof.fof_groups.centres + fof.fof_groups.max_particle_position +min_pos = fof.fof_groups.min_particle_position +max_pos = fof.fof_groups.max_particle_position n_test = args.n_test if args.n_test != -1 else fof.fof_groups.sizes.shape[0] for i_fof in tqdm.tqdm(range(n_test)): From 8daef69a318062ccd7435c037311d69b2f20d28f Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 30 Apr 2025 16:45:56 +0100 Subject: [PATCH 09/11] Proper no wrap --- misc/calculate_fof_positions.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py index d96a1e62..24f3e25f 100644 --- a/misc/calculate_fof_positions.py +++ b/misc/calculate_fof_positions.py @@ -169,6 +169,16 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): part_pos = part_pos[mask] part_fof_ids = part_fof_ids[mask] + # Get the centre of the FOF each particle is part of + idx = psort.parallel_match(part_fof_ids, fof_group_ids, comm=comm) + assert np.all(idx != -1), "FOFs could not be found for some particles" + part_centre = psort.fetch_elements(fof_centres, idx, comm=comm) + + # Move particles outside the box if required + shift = (boxsize[None, :] / 2) - part_centre + part_pos = (part_pos + shift) % boxsize[None, :] + part_pos -= shift + # Count the number of particles found for each FOF unique_fof_ids, unique_counts = psort.parallel_unique( part_fof_ids, From 32070615b84ee6840c66a9fe6b2a213fc0442dc2 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Fri, 2 May 2025 13:34:48 +0100 Subject: [PATCH 10/11] FOF radii --- misc/calculate_fof_positions.py | 36 +++++++++++---------------------- misc/test_fof_positions.py | 4 ++-- 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py index 24f3e25f..ec448592 100644 --- a/misc/calculate_fof_positions.py +++ b/misc/calculate_fof_positions.py @@ -148,8 +148,7 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): fof_centres = fof_file.read(f"Groups/Centres") # Initialise arrays for storing results -fof_min_pos = np.inf * np.ones_like(fof_centres) -fof_max_pos = -np.inf * np.ones_like(fof_centres) +fof_radius = np.zeros_like(fof_centres[:, 0]) total_part_counts = np.zeros_like(fof_sizes) # Open snapshot file @@ -174,10 +173,9 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): assert np.all(idx != -1), "FOFs could not be found for some particles" part_centre = psort.fetch_elements(fof_centres, idx, comm=comm) - # Move particles outside the box if required + # Centre the particles shift = (boxsize[None, :] / 2) - part_centre - part_pos = (part_pos + shift) % boxsize[None, :] - part_pos -= shift + part_pos = ((part_pos + shift) % boxsize[None, :]) - (boxsize[None, :] / 2) # Count the number of particles found for each FOF unique_fof_ids, unique_counts = psort.parallel_unique( @@ -231,31 +229,21 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False): idx = psort.parallel_match(fof_group_ids, local_fof_ids, comm=comm) mask = idx != -1 - for i in range(3): - # Calculate max and minimum particle positions for each FOF ID - local_min_pos = np.minimum.reduceat(part_pos[:, i], reduce_idx) - local_max_pos = np.maximum.reduceat(part_pos[:, i], reduce_idx) + # Calculate max particle radius for each FOF ID + part_radius = np.sqrt(np.sum(part_pos**2, axis=1)) + local_radius = np.maximum.reduceat(part_radius, reduce_idx) - # Compare with values from other particle types - min_pos = psort.fetch_elements(local_min_pos, idx[mask], comm=comm) - max_pos = psort.fetch_elements(local_max_pos, idx[mask], comm=comm) - fof_min_pos[mask, i] = np.minimum(fof_min_pos[mask, i], min_pos) - fof_max_pos[mask, i] = np.maximum(fof_max_pos[mask, i], max_pos) + # Compare with values from other particle types + radius = psort.fetch_elements(local_radius, idx[mask], comm=comm) + fof_radius[mask] = np.maximum(fof_radius[mask], radius) # Carry out some sanity checks assert np.all(total_part_counts == fof_sizes), "Not all particles found for some FOFs" -assert np.max(fof_min_pos) < np.inf -assert np.min(fof_max_pos) > -np.inf +assert np.min(fof_radius) > 0 # Write data to file -output_data = { - "MinParticlePosition": fof_min_pos, - "MaxParticlePosition": fof_max_pos, -} -attrs = { - "MinParticlePosition": coordinate_unit_attrs, - "MaxParticlePosition": coordinate_unit_attrs, -} +output_data = {"Radii": fof_radius} +attrs = {"Radii": coordinate_unit_attrs} elements_per_file = fof_file.get_elements_per_file("Groups/GroupIDs") fof_file.write( output_data, diff --git a/misc/test_fof_positions.py b/misc/test_fof_positions.py index d853e096..065dbc1b 100644 --- a/misc/test_fof_positions.py +++ b/misc/test_fof_positions.py @@ -47,8 +47,8 @@ # Load the FOF file fof = sw.load(args.fof_filename) -min_pos = fof.fof_groups.min_particle_position -max_pos = fof.fof_groups.max_particle_position +min_pos = fof.fof_groups.centres - fof.fof_groups.radii[:, None] +max_pos = fof.fof_groups.centres + fof.fof_groups.radii[:, None] n_test = args.n_test if args.n_test != -1 else fof.fof_groups.sizes.shape[0] for i_fof in tqdm.tqdm(range(n_test)): From 03efb2d17babb65f2fe9c106d4d458f18c1b9260 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Fri, 2 May 2025 13:38:36 +0100 Subject: [PATCH 11/11] Update SOAP to read new values --- SOAP/core/combine_chunks.py | 42 ++++++++++++++++++++++++++++++++++++- SOAP/core/soap_args.py | 9 ++++++++ SOAP/property_table.py | 16 ++++++++++++-- parameter_files/README.md | 1 + 4 files changed, 65 insertions(+), 3 deletions(-) diff --git a/SOAP/core/combine_chunks.py b/SOAP/core/combine_chunks.py index 7b491f05..2af22ce8 100644 --- a/SOAP/core/combine_chunks.py +++ b/SOAP/core/combine_chunks.py @@ -175,7 +175,10 @@ def combine_chunks( # Add metadata for FOF properties fof_metadata = [] if (args.fof_group_filename != "") and (args.halo_format == "HBTplus"): - for fofkey in ["Centres", "Masses", "Sizes"]: + fof_keys = ["Centres", "Masses", "Sizes"] + if (args.fof_radius_filename != ""): + fof_keys.append('Radii') + for fofkey in fof_keys: prop = PropertyTable.full_property_list[f"FOF/{fofkey}"] name = f"InputHalos/{prop.name}" size = prop.shape @@ -488,6 +491,43 @@ def combine_chunks( comm=comm_world, ) + # Handle radius differently since SWIFT did not always output radius + # Assumes the FOF radii files are the same order as the other FOFs + read_radii = 'InputHalos/FOF/Radii' in [m[0] for m in fof_metadata] + if read_radii: + # Open file in parallel + pf = PartialFormatter() + fof_filename = pf.format( + args.fof_radius_filename, snap_nr=args.snapshot_nr, file_nr=None + ) + fof_file = phdf5.MultiFile( + fof_filename, + file_nr_attr=("Header", "NumFilesPerSnapshot"), + comm=comm_world, + ) + + fof_radii = np.zeros(keep.shape[0], dtype=np.float32) + fof_radii[keep] = psort.fetch_elements( + fof_file.read("Groups/Radii"), indices, comm=comm_world + ) + prop = PropertyTable.full_property_list[f"FOF/Radii"] + soap_radii_unit = cellgrid.get_unit(prop.unit) + physical = prop.output_physical + a_exponent = prop.a_scale_exponent + if not physical: + soap_radii_unit = soap_radii_unit * cellgrid.get_unit("a") ** a_exponent + fof_radii = (fof_radii * fof_com_unit).to(soap_radii_unit) + phdf5.collective_write( + outfile, + "InputHalos/FOF/Radii", + fof_radii, + create_dataset=False, + comm=comm_world, + ) + else: + if comm_world.Get_rank() == 0: + print("Groups/Radii not found in FOF files") + # Calculate the index in the SOAP output of the host field halo (VR) or the central subhalo of the host FOF group (HBTplus) if "SOAP/HostHaloIndex" in soap_props: with MPITimer("Calculate and write host index of each satellite", comm_world): diff --git a/SOAP/core/soap_args.py b/SOAP/core/soap_args.py index da86c8e9..e2bf61b0 100644 --- a/SOAP/core/soap_args.py +++ b/SOAP/core/soap_args.py @@ -122,6 +122,7 @@ def get_soap_args(comm): args.halo_basename = all_args["HaloFinder"]["filename"] args.halo_format = all_args["HaloFinder"]["type"] args.fof_group_filename = all_args["HaloFinder"].get("fof_filename", "") + args.fof_radius_filename = all_args["HaloFinder"].get("fof_radius_filename", "") args.output_file = all_args["HaloProperties"]["filename"] args.snapshot_nr = all_args["Parameters"]["snap_nr"] args.chunks = all_args["Parameters"]["chunks"] @@ -179,5 +180,13 @@ def get_soap_args(comm): if not os.path.exists(fof_filename): print(f"Could not find FOF group catalogue: {fof_filename}") comm.Abort(1) + if args.fof_radius_filename != "": + assert args.fof_group_filename != "" + fof_filename = args.fof_radius_filename.format( + snap_nr=args.snapshot_nr, file_nr=0 + ) + if not os.path.exists(fof_filename): + print(f"Could not find FOF radius catalogue: {fof_filename}") + comm.Abort(1) return args diff --git a/SOAP/property_table.py b/SOAP/property_table.py index 09e67be9..127f0505 100644 --- a/SOAP/property_table.py +++ b/SOAP/property_table.py @@ -4232,6 +4232,18 @@ class PropertyTable: output_physical=True, a_scale_exponent=None, ), + "FOF/Radii": Property( + name="FOF/Radii", + shape=1, + dtype=np.float32, + unit="snap_length", + description="Radius of the particle furthest from the FOF centre of mass. Zero for satellite and hostless subhalos. Missing for older runs.", + lossy_compression_filter="FMantissa9", + dmo_property=True, + particle_properties=[], + output_physical=False, + a_scale_exponent=1, + ), # SOAP properties "SOAP/SubhaloRankByBoundMass": Property( name="SOAP/SubhaloRankByBoundMass", @@ -4484,8 +4496,8 @@ def generate_tex_files(self, output_dir: str): prop = self.properties[prop_name] footnotes = self.get_footnotes(prop_name) prop_outputname = f"{prop['name']}{footnotes}" - if prop_outputname.split('/')[0] in ['HBTplus', 'VR', 'FOF']: - prop_outputname = 'InputHalos/' + prop_outputname + if prop_outputname.split("/")[0] in ["HBTplus", "VR", "FOF"]: + prop_outputname = "InputHalos/" + prop_outputname prop_outputname = word_wrap_name(prop_outputname) prop_shape = f'{prop["shape"]}' prop_dtype = prop["dtype"] diff --git a/parameter_files/README.md b/parameter_files/README.md index 30369915..feadc201 100644 --- a/parameter_files/README.md +++ b/parameter_files/README.md @@ -38,6 +38,7 @@ Settings for the halo finding algorithm and output file locations. - **type**: The subhalo finder being used. Possible options are `HBTplus`, `VR`, `Subfind`, and `Rockstar`. - **filename**: Template for input halo catalogue files. The format of this depends on the halo finder as they have different output structure. HBTplus example: `"{sim_dir}/{sim_name}/HBT/{snap_nr:03d}/SubSnap_{snap_nr:03d}"` - **fof_filename**: Template for FOF catalog files. Used for storing host FOF information for central subhalos. This is currently only supported for HBTplus +- **fof_radius_filename**: Template for FOF catalog files which contain the "Groups/Radii" dataset. These were produced by a post-processing script, and are missing from the main FOFs ### Group Membership