From 3216c3a9e6f27879655b4fe0ba70c37e51171e57 Mon Sep 17 00:00:00 2001
From: robjmcgibbon <robjmcgibbon@gmail.com>
Date: Tue, 29 Apr 2025 14:34:41 +0100
Subject: [PATCH 01/11] Script v1

---
 misc/fof_prop.py | 253 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 253 insertions(+)
 create mode 100644 misc/fof_prop.py

diff --git a/misc/fof_prop.py b/misc/fof_prop.py
new file mode 100644
index 00000000..e3288f08
--- /dev/null
+++ b/misc/fof_prop.py
@@ -0,0 +1,253 @@
+#!/bin/env python
+
+"""
+TODO: WRITE DOCSTRING
+convert_eagle.py
+
+This script converts EAGLE GADGET snapshots into SWIFT snapshots.
+Usage:
+
+  mpirun -- python convert_eagle.py \
+          --snapshot-basename=SNAPSHOT \
+          --fof-basename=FOF \
+          --output-basename=OUTPUT \
+          --membership-basename=MEMBERSHIP
+
+where SNAPSHOT is the EAGLE snapshot (use the particledata_*** files,
+since the normal snapshots don't store SubGroupNumber), and OUTPUT &
+MEMBERSHIP are the names of the output files. You must run with
+the same number of ranks as input files.
+"""
+
+import argparse
+import os
+
+from mpi4py import MPI
+
+comm = MPI.COMM_WORLD
+comm_rank = comm.Get_rank()
+comm_size = comm.Get_size()
+
+import h5py
+import numpy as np
+import unyt
+
+import virgo.mpi.parallel_sort as psort
+import virgo.mpi.parallel_hdf5 as phdf5
+from virgo.mpi.gather_array import gather_array
+
+# Parse arguments
+parser = argparse.ArgumentParser(
+    description=(
+        "Script to calculate extent of FoF groups."
+    )
+)
+parser.add_argument(
+    "--snap-basename",
+    type=str,
+    required=True,
+    help=(
+        "The basename for the snapshot files (the snapshot "
+        "name without the .{file_nr}.hdf5 suffix)"
+    ),
+)
+parser.add_argument(
+    "--fof-basename",
+    type=str,
+    required=True,
+    help="The basename for the output files",
+)
+parser.add_argument(
+    "--output-basename",
+    type=str,
+    required=True,
+    help=(
+        "The basename for the output files"
+    ),
+)
+parser.add_argument(
+    "--null-fof-id",
+    type=int,
+    required=False,
+    default=2147483647,
+    help=(
+        "The FOFGroupIDs of particles not in a FOF group"
+    ),
+)
+args = parser.parse_args()
+snap_filename = args.snap_basename + ".{file_nr}.hdf5"
+fof_filename = args.fof_basename + ".{file_nr}.hdf5"
+output_filename = args.output_basename + ".{file_nr}.hdf5"
+os.makedirs(os.path.dirname(output_filename), exist_ok=True)
+
+if comm_rank == 0:
+    with h5py.File(snap_filename.format(file_nr=0), "r") as file:
+        header = file['Header'].attrs
+        boxsize = header['BoxSize']
+        ptypes = np.where(header['TotalNumberOfParticles'] != 0)[0]
+        coordinate_unit_attrs = dict(file['PartType1/Coordinates'].attrs)
+
+
+    # TODO: Split across ranks
+    for i_file in range(4):
+        src_filename = fof_filename.format(file_nr=i_file)
+        dst_filename = output_filename.format(file_nr=i_file)
+
+        def copy_attrs(src_obj, dst_obj):
+            for key, val in src_obj.attrs.items():
+                dst_obj.attrs[key] = val
+
+        with h5py.File(src_filename, "r") as src_file, h5py.File(dst_filename, "w") as dst_file:
+
+            def create_virtual_datasets(src_group, dst_group, src_filename, prefix=""):
+                copy_attrs(src_group, dst_group)
+
+                for name, item in src_group.items():
+                    if isinstance(item, h5py.Dataset):
+                        shape = item.shape
+                        dtype = item.dtype
+                        layout = h5py.VirtualLayout(shape=shape, dtype=dtype)
+                        vsource = h5py.VirtualSource(src_filename, prefix + name, shape=shape)
+                        layout[...] = vsource
+                        vds = dst_group.create_virtual_dataset(name, layout)
+                        copy_attrs(item, vds)  # Copy dataset attributes
+                    elif isinstance(item, h5py.Group):
+                        new_group = dst_group.create_group(name)
+                        create_virtual_datasets(item, new_group, src_filename, prefix + name + "/")
+
+            create_virtual_datasets(src_file, dst_file, src_filename)
+
+        print("Virtual copy with attributes created successfully.")
+
+
+else:
+    boxsize = None
+    ptypes = None
+    coordinate_unit_attrs = None
+boxsize = comm.bcast(boxsize)
+ptypes = comm.bcast(ptypes)
+coordinate_unit_attrs = comm.bcast(coordinate_unit_attrs)
+
+# Load FOF catalogue
+fof_file = phdf5.MultiFile(
+    fof_filename, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm
+)
+fof_group_ids = fof_file.read(f"Groups/GroupIDs")
+fof_sizes = fof_file.read(f"Groups/Sizes")
+fof_centres = fof_file.read(f"Groups/Centres")
+
+# Initialise arrays for storing results
+fof_min_pos = np.inf * np.ones_like(fof_centres)
+fof_max_pos = -np.inf * np.ones_like(fof_centres)
+total_part_counts = np.zeros_like(fof_sizes)
+
+# Open snapshot file
+snap_file = phdf5.MultiFile(
+    snap_filename, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm
+)
+for ptype in ptypes:
+    if comm_rank == 0:
+        print(f'Processing PartType{ptype}')
+
+    # Load particle positions and their FOF IDs
+    part_pos = snap_file.read(f"PartType{ptype}/Coordinates")
+    part_fof_ids = snap_file.read(f"PartType{ptype}/FOFGroupIDs")
+
+    # Ignore particles which aren't part of a FOF group
+    mask = part_fof_ids != args.null_fof_id
+    part_pos = part_pos[mask]
+    part_fof_ids = part_fof_ids[mask]
+
+    # Get the centre of the FOF each particle is part of
+    idx = psort.parallel_match(part_fof_ids, fof_group_ids, comm=comm)
+    assert np.all(idx != -1), 'FOFs could not be found for some particles'
+    part_centre = psort.fetch_elements(fof_centres, idx, comm=comm)
+
+    # Centre the particles
+    shift = (boxsize[None, :] / 2) - part_centre
+    part_pos = ((part_pos + shift) % boxsize[None, :]) - (boxsize[None, :] / 2)
+
+    # Count the number of particles found for each FOF
+    unique_fof_ids, unique_counts = psort.parallel_unique(
+        part_fof_ids,
+        return_counts=True,
+        comm=comm,
+    )
+
+    # Keep track of number of particles in each FOF (to compare with "Groups/Sizes")
+    idx = psort.parallel_match(fof_group_ids, unique_fof_ids, comm=comm)
+    mask = idx != -1
+    total_part_counts[mask] += psort.fetch_elements(unique_counts, idx[mask], comm=comm)
+
+    # Sort based on unique_fof_ids
+    order = psort.parallel_sort(unique_fof_ids, return_index=True, comm=comm)
+    unique_counts = psort.fetch_elements(unique_counts, order, comm=comm)
+
+    # Determine how to partition the particles, so groups will not span ranks
+    gathered_counts = gather_array(unique_counts)
+    if comm_rank == 0:
+        n_part_target = np.sum(gathered_counts) / comm_size
+        cumsum = np.cumsum(gathered_counts)
+        ranks = np.floor(cumsum / n_part_target).astype(int)
+        ranks = np.clip(ranks, 0, comm_size-1)
+        n_part_per_rank = np.bincount(ranks, weights=gathered_counts, minlength=comm_size)
+        assert np.sum(n_part_per_rank) == np.sum(gathered_counts)
+    else:
+        n_part_per_rank = None
+    del gathered_counts
+    n_part_per_rank = comm.bcast(n_part_per_rank)
+
+    # Repartition particle data
+    part_fof_ids = psort.repartition(part_fof_ids, n_part_per_rank, comm=comm)
+    part_pos = psort.repartition(part_pos, n_part_per_rank, comm=comm)
+
+    # Sort particles by FOF ID
+    order = psort.parallel_sort(part_fof_ids, return_index=True, comm=comm)
+    part_pos = psort.fetch_elements(part_pos, order, comm=comm)
+
+    # Find boundaries where FOF ID changes
+    if part_fof_ids.shape[0] != 0:
+        change_idx = np.flatnonzero(np.diff(part_fof_ids)) + 1
+        reduce_idx = np.concatenate([np.array([0]), change_idx])
+    else:
+        reduce_idx = np.array([], dtype=int)
+
+    # Determine link to arrays from original FOF catalogue
+    local_fof_ids = part_fof_ids[reduce_idx]
+    idx = psort.parallel_match(fof_group_ids, local_fof_ids, comm=comm)
+    mask = idx != -1
+
+    for i in range(3):
+        # Calculate max and minimum particle positions for each FOF ID
+        local_min_pos = np.minimum.reduceat(part_pos[:, i], reduce_idx)
+        local_max_pos = np.maximum.reduceat(part_pos[:, i], reduce_idx)
+
+        # Compare with values from other particle types
+        min_pos = psort.fetch_elements(local_min_pos, idx[mask], comm=comm)
+        max_pos = psort.fetch_elements(local_max_pos, idx[mask], comm=comm)
+        fof_min_pos[mask, i] = np.minimum(fof_min_pos[mask, i], min_pos)
+        fof_max_pos[mask, i] = np.maximum(fof_max_pos[mask, i], max_pos)
+
+# Carry out some sanity checks
+assert np.all(total_part_counts == fof_sizes), 'Not all particles found for some FOFs'
+assert np.max(fof_min_pos) < 0
+assert np.min(fof_max_pos) > 0
+
+# Write data to file
+output_data = {
+    "MinParticlePosition": fof_min_pos,
+    "MaxParticlePosition": fof_max_pos,
+}
+elements_per_file = fof_file.get_elements_per_file("Groups/GroupIDs")
+fof_file.write(
+    output_data,
+    elements_per_file,
+    filenames=output_filename,
+    mode='r+',
+    group="Groups",
+    attrs=coordinate_unit_attrs,
+)
+
+if comm_rank == 0:
+    print('Done')
+

From 9e81ba5225d4aeb83e94ec633a01384a3dec043e Mon Sep 17 00:00:00 2001
From: robjmcgibbon <robjmcgibbon@gmail.com>
Date: Tue, 29 Apr 2025 15:00:40 +0100
Subject: [PATCH 02/11] Create files across ranks

---
 ...fof_prop.py => calculate_fof_positions.py} | 95 +++++++++++--------
 1 file changed, 54 insertions(+), 41 deletions(-)
 rename misc/{fof_prop.py => calculate_fof_positions.py} (75%)

diff --git a/misc/fof_prop.py b/misc/calculate_fof_positions.py
similarity index 75%
rename from misc/fof_prop.py
rename to misc/calculate_fof_positions.py
index e3288f08..7eace260 100644
--- a/misc/fof_prop.py
+++ b/misc/calculate_fof_positions.py
@@ -1,6 +1,11 @@
 #!/bin/env python
 
 """
+
+mpirun -np 8 python -u misc/fof_prop.py \
+    --snap-basename "${sim_dir}/snapshots/colibre_${snap_nr}/colibre_${snap_nr}" \
+    --fof-basename "${sim_dir}/fof/fof_output_${snap_nr}/fof_output_${snap_nr}" \
+    --output-basename "${output_dir}/${sim_name}/fof_extent_${snap_nr}/fof_extent_${snap_nr}"
 TODO: WRITE DOCSTRING
 convert_eagle.py
 
@@ -82,52 +87,60 @@
 
 if comm_rank == 0:
     with h5py.File(snap_filename.format(file_nr=0), "r") as file:
-        header = file['Header'].attrs
-        boxsize = header['BoxSize']
-        ptypes = np.where(header['TotalNumberOfParticles'] != 0)[0]
+        header = dict(file['Header'].attrs)
         coordinate_unit_attrs = dict(file['PartType1/Coordinates'].attrs)
-
-
-    # TODO: Split across ranks
-    for i_file in range(4):
-        src_filename = fof_filename.format(file_nr=i_file)
-        dst_filename = output_filename.format(file_nr=i_file)
-
-        def copy_attrs(src_obj, dst_obj):
-            for key, val in src_obj.attrs.items():
-                dst_obj.attrs[key] = val
-
-        with h5py.File(src_filename, "r") as src_file, h5py.File(dst_filename, "w") as dst_file:
-
-            def create_virtual_datasets(src_group, dst_group, src_filename, prefix=""):
-                copy_attrs(src_group, dst_group)
-
-                for name, item in src_group.items():
-                    if isinstance(item, h5py.Dataset):
-                        shape = item.shape
-                        dtype = item.dtype
-                        layout = h5py.VirtualLayout(shape=shape, dtype=dtype)
-                        vsource = h5py.VirtualSource(src_filename, prefix + name, shape=shape)
-                        layout[...] = vsource
-                        vds = dst_group.create_virtual_dataset(name, layout)
-                        copy_attrs(item, vds)  # Copy dataset attributes
-                    elif isinstance(item, h5py.Group):
-                        new_group = dst_group.create_group(name)
-                        create_virtual_datasets(item, new_group, src_filename, prefix + name + "/")
-
-            create_virtual_datasets(src_file, dst_file, src_filename)
-
-        print("Virtual copy with attributes created successfully.")
-
-
 else:
-    boxsize = None
-    ptypes = None
+    header = None
     coordinate_unit_attrs = None
-boxsize = comm.bcast(boxsize)
-ptypes = comm.bcast(ptypes)
+header = comm.bcast(header)
 coordinate_unit_attrs = comm.bcast(coordinate_unit_attrs)
 
+boxsize = header['BoxSize']
+ptypes = np.where(header['TotalNumberOfParticles'] != 0)[0]
+nr_files = header['NumFilesPerSnapshot'][0]
+
+# Assign files to ranks
+files_per_rank = np.zeros(comm_size, dtype=int)
+files_per_rank[:] = nr_files // comm_size
+remainder = nr_files % comm_size
+if remainder > 0:
+    step = max(nr_files // (remainder+1), 1)
+    for i in range(remainder):
+        files_per_rank[(i*step) % comm_size] += 1
+first_file = np.cumsum(files_per_rank) - files_per_rank
+assert sum(files_per_rank) == nr_files
+
+# Create output files
+for i_file in range(
+    first_file[comm_rank], first_file[comm_rank] + files_per_rank[comm_rank]
+):
+    src_filename = fof_filename.format(file_nr=i_file)
+    dst_filename = output_filename.format(file_nr=i_file)
+
+    def copy_attrs(src_obj, dst_obj):
+        for key, val in src_obj.attrs.items():
+            dst_obj.attrs[key] = val
+
+    with h5py.File(src_filename, "r") as src_file, h5py.File(dst_filename, "w") as dst_file:
+
+        def create_virtual_datasets(src_group, dst_group, src_filename, prefix=""):
+            copy_attrs(src_group, dst_group)
+
+            for name, item in src_group.items():
+                if isinstance(item, h5py.Dataset):
+                    shape = item.shape
+                    dtype = item.dtype
+                    layout = h5py.VirtualLayout(shape=shape, dtype=dtype)
+                    vsource = h5py.VirtualSource(src_filename, prefix + name, shape=shape)
+                    layout[...] = vsource
+                    vds = dst_group.create_virtual_dataset(name, layout)
+                    copy_attrs(item, vds)
+                elif isinstance(item, h5py.Group):
+                    new_group = dst_group.create_group(name)
+                    create_virtual_datasets(item, new_group, src_filename, prefix + name + "/")
+
+        create_virtual_datasets(src_file, dst_file, src_filename)
+
 # Load FOF catalogue
 fof_file = phdf5.MultiFile(
     fof_filename, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm

From c0ba8cf186ce29d60a0cfaf2cd1f1ac49971b946 Mon Sep 17 00:00:00 2001
From: robjmcgibbon <robjmcgibbon@gmail.com>
Date: Tue, 29 Apr 2025 15:05:10 +0100
Subject: [PATCH 03/11] Update docs

---
 misc/calculate_fof_positions.py | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py
index 7eace260..19c90cb5 100644
--- a/misc/calculate_fof_positions.py
+++ b/misc/calculate_fof_positions.py
@@ -1,27 +1,19 @@
 #!/bin/env python
 
 """
+calculate_fof_positions.py
 
-mpirun -np 8 python -u misc/fof_prop.py \
-    --snap-basename "${sim_dir}/snapshots/colibre_${snap_nr}/colibre_${snap_nr}" \
-    --fof-basename "${sim_dir}/fof/fof_output_${snap_nr}/fof_output_${snap_nr}" \
-    --output-basename "${output_dir}/${sim_name}/fof_extent_${snap_nr}/fof_extent_${snap_nr}"
-TODO: WRITE DOCSTRING
-convert_eagle.py
-
-This script converts EAGLE GADGET snapshots into SWIFT snapshots.
+This script calculates the maximum and minimum particles positions for each FOF.
 Usage:
 
-  mpirun -- python convert_eagle.py \
+  mpirun -- python misc/calculate_fof_positions.py \
           --snapshot-basename=SNAPSHOT \
           --fof-basename=FOF \
-          --output-basename=OUTPUT \
-          --membership-basename=MEMBERSHIP
+          --output-basename=OUTPUT
 
-where SNAPSHOT is the EAGLE snapshot (use the particledata_*** files,
-since the normal snapshots don't store SubGroupNumber), and OUTPUT &
-MEMBERSHIP are the names of the output files. You must run with
-the same number of ranks as input files.
+where SNAPSHOT is the basename of the snapshot files (the snapshot
+name without the .{file_nr}.hdf5 suffix), FOF is the basename of the
+fof catalogues, and OUTPUT is the basename of the output fof catalogues.
 """
 
 import argparse
@@ -261,6 +253,8 @@ def create_virtual_datasets(src_group, dst_group, src_filename, prefix=""):
     attrs=coordinate_unit_attrs,
 )
 
+# TODO: Create virtual files
+
 if comm_rank == 0:
     print('Done')
 

From 9dca6d0e3a039ba1c7c634360b3ffd7de0ae1fa0 Mon Sep 17 00:00:00 2001
From: robjmcgibbon <robjmcgibbon@gmail.com>
Date: Wed, 30 Apr 2025 12:03:02 +0100
Subject: [PATCH 04/11] Create virtual file

---
 misc/calculate_fof_positions.py | 86 +++++++++++++++++++++++----------
 1 file changed, 61 insertions(+), 25 deletions(-)

diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py
index 19c90cb5..a19bf195 100644
--- a/misc/calculate_fof_positions.py
+++ b/misc/calculate_fof_positions.py
@@ -91,6 +91,27 @@
 ptypes = np.where(header['TotalNumberOfParticles'] != 0)[0]
 nr_files = header['NumFilesPerSnapshot'][0]
 
+def copy_attrs(src_obj, dst_obj):
+    for key, val in src_obj.attrs.items():
+        dst_obj.attrs[key] = val
+
+def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
+    copy_attrs(src_obj, dst_obj)
+    for name, item in src_obj.items():
+        if isinstance(item, h5py.Dataset):
+            if skip_datasets and (item.name != "/Header/PartTypeNames"):
+                continue
+            shape = item.shape
+            dtype = item.dtype
+            layout = h5py.VirtualLayout(shape=shape, dtype=dtype)
+            vsource = h5py.VirtualSource(src_filename, prefix + name, shape=shape)
+            layout[...] = vsource
+            vds = dst_obj.create_virtual_dataset(name, layout)
+            copy_attrs(item, vds)
+        elif isinstance(item, h5py.Group):
+            new_group = dst_obj.create_group(name)
+            copy_object(item, new_group, src_filename, prefix + name + "/", skip_datasets=skip_datasets)
+
 # Assign files to ranks
 files_per_rank = np.zeros(comm_size, dtype=int)
 files_per_rank[:] = nr_files // comm_size
@@ -108,30 +129,8 @@
 ):
     src_filename = fof_filename.format(file_nr=i_file)
     dst_filename = output_filename.format(file_nr=i_file)
-
-    def copy_attrs(src_obj, dst_obj):
-        for key, val in src_obj.attrs.items():
-            dst_obj.attrs[key] = val
-
     with h5py.File(src_filename, "r") as src_file, h5py.File(dst_filename, "w") as dst_file:
-
-        def create_virtual_datasets(src_group, dst_group, src_filename, prefix=""):
-            copy_attrs(src_group, dst_group)
-
-            for name, item in src_group.items():
-                if isinstance(item, h5py.Dataset):
-                    shape = item.shape
-                    dtype = item.dtype
-                    layout = h5py.VirtualLayout(shape=shape, dtype=dtype)
-                    vsource = h5py.VirtualSource(src_filename, prefix + name, shape=shape)
-                    layout[...] = vsource
-                    vds = dst_group.create_virtual_dataset(name, layout)
-                    copy_attrs(item, vds)
-                elif isinstance(item, h5py.Group):
-                    new_group = dst_group.create_group(name)
-                    create_virtual_datasets(item, new_group, src_filename, prefix + name + "/")
-
-        create_virtual_datasets(src_file, dst_file, src_filename)
+        copy_object(src_file, dst_file, src_filename)
 
 # Load FOF catalogue
 fof_file = phdf5.MultiFile(
@@ -253,8 +252,45 @@ def create_virtual_datasets(src_group, dst_group, src_filename, prefix=""):
     attrs=coordinate_unit_attrs,
 )
 
-# TODO: Create virtual files
-
 if comm_rank == 0:
+
+    # Create virtual file
+    dst_filename = args.output_basename + ".hdf5"
+    with h5py.File(dst_filename, "w") as dst_file:
+
+        # Copy original virtual file, skip datasets
+        src_filename = args.fof_basename + ".hdf5"
+        with h5py.File(src_filename, "r") as src_file:
+            copy_object(src_file, dst_file, src_filename, skip_datasets=True)
+        nr_groups = dst_file['Header'].attrs['NumGroups_Total'][0]
+
+        # Get number of groups in each chunk file
+        counts = []
+        for i_file in range(nr_files):
+            src_filename = output_filename.format(file_nr=i_file)
+            with h5py.File(src_filename, 'r') as src_file:
+                if i_file == 0:
+                    props = list(src_file['Groups'].keys())
+                    shapes = [src_file[f'Groups/{prop}'].shape for prop in props]
+                    dtypes = [src_file[f'Groups/{prop}'].dtype for prop in props]
+                counts.append(src_file['Header'].attrs['NumGroups_ThisFile'][0])
+
+        # Create virtual datasets
+        for prop, shape, dtype in zip(props, shapes, dtypes):
+            full_shape = (nr_groups, *shape[1:])
+            layout = h5py.VirtualLayout(shape=full_shape, dtype=dtype)
+
+            offset = 0
+            for i_file in range(nr_files):
+                src_filename = output_filename.format(file_nr=i_file)
+                count = counts[i_file]
+                layout[offset : offset + count] = h5py.VirtualSource(
+                    src_filename, f"Groups/{prop}", shape=(count, *shape[1:])
+                )
+                offset += count
+            dst_file.create_virtual_dataset(
+                f"Groups/{prop}", layout, fillvalue=-999
+            )
+
     print('Done')
 

From d589feb383a1c596cd33fb4a1b2a4efbeba9334e Mon Sep 17 00:00:00 2001
From: robjmcgibbon <robjmcgibbon@gmail.com>
Date: Wed, 30 Apr 2025 12:23:16 +0100
Subject: [PATCH 05/11] Use relative paths

---
 misc/calculate_fof_positions.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py
index a19bf195..3d2664c6 100644
--- a/misc/calculate_fof_positions.py
+++ b/misc/calculate_fof_positions.py
@@ -130,7 +130,8 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
     src_filename = fof_filename.format(file_nr=i_file)
     dst_filename = output_filename.format(file_nr=i_file)
     with h5py.File(src_filename, "r") as src_file, h5py.File(dst_filename, "w") as dst_file:
-        copy_object(src_file, dst_file, src_filename)
+        rel_filename = os.path.relpath(src_filename, os.path.dirname(dst_filename))
+        copy_object(src_file, dst_file, rel_filename)
 
 # Load FOF catalogue
 fof_file = phdf5.MultiFile(
@@ -283,9 +284,10 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
             offset = 0
             for i_file in range(nr_files):
                 src_filename = output_filename.format(file_nr=i_file)
+                rel_filename = os.path.relpath(src_filename, os.path.dirname(dst_filename))
                 count = counts[i_file]
                 layout[offset : offset + count] = h5py.VirtualSource(
-                    src_filename, f"Groups/{prop}", shape=(count, *shape[1:])
+                    rel_filename, f"Groups/{prop}", shape=(count, *shape[1:])
                 )
                 offset += count
             dst_file.create_virtual_dataset(

From a45f9ef79933c8d6df6f10606e8147708775a1f5 Mon Sep 17 00:00:00 2001
From: robjmcgibbon <robjmcgibbon@gmail.com>
Date: Wed, 30 Apr 2025 12:24:34 +0100
Subject: [PATCH 06/11] Format

---
 misc/calculate_fof_positions.py | 79 ++++++++++++++++++---------------
 1 file changed, 43 insertions(+), 36 deletions(-)

diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py
index 3d2664c6..18926d8a 100644
--- a/misc/calculate_fof_positions.py
+++ b/misc/calculate_fof_positions.py
@@ -35,9 +35,7 @@
 
 # Parse arguments
 parser = argparse.ArgumentParser(
-    description=(
-        "Script to calculate extent of FoF groups."
-    )
+    description=("Script to calculate extent of FoF groups.")
 )
 parser.add_argument(
     "--snap-basename",
@@ -58,18 +56,14 @@
     "--output-basename",
     type=str,
     required=True,
-    help=(
-        "The basename for the output files"
-    ),
+    help=("The basename for the output files"),
 )
 parser.add_argument(
     "--null-fof-id",
     type=int,
     required=False,
     default=2147483647,
-    help=(
-        "The FOFGroupIDs of particles not in a FOF group"
-    ),
+    help=("The FOFGroupIDs of particles not in a FOF group"),
 )
 args = parser.parse_args()
 snap_filename = args.snap_basename + ".{file_nr}.hdf5"
@@ -79,22 +73,24 @@
 
 if comm_rank == 0:
     with h5py.File(snap_filename.format(file_nr=0), "r") as file:
-        header = dict(file['Header'].attrs)
-        coordinate_unit_attrs = dict(file['PartType1/Coordinates'].attrs)
+        header = dict(file["Header"].attrs)
+        coordinate_unit_attrs = dict(file["PartType1/Coordinates"].attrs)
 else:
     header = None
     coordinate_unit_attrs = None
 header = comm.bcast(header)
 coordinate_unit_attrs = comm.bcast(coordinate_unit_attrs)
 
-boxsize = header['BoxSize']
-ptypes = np.where(header['TotalNumberOfParticles'] != 0)[0]
-nr_files = header['NumFilesPerSnapshot'][0]
+boxsize = header["BoxSize"]
+ptypes = np.where(header["TotalNumberOfParticles"] != 0)[0]
+nr_files = header["NumFilesPerSnapshot"][0]
+
 
 def copy_attrs(src_obj, dst_obj):
     for key, val in src_obj.attrs.items():
         dst_obj.attrs[key] = val
 
+
 def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
     copy_attrs(src_obj, dst_obj)
     for name, item in src_obj.items():
@@ -110,16 +106,23 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
             copy_attrs(item, vds)
         elif isinstance(item, h5py.Group):
             new_group = dst_obj.create_group(name)
-            copy_object(item, new_group, src_filename, prefix + name + "/", skip_datasets=skip_datasets)
+            copy_object(
+                item,
+                new_group,
+                src_filename,
+                prefix + name + "/",
+                skip_datasets=skip_datasets,
+            )
+
 
 # Assign files to ranks
 files_per_rank = np.zeros(comm_size, dtype=int)
 files_per_rank[:] = nr_files // comm_size
 remainder = nr_files % comm_size
 if remainder > 0:
-    step = max(nr_files // (remainder+1), 1)
+    step = max(nr_files // (remainder + 1), 1)
     for i in range(remainder):
-        files_per_rank[(i*step) % comm_size] += 1
+        files_per_rank[(i * step) % comm_size] += 1
 first_file = np.cumsum(files_per_rank) - files_per_rank
 assert sum(files_per_rank) == nr_files
 
@@ -129,7 +132,10 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
 ):
     src_filename = fof_filename.format(file_nr=i_file)
     dst_filename = output_filename.format(file_nr=i_file)
-    with h5py.File(src_filename, "r") as src_file, h5py.File(dst_filename, "w") as dst_file:
+    with (
+        h5py.File(src_filename, "r") as src_file,
+        h5py.File(dst_filename, "w") as dst_file,
+    ):
         rel_filename = os.path.relpath(src_filename, os.path.dirname(dst_filename))
         copy_object(src_file, dst_file, rel_filename)
 
@@ -152,7 +158,7 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
 )
 for ptype in ptypes:
     if comm_rank == 0:
-        print(f'Processing PartType{ptype}')
+        print(f"Processing PartType{ptype}")
 
     # Load particle positions and their FOF IDs
     part_pos = snap_file.read(f"PartType{ptype}/Coordinates")
@@ -165,7 +171,7 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
 
     # Get the centre of the FOF each particle is part of
     idx = psort.parallel_match(part_fof_ids, fof_group_ids, comm=comm)
-    assert np.all(idx != -1), 'FOFs could not be found for some particles'
+    assert np.all(idx != -1), "FOFs could not be found for some particles"
     part_centre = psort.fetch_elements(fof_centres, idx, comm=comm)
 
     # Centre the particles
@@ -194,8 +200,10 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
         n_part_target = np.sum(gathered_counts) / comm_size
         cumsum = np.cumsum(gathered_counts)
         ranks = np.floor(cumsum / n_part_target).astype(int)
-        ranks = np.clip(ranks, 0, comm_size-1)
-        n_part_per_rank = np.bincount(ranks, weights=gathered_counts, minlength=comm_size)
+        ranks = np.clip(ranks, 0, comm_size - 1)
+        n_part_per_rank = np.bincount(
+            ranks, weights=gathered_counts, minlength=comm_size
+        )
         assert np.sum(n_part_per_rank) == np.sum(gathered_counts)
     else:
         n_part_per_rank = None
@@ -234,7 +242,7 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
         fof_max_pos[mask, i] = np.maximum(fof_max_pos[mask, i], max_pos)
 
 # Carry out some sanity checks
-assert np.all(total_part_counts == fof_sizes), 'Not all particles found for some FOFs'
+assert np.all(total_part_counts == fof_sizes), "Not all particles found for some FOFs"
 assert np.max(fof_min_pos) < 0
 assert np.min(fof_max_pos) > 0
 
@@ -248,7 +256,7 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
     output_data,
     elements_per_file,
     filenames=output_filename,
-    mode='r+',
+    mode="r+",
     group="Groups",
     attrs=coordinate_unit_attrs,
 )
@@ -263,18 +271,18 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
         src_filename = args.fof_basename + ".hdf5"
         with h5py.File(src_filename, "r") as src_file:
             copy_object(src_file, dst_file, src_filename, skip_datasets=True)
-        nr_groups = dst_file['Header'].attrs['NumGroups_Total'][0]
+        nr_groups = dst_file["Header"].attrs["NumGroups_Total"][0]
 
         # Get number of groups in each chunk file
         counts = []
         for i_file in range(nr_files):
             src_filename = output_filename.format(file_nr=i_file)
-            with h5py.File(src_filename, 'r') as src_file:
+            with h5py.File(src_filename, "r") as src_file:
                 if i_file == 0:
-                    props = list(src_file['Groups'].keys())
-                    shapes = [src_file[f'Groups/{prop}'].shape for prop in props]
-                    dtypes = [src_file[f'Groups/{prop}'].dtype for prop in props]
-                counts.append(src_file['Header'].attrs['NumGroups_ThisFile'][0])
+                    props = list(src_file["Groups"].keys())
+                    shapes = [src_file[f"Groups/{prop}"].shape for prop in props]
+                    dtypes = [src_file[f"Groups/{prop}"].dtype for prop in props]
+                counts.append(src_file["Header"].attrs["NumGroups_ThisFile"][0])
 
         # Create virtual datasets
         for prop, shape, dtype in zip(props, shapes, dtypes):
@@ -284,15 +292,14 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
             offset = 0
             for i_file in range(nr_files):
                 src_filename = output_filename.format(file_nr=i_file)
-                rel_filename = os.path.relpath(src_filename, os.path.dirname(dst_filename))
+                rel_filename = os.path.relpath(
+                    src_filename, os.path.dirname(dst_filename)
+                )
                 count = counts[i_file]
                 layout[offset : offset + count] = h5py.VirtualSource(
                     rel_filename, f"Groups/{prop}", shape=(count, *shape[1:])
                 )
                 offset += count
-            dst_file.create_virtual_dataset(
-                f"Groups/{prop}", layout, fillvalue=-999
-            )
-
-    print('Done')
+            dst_file.create_virtual_dataset(f"Groups/{prop}", layout, fillvalue=-999)
 
+    print("Done")

From 0a04c431b5577b3162aeae031867ef0914437e77 Mon Sep 17 00:00:00 2001
From: robjmcgibbon <robjmcgibbon@gmail.com>
Date: Wed, 30 Apr 2025 13:31:56 +0100
Subject: [PATCH 07/11] Add test

---
 misc/calculate_fof_positions.py | 22 +++++++---
 misc/test_fof_positions.py      | 72 +++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 6 deletions(-)
 create mode 100644 misc/test_fof_positions.py

diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py
index 18926d8a..e5674744 100644
--- a/misc/calculate_fof_positions.py
+++ b/misc/calculate_fof_positions.py
@@ -7,7 +7,7 @@
 Usage:
 
   mpirun -- python misc/calculate_fof_positions.py \
-          --snapshot-basename=SNAPSHOT \
+          --snap-basename=SNAPSHOT \
           --fof-basename=FOF \
           --output-basename=OUTPUT
 
@@ -251,6 +251,10 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
     "MinParticlePosition": fof_min_pos,
     "MaxParticlePosition": fof_max_pos,
 }
+attrs = {
+    "MinParticlePosition": coordinate_unit_attrs,
+    "MaxParticlePosition": coordinate_unit_attrs,
+}
 elements_per_file = fof_file.get_elements_per_file("Groups/GroupIDs")
 fof_file.write(
     output_data,
@@ -258,7 +262,7 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
     filenames=output_filename,
     mode="r+",
     group="Groups",
-    attrs=coordinate_unit_attrs,
+    attrs=attrs,
 )
 
 if comm_rank == 0:
@@ -274,14 +278,16 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
         nr_groups = dst_file["Header"].attrs["NumGroups_Total"][0]
 
         # Get number of groups in each chunk file
-        counts = []
+        counts, shapes, dtypes, attrs = [], [], [], {}
         for i_file in range(nr_files):
             src_filename = output_filename.format(file_nr=i_file)
             with h5py.File(src_filename, "r") as src_file:
                 if i_file == 0:
                     props = list(src_file["Groups"].keys())
-                    shapes = [src_file[f"Groups/{prop}"].shape for prop in props]
-                    dtypes = [src_file[f"Groups/{prop}"].dtype for prop in props]
+                    for prop in props:
+                        shapes.append(src_file[f"Groups/{prop}"].shape)
+                        dtypes.append(src_file[f"Groups/{prop}"].dtype)
+                        attrs[prop] = dict(src_file[f"Groups/{prop}"].attrs)
                 counts.append(src_file["Header"].attrs["NumGroups_ThisFile"][0])
 
         # Create virtual datasets
@@ -300,6 +306,10 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
                     rel_filename, f"Groups/{prop}", shape=(count, *shape[1:])
                 )
                 offset += count
-            dst_file.create_virtual_dataset(f"Groups/{prop}", layout, fillvalue=-999)
+            dset = dst_file.create_virtual_dataset(
+                f"Groups/{prop}", layout, fillvalue=-999
+            )
+            for k, v in attrs[prop].items():
+                dset.attrs[k] = v
 
     print("Done")
diff --git a/misc/test_fof_positions.py b/misc/test_fof_positions.py
new file mode 100644
index 00000000..8fa6bb02
--- /dev/null
+++ b/misc/test_fof_positions.py
@@ -0,0 +1,72 @@
+#!/bin/env python
+
+"""
+test_fof_positions.py
+
+This script tests the output of calculate_fof_positions.py.
+Usage:
+
+  python misc/calculate_fof_positions.py \
+      --snap-filename=SNAPSHOT \
+      --fof-filename=FOF
+
+where SNAPSHOT is the filename of the virtual snapshot file,
+and FOF is the filename of the virtual FOF cataloge file.
+"""
+
+import argparse
+
+import numpy as np
+import swiftsimio as sw
+import tqdm
+
+# Parse arguments
+parser = argparse.ArgumentParser(
+    description="Script to calculate extent of FoF groups."
+)
+parser.add_argument(
+    "--snap-filename",
+    type=str,
+    required=True,
+    help="The filename of the virtual snapshot file",
+)
+parser.add_argument(
+    "--fof-filename",
+    type=str,
+    required=True,
+    help="The filename of the virtual FOF catalogue file",
+)
+parser.add_argument(
+    "--n-test",
+    type=int,
+    required=False,
+    default=-1,
+    help="The number of FOFs to check. If -1 all objects will be checked",
+)
+args = parser.parse_args()
+
+# Load the FOF file
+fof = sw.load(args.fof_filename)
+min_pos = fof.fof_groups.centres + fof.fof_groups.min_particle_position
+max_pos = fof.fof_groups.centres + fof.fof_groups.max_particle_position
+
+n_test = args.n_test if args.n_test != -1 else fof.fof_groups.sizes.shape[0]
+for i_fof in tqdm.tqdm(range(n_test)):
+
+    # Create mask and load data
+    mask = sw.mask(args.snap_filename)
+    load_region = [
+        [min_pos[i_fof, 0], max_pos[i_fof, 0]],
+        [min_pos[i_fof, 1], max_pos[i_fof, 1]],
+        [min_pos[i_fof, 2], max_pos[i_fof, 2]],
+    ]
+    mask.constrain_spatial(load_region)
+    snap = sw.load(args.snap_filename, mask=mask)
+
+    # Check we loaded all the particles
+    n_total = 0
+    for ptype in ["gas", "dark_matter", "stars", "black_holes"]:
+        fof_id = fof.fof_groups.group_ids[i_fof].value
+        part_fof_ids = getattr(snap, ptype).fofgroup_ids.value
+        n_total += np.sum(part_fof_ids == fof_id)
+    assert n_total == fof.fof_groups.sizes[i_fof].value, f"Failed for object {i_fof}"

From ad1136f8862b4049b411cadc73f85054d99abc48 Mon Sep 17 00:00:00 2001
From: robjmcgibbon <robjmcgibbon@gmail.com>
Date: Wed, 30 Apr 2025 16:36:36 +0100
Subject: [PATCH 08/11] Use absolute positions

---
 misc/calculate_fof_positions.py | 13 ++-----------
 misc/test_fof_positions.py      |  4 ++--
 2 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py
index e5674744..d96a1e62 100644
--- a/misc/calculate_fof_positions.py
+++ b/misc/calculate_fof_positions.py
@@ -169,15 +169,6 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
     part_pos = part_pos[mask]
     part_fof_ids = part_fof_ids[mask]
 
-    # Get the centre of the FOF each particle is part of
-    idx = psort.parallel_match(part_fof_ids, fof_group_ids, comm=comm)
-    assert np.all(idx != -1), "FOFs could not be found for some particles"
-    part_centre = psort.fetch_elements(fof_centres, idx, comm=comm)
-
-    # Centre the particles
-    shift = (boxsize[None, :] / 2) - part_centre
-    part_pos = ((part_pos + shift) % boxsize[None, :]) - (boxsize[None, :] / 2)
-
     # Count the number of particles found for each FOF
     unique_fof_ids, unique_counts = psort.parallel_unique(
         part_fof_ids,
@@ -243,8 +234,8 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
 
 # Carry out some sanity checks
 assert np.all(total_part_counts == fof_sizes), "Not all particles found for some FOFs"
-assert np.max(fof_min_pos) < 0
-assert np.min(fof_max_pos) > 0
+assert np.max(fof_min_pos) < np.inf
+assert np.min(fof_max_pos) > -np.inf
 
 # Write data to file
 output_data = {
diff --git a/misc/test_fof_positions.py b/misc/test_fof_positions.py
index 8fa6bb02..d853e096 100644
--- a/misc/test_fof_positions.py
+++ b/misc/test_fof_positions.py
@@ -47,8 +47,8 @@
 
 # Load the FOF file
 fof = sw.load(args.fof_filename)
-min_pos = fof.fof_groups.centres + fof.fof_groups.min_particle_position
-max_pos = fof.fof_groups.centres + fof.fof_groups.max_particle_position
+min_pos = fof.fof_groups.min_particle_position
+max_pos = fof.fof_groups.max_particle_position
 
 n_test = args.n_test if args.n_test != -1 else fof.fof_groups.sizes.shape[0]
 for i_fof in tqdm.tqdm(range(n_test)):

From 8daef69a318062ccd7435c037311d69b2f20d28f Mon Sep 17 00:00:00 2001
From: robjmcgibbon <robjmcgibbon@gmail.com>
Date: Wed, 30 Apr 2025 16:45:56 +0100
Subject: [PATCH 09/11] Proper no wrap

---
 misc/calculate_fof_positions.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py
index d96a1e62..24f3e25f 100644
--- a/misc/calculate_fof_positions.py
+++ b/misc/calculate_fof_positions.py
@@ -169,6 +169,16 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
     part_pos = part_pos[mask]
     part_fof_ids = part_fof_ids[mask]
 
+    # Get the centre of the FOF each particle is part of
+    idx = psort.parallel_match(part_fof_ids, fof_group_ids, comm=comm)
+    assert np.all(idx != -1), "FOFs could not be found for some particles"
+    part_centre = psort.fetch_elements(fof_centres, idx, comm=comm)
+
+    # Move particles outside the box if required
+    shift = (boxsize[None, :] / 2) - part_centre
+    part_pos = (part_pos + shift) % boxsize[None, :]
+    part_pos -= shift
+
     # Count the number of particles found for each FOF
     unique_fof_ids, unique_counts = psort.parallel_unique(
         part_fof_ids,

From 32070615b84ee6840c66a9fe6b2a213fc0442dc2 Mon Sep 17 00:00:00 2001
From: robjmcgibbon <robjmcgibbon@gmail.com>
Date: Fri, 2 May 2025 13:34:48 +0100
Subject: [PATCH 10/11] FOF radii

---
 misc/calculate_fof_positions.py | 36 +++++++++++----------------------
 misc/test_fof_positions.py      |  4 ++--
 2 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/misc/calculate_fof_positions.py b/misc/calculate_fof_positions.py
index 24f3e25f..ec448592 100644
--- a/misc/calculate_fof_positions.py
+++ b/misc/calculate_fof_positions.py
@@ -148,8 +148,7 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
 fof_centres = fof_file.read(f"Groups/Centres")
 
 # Initialise arrays for storing results
-fof_min_pos = np.inf * np.ones_like(fof_centres)
-fof_max_pos = -np.inf * np.ones_like(fof_centres)
+fof_radius = np.zeros_like(fof_centres[:, 0])
 total_part_counts = np.zeros_like(fof_sizes)
 
 # Open snapshot file
@@ -174,10 +173,9 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
     assert np.all(idx != -1), "FOFs could not be found for some particles"
     part_centre = psort.fetch_elements(fof_centres, idx, comm=comm)
 
-    # Move particles outside the box if required
+    # Centre the particles
     shift = (boxsize[None, :] / 2) - part_centre
-    part_pos = (part_pos + shift) % boxsize[None, :]
-    part_pos -= shift
+    part_pos = ((part_pos + shift) % boxsize[None, :]) - (boxsize[None, :] / 2)
 
     # Count the number of particles found for each FOF
     unique_fof_ids, unique_counts = psort.parallel_unique(
@@ -231,31 +229,21 @@ def copy_object(src_obj, dst_obj, src_filename, prefix="", skip_datasets=False):
     idx = psort.parallel_match(fof_group_ids, local_fof_ids, comm=comm)
     mask = idx != -1
 
-    for i in range(3):
-        # Calculate max and minimum particle positions for each FOF ID
-        local_min_pos = np.minimum.reduceat(part_pos[:, i], reduce_idx)
-        local_max_pos = np.maximum.reduceat(part_pos[:, i], reduce_idx)
+    # Calculate max particle radius for each FOF ID
+    part_radius = np.sqrt(np.sum(part_pos**2, axis=1))
+    local_radius = np.maximum.reduceat(part_radius, reduce_idx)
 
-        # Compare with values from other particle types
-        min_pos = psort.fetch_elements(local_min_pos, idx[mask], comm=comm)
-        max_pos = psort.fetch_elements(local_max_pos, idx[mask], comm=comm)
-        fof_min_pos[mask, i] = np.minimum(fof_min_pos[mask, i], min_pos)
-        fof_max_pos[mask, i] = np.maximum(fof_max_pos[mask, i], max_pos)
+    # Compare with values from other particle types
+    radius = psort.fetch_elements(local_radius, idx[mask], comm=comm)
+    fof_radius[mask] = np.maximum(fof_radius[mask], radius)
 
 # Carry out some sanity checks
 assert np.all(total_part_counts == fof_sizes), "Not all particles found for some FOFs"
-assert np.max(fof_min_pos) < np.inf
-assert np.min(fof_max_pos) > -np.inf
+assert np.min(fof_radius) > 0
 
 # Write data to file
-output_data = {
-    "MinParticlePosition": fof_min_pos,
-    "MaxParticlePosition": fof_max_pos,
-}
-attrs = {
-    "MinParticlePosition": coordinate_unit_attrs,
-    "MaxParticlePosition": coordinate_unit_attrs,
-}
+output_data = {"Radii": fof_radius}
+attrs = {"Radii": coordinate_unit_attrs}
 elements_per_file = fof_file.get_elements_per_file("Groups/GroupIDs")
 fof_file.write(
     output_data,
diff --git a/misc/test_fof_positions.py b/misc/test_fof_positions.py
index d853e096..065dbc1b 100644
--- a/misc/test_fof_positions.py
+++ b/misc/test_fof_positions.py
@@ -47,8 +47,8 @@
 
 # Load the FOF file
 fof = sw.load(args.fof_filename)
-min_pos = fof.fof_groups.min_particle_position
-max_pos = fof.fof_groups.max_particle_position
+min_pos = fof.fof_groups.centres - fof.fof_groups.radii[:, None]
+max_pos = fof.fof_groups.centres + fof.fof_groups.radii[:, None]
 
 n_test = args.n_test if args.n_test != -1 else fof.fof_groups.sizes.shape[0]
 for i_fof in tqdm.tqdm(range(n_test)):

From 03efb2d17babb65f2fe9c106d4d458f18c1b9260 Mon Sep 17 00:00:00 2001
From: robjmcgibbon <robjmcgibbon@gmail.com>
Date: Fri, 2 May 2025 13:38:36 +0100
Subject: [PATCH 11/11] Update SOAP to read new values

---
 SOAP/core/combine_chunks.py | 42 ++++++++++++++++++++++++++++++++++++-
 SOAP/core/soap_args.py      |  9 ++++++++
 SOAP/property_table.py      | 16 ++++++++++++--
 parameter_files/README.md   |  1 +
 4 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/SOAP/core/combine_chunks.py b/SOAP/core/combine_chunks.py
index 7b491f05..2af22ce8 100644
--- a/SOAP/core/combine_chunks.py
+++ b/SOAP/core/combine_chunks.py
@@ -175,7 +175,10 @@ def combine_chunks(
     # Add metadata for FOF properties
     fof_metadata = []
     if (args.fof_group_filename != "") and (args.halo_format == "HBTplus"):
-        for fofkey in ["Centres", "Masses", "Sizes"]:
+        fof_keys = ["Centres", "Masses", "Sizes"]
+        if (args.fof_radius_filename != ""):
+            fof_keys.append('Radii')
+        for fofkey in fof_keys:
             prop = PropertyTable.full_property_list[f"FOF/{fofkey}"]
             name = f"InputHalos/{prop.name}"
             size = prop.shape
@@ -488,6 +491,43 @@ def combine_chunks(
             comm=comm_world,
         )
 
+        # Handle radius differently since SWIFT did not always output radius
+        # Assumes the FOF radii files are the same order as the other FOFs
+        read_radii = 'InputHalos/FOF/Radii' in [m[0] for m in fof_metadata]
+        if read_radii:
+            # Open file in parallel
+            pf = PartialFormatter()
+            fof_filename = pf.format(
+                args.fof_radius_filename, snap_nr=args.snapshot_nr, file_nr=None
+            )
+            fof_file = phdf5.MultiFile(
+                fof_filename,
+                file_nr_attr=("Header", "NumFilesPerSnapshot"),
+                comm=comm_world,
+            )
+
+            fof_radii = np.zeros(keep.shape[0], dtype=np.float32)
+            fof_radii[keep] = psort.fetch_elements(
+                fof_file.read("Groups/Radii"), indices, comm=comm_world
+            )
+            prop = PropertyTable.full_property_list[f"FOF/Radii"]
+            soap_radii_unit = cellgrid.get_unit(prop.unit)
+            physical = prop.output_physical
+            a_exponent = prop.a_scale_exponent
+            if not physical:
+                soap_radii_unit = soap_radii_unit * cellgrid.get_unit("a") ** a_exponent
+            fof_radii = (fof_radii * fof_com_unit).to(soap_radii_unit)
+            phdf5.collective_write(
+                outfile,
+                "InputHalos/FOF/Radii",
+                fof_radii,
+                create_dataset=False,
+                comm=comm_world,
+            )
+        else:
+            if comm_world.Get_rank() == 0:
+                print("Groups/Radii not found in FOF files")
+
     # Calculate the index in the SOAP output of the host field halo (VR) or the central subhalo of the host FOF group (HBTplus)
     if "SOAP/HostHaloIndex" in soap_props:
         with MPITimer("Calculate and write host index of each satellite", comm_world):
diff --git a/SOAP/core/soap_args.py b/SOAP/core/soap_args.py
index da86c8e9..e2bf61b0 100644
--- a/SOAP/core/soap_args.py
+++ b/SOAP/core/soap_args.py
@@ -122,6 +122,7 @@ def get_soap_args(comm):
     args.halo_basename = all_args["HaloFinder"]["filename"]
     args.halo_format = all_args["HaloFinder"]["type"]
     args.fof_group_filename = all_args["HaloFinder"].get("fof_filename", "")
+    args.fof_radius_filename = all_args["HaloFinder"].get("fof_radius_filename", "")
     args.output_file = all_args["HaloProperties"]["filename"]
     args.snapshot_nr = all_args["Parameters"]["snap_nr"]
     args.chunks = all_args["Parameters"]["chunks"]
@@ -179,5 +180,13 @@ def get_soap_args(comm):
             if not os.path.exists(fof_filename):
                 print(f"Could not find FOF group catalogue: {fof_filename}")
                 comm.Abort(1)
+        if args.fof_radius_filename != "":
+            assert args.fof_group_filename != ""
+            fof_filename = args.fof_radius_filename.format(
+                snap_nr=args.snapshot_nr, file_nr=0
+            )
+            if not os.path.exists(fof_filename):
+                print(f"Could not find FOF radius catalogue: {fof_filename}")
+                comm.Abort(1)
 
     return args
diff --git a/SOAP/property_table.py b/SOAP/property_table.py
index 09e67be9..127f0505 100644
--- a/SOAP/property_table.py
+++ b/SOAP/property_table.py
@@ -4232,6 +4232,18 @@ class PropertyTable:
             output_physical=True,
             a_scale_exponent=None,
         ),
+        "FOF/Radii": Property(
+            name="FOF/Radii",
+            shape=1,
+            dtype=np.float32,
+            unit="snap_length",
+            description="Radius of the particle furthest from the FOF centre of mass. Zero for satellite and hostless subhalos. Missing for older runs.",
+            lossy_compression_filter="FMantissa9",
+            dmo_property=True,
+            particle_properties=[],
+            output_physical=False,
+            a_scale_exponent=1,
+        ),
         # SOAP properties
         "SOAP/SubhaloRankByBoundMass": Property(
             name="SOAP/SubhaloRankByBoundMass",
@@ -4484,8 +4496,8 @@ def generate_tex_files(self, output_dir: str):
             prop = self.properties[prop_name]
             footnotes = self.get_footnotes(prop_name)
             prop_outputname = f"{prop['name']}{footnotes}"
-            if prop_outputname.split('/')[0] in ['HBTplus', 'VR', 'FOF']:
-                prop_outputname = 'InputHalos/' + prop_outputname
+            if prop_outputname.split("/")[0] in ["HBTplus", "VR", "FOF"]:
+                prop_outputname = "InputHalos/" + prop_outputname
             prop_outputname = word_wrap_name(prop_outputname)
             prop_shape = f'{prop["shape"]}'
             prop_dtype = prop["dtype"]
diff --git a/parameter_files/README.md b/parameter_files/README.md
index 30369915..feadc201 100644
--- a/parameter_files/README.md
+++ b/parameter_files/README.md
@@ -38,6 +38,7 @@ Settings for the halo finding algorithm and output file locations.
 - **type**: The subhalo finder being used. Possible options are `HBTplus`, `VR`, `Subfind`, and `Rockstar`.
 - **filename**: Template for input halo catalogue files. The format of this depends on the halo finder as they have different output structure. HBTplus example: `"{sim_dir}/{sim_name}/HBT/{snap_nr:03d}/SubSnap_{snap_nr:03d}"`
 - **fof_filename**: Template for FOF catalog files. Used for storing host FOF information for central subhalos. This is currently only supported for HBTplus
+- **fof_radius_filename**: Template for FOF catalog files which contain the "Groups/Radii" dataset. These were produced by a post-processing script, and are missing from the main FOFs
 
 ### Group Membership