From daa8ce1c6e9df2de80a9a751e8a96d0a9869efc5 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 7 Jan 2026 11:28:41 +0000 Subject: [PATCH 1/9] Virtual v1 --- compression/make_virtual_snapshot.py | 37 +++++++++++++++------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/compression/make_virtual_snapshot.py b/compression/make_virtual_snapshot.py index 5f6e37f8..dc17eca1 100644 --- a/compression/make_virtual_snapshot.py +++ b/compression/make_virtual_snapshot.py @@ -5,15 +5,24 @@ import shutil -def make_virtual_snapshot(snapshot, membership, output_file, snap_nr): +def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, snap_nr): """ - Given a FLAMINGO snapshot and group membership files, - create a new virtual snapshot with group info. + Given a snapshot and auxilary files, create + a new virtual snapshot with all datasets combine. """ - # Check which datasets exist in the membership files + # Copy the input virtual snapshot to the output + shutil.copyfile(snapshot, output_file) + + # Open the output file + outfile = h5py.File(output_file, "r+") + + # TODO: Loop + auxilary = auxilary_snapshots[0] + + # Check which datasets exist in the auxilary files # and store their attributes and datatype - filename = membership.format(file_nr=0, snap_nr=snap_nr) + filename = auxilary.format(file_nr=0, snap_nr=snap_nr) dset_attrs = {} dset_dtype = {} with h5py.File(filename, "r") as infile: @@ -26,32 +35,26 @@ def make_virtual_snapshot(snapshot, membership, output_file, snap_nr): attrs = dict(infile[f"PartType{ptype}/{dset}"].attrs) dtype = infile[f"PartType{ptype}/{dset}"].dtype - # Some membership files are missing these attributes + # Some auxilary files are missing these attributes if not "Value stored as physical" in attrs: print(f"Setting comoving attrs for PartType{ptype}/{dset}") attrs["Value stored as physical"] = [1] attrs["Property can be converted to comoving"] = [0] - # Add a flag that these are stored in the membership files + # Add a flag that these datasets are stored in the auxilary files attrs["Auxilary file"] = [1] # Store the values we need for later dset_attrs[f"PartType{ptype}"][dset] = attrs dset_dtype[f"PartType{ptype}"][dset] = dtype - # Copy the input virtual snapshot to the output - shutil.copyfile(snapshot, output_file) - - # Open the output file - outfile = h5py.File(output_file, "r+") - - # Loop over input membership files to get dataset shapes + # Loop over input auxilary files to get dataset shapes file_nr = 0 filenames = [] shapes = [] counts = [] while True: - filename = membership.format(file_nr=file_nr, snap_nr=snap_nr) + filename = auxilary.format(file_nr=file_nr, snap_nr=snap_nr) if os.path.exists(filename): filenames.append(filename) with h5py.File(filename, "r") as infile: @@ -73,7 +76,7 @@ def make_virtual_snapshot(snapshot, membership, output_file, snap_nr): break file_nr += 1 if file_nr == 0: - raise IOError(f"Failed to find files matching: {membership}") + raise IOError(f"Failed to find files matching: {auxilary}") # Loop over particle types in the output for ptype in range(7): @@ -174,7 +177,7 @@ def make_virtual_snapshot(snapshot, membership, output_file, snap_nr): output_file = args.output_file.format(snap_nr=args.snap_nr) # Make a new virtual snapshot with group info - make_virtual_snapshot(virtual_snapshot, args.membership, output_file, args.snap_nr) + make_virtual_snapshot(virtual_snapshot, [args.membership], output_file, args.snap_nr) # Set file paths for datasets abs_snapshot_dir = 
os.path.abspath(os.path.dirname(virtual_snapshot)) From 688baad5cf99671f109dba637b3de48121ee9b5d Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 7 Jan 2026 15:32:43 +0000 Subject: [PATCH 2/9] Combine update_vds_path with make_virtual_snapshot --- compression/make_virtual_snapshot.py | 353 ++++++++++++++++++--------- compression/update_vds_paths.py | 118 --------- 2 files changed, 233 insertions(+), 238 deletions(-) delete mode 100644 compression/update_vds_paths.py diff --git a/compression/make_virtual_snapshot.py b/compression/make_virtual_snapshot.py index dc17eca1..934b92cc 100644 --- a/compression/make_virtual_snapshot.py +++ b/compression/make_virtual_snapshot.py @@ -3,12 +3,74 @@ import os.path import h5py import shutil +import numpy as np -def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, snap_nr): +class SafeDict(dict): + def __missing__(self, key): + # Return the key back in braces so it remains in the string + return "{" + key + "}" + + +def update_vds_paths(dset, modify_function): + """ + Modify the virtual paths of the specified dataset + + Note that querying the source dataspace and selection does not appear + to work (invalid pointer error from h5py) so here we assume that we're + referencing all of the source dataspace, which is correct for SWIFT + snapshots. + + dset: a h5py.Dataset object + modify_function: a function which takes the old path as its argument and + returns the new path + """ + + # Choose a temporary path for the new virtual dataset + path = dset.name + tmp_path = dset.name + ".__tmp__" + + # Build the creation property list for the new dataset + plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE) + for vs in dset.virtual_sources(): + bounds = vs.vspace.get_select_bounds() + if bounds is not None: + lower, upper = bounds + size = np.asarray(upper, dtype=int) - np.asarray(lower, dtype=int) + 1 + src_space = h5py.h5s.create_simple(tuple(size)) + new_name = modify_function(vs.file_name) + plist.set_virtual( + vs.vspace, new_name.encode(), vs.dset_name.encode(), src_space + ) + + # Create the new dataset + tmp_dset = h5py.h5d.create( + dset.file["/"].id, + tmp_path.encode(), + dset.id.get_type(), + dset.id.get_space(), + dcpl=plist, + ) + tmp_dset = h5py.Dataset(tmp_dset) + for attr_name in dset.attrs: + tmp_dset.attrs[attr_name] = dset.attrs[attr_name] + + # Rename the new dataset + f = dset.file + del f[path] + f[path] = f[tmp_path] + del f[tmp_path] + + +def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, absolute_paths=False): """ Given a snapshot and auxilary files, create a new virtual snapshot with all datasets combine. 
+ + snapshot: Path to the snapshot file + auxilary_snapshots: List of auxiliary file patterns + output_file: Path to the output virtual snapshot + absolute_paths: If True, use absolute paths; if False, use relative paths """ # Copy the input virtual snapshot to the output @@ -17,114 +79,169 @@ def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, snap_nr): # Open the output file outfile = h5py.File(output_file, "r+") - # TODO: Loop - auxilary = auxilary_snapshots[0] + # Calculate directories for path updates + abs_snapshot_dir = os.path.abspath(os.path.dirname(snapshot)) + abs_auxilary_dirs = [ + os.path.abspath(os.path.dirname(aux.format(file_nr=0))) + for aux in auxilary_snapshots + ] + abs_output_dir = os.path.abspath(os.path.dirname(output_file)) + + if absolute_paths: + snapshot_dir = abs_snapshot_dir + auxilary_dirs = abs_auxilary_dirs + else: + snapshot_dir = os.path.relpath(abs_snapshot_dir, abs_output_dir) + auxilary_dirs = [ + os.path.relpath(aux_dir, abs_output_dir) + for aux_dir in abs_auxilary_dirs + ] + + # Create path replacement functions + def make_replace_path(target_dir): + def replace_path(old_path): + basename = os.path.basename(old_path) + return os.path.join(target_dir, basename) + return replace_path + + replace_snapshot_path = make_replace_path(snapshot_dir) + auxilary_path_replacers = [make_replace_path(d) for d in auxilary_dirs] + + all_auxilary_datasets = {} + + for aux_index, auxilary in enumerate(auxilary_snapshots): - # Check which datasets exist in the auxilary files - # and store their attributes and datatype - filename = auxilary.format(file_nr=0, snap_nr=snap_nr) - dset_attrs = {} - dset_dtype = {} - with h5py.File(filename, "r") as infile: + # Check which datasets exist in the auxilary files + # and store their attributes and datatype + filename = auxilary.format(file_nr=0) + dset_attrs = {} + dset_dtype = {} + with h5py.File(filename, "r") as infile: + for ptype in range(7): + if not f"PartType{ptype}" in infile: + continue + dset_attrs[f"PartType{ptype}"] = {} + dset_dtype[f"PartType{ptype}"] = {} + for dset in infile[f"PartType{ptype}"].keys(): + attrs = dict(infile[f"PartType{ptype}/{dset}"].attrs) + dtype = infile[f"PartType{ptype}/{dset}"].dtype + + # Some auxilary files are missing these attributes + if not "Value stored as physical" in attrs: + print(f"Setting comoving attrs for PartType{ptype}/{dset}") + attrs["Value stored as physical"] = [1] + attrs["Property can be converted to comoving"] = [0] + + # Add a flag that these datasets are stored in the auxilary files + attrs["Auxilary file"] = [1] + + # Store the values we need for later + dset_attrs[f"PartType{ptype}"][dset] = attrs + dset_dtype[f"PartType{ptype}"][dset] = dtype + + # Check we don't have this dataset in any of the other auxilary files + dset_path = f"PartType{ptype}/{dset}" + if dset_path in all_auxilary_datasets: + other_file = all_auxilary_datasets[f"PartType{ptype}/{dset}"] + raise ValueError(f"{dset_path} is in {auxilary} and {other_file}") + all_auxilary_datasets[dset_path] = auxilary + + # Loop over input auxilary files to get dataset shapes + file_nr = 0 + filenames = [] + shapes = [] + counts = [] + while True: + filename = auxilary.format(file_nr=file_nr) + if os.path.exists(filename): + filenames.append(filename) + with h5py.File(filename, "r") as infile: + shape = {} + count = {} + for ptype in range(7): + if f"PartType{ptype}" not in dset_attrs: + continue + shape[f"PartType{ptype}"] = {} + # Get the shape for each dataset + for dset in 
dset_attrs[f"PartType{ptype}"]: + s = infile[f"PartType{ptype}/{dset}"].shape + shape[f"PartType{ptype}"][dset] = s + # Get the number of particles in this chunk file + count[f"PartType{ptype}"] = s[0] + shapes.append(shape) + counts.append(count) + else: + break + file_nr += 1 + if file_nr == 0: + raise IOError(f"Failed to find files matching: {auxilary}") + + # Loop over particle types in the output for ptype in range(7): - if not f"PartType{ptype}" in infile: + if f"PartType{ptype}" not in dset_attrs: continue - dset_attrs[f"PartType{ptype}"] = {} - dset_dtype[f"PartType{ptype}"] = {} - for dset in infile[f"PartType{ptype}"].keys(): - attrs = dict(infile[f"PartType{ptype}/{dset}"].attrs) - dtype = infile[f"PartType{ptype}/{dset}"].dtype - - # Some auxilary files are missing these attributes - if not "Value stored as physical" in attrs: - print(f"Setting comoving attrs for PartType{ptype}/{dset}") - attrs["Value stored as physical"] = [1] - attrs["Property can be converted to comoving"] = [0] - - # Add a flag that these datasets are stored in the auxilary files - attrs["Auxilary file"] = [1] - - # Store the values we need for later - dset_attrs[f"PartType{ptype}"][dset] = attrs - dset_dtype[f"PartType{ptype}"][dset] = dtype - - # Loop over input auxilary files to get dataset shapes - file_nr = 0 - filenames = [] - shapes = [] - counts = [] - while True: - filename = auxilary.format(file_nr=file_nr, snap_nr=snap_nr) - if os.path.exists(filename): - filenames.append(filename) - with h5py.File(filename, "r") as infile: - shape = {} - count = {} - for ptype in range(7): - if f"PartType{ptype}" not in dset_attrs: - continue - shape[f"PartType{ptype}"] = {} - # Get the shape for each dataset - for dset in dset_attrs[f"PartType{ptype}"]: - s = infile[f"PartType{ptype}/{dset}"].shape - shape[f"PartType{ptype}"][dset] = s - # Get the number of particles in this chunk file - count[f"PartType{ptype}"] = s[0] - shapes.append(shape) - counts.append(count) - else: - break - file_nr += 1 - if file_nr == 0: - raise IOError(f"Failed to find files matching: {auxilary}") - - # Loop over particle types in the output - for ptype in range(7): - if f"PartType{ptype}" not in dset_attrs: - continue - - # Create virtual layout for new datasets - layouts = {} - nr_parts = sum([count[f"PartType{ptype}"] for count in counts]) - for dset in dset_attrs[f"PartType{ptype}"]: - full_shape = list(shapes[0][f"PartType{ptype}"][dset]) - full_shape[0] = nr_parts - full_shape = tuple(full_shape) - dtype = dset_dtype[f"PartType{ptype}"][dset] - layouts[dset] = h5py.VirtualLayout(shape=full_shape, dtype=dtype) - - # Loop over input files - offset = 0 - for filename, count, shape in zip(filenames, counts, shapes): - n_part = count[f"PartType{ptype}"] + + # Create virtual layout for new datasets + layouts = {} + nr_parts = sum([count[f"PartType{ptype}"] for count in counts]) for dset in dset_attrs[f"PartType{ptype}"]: - layouts[dset][offset : offset + n_part] = h5py.VirtualSource( - filename, - f"PartType{ptype}/{dset}", - shape=shape[f"PartType{ptype}"][dset], - ) - offset += n_part - - # Create the virtual datasets, renaming datasets if they - # already exist in the snapshot - for dset, attrs in dset_attrs[f"PartType{ptype}"].items(): - if f"PartType{ptype}/{dset}" in outfile: - outfile.move(f"PartType{ptype}/{dset}", f"PartType{ptype}/{dset}_snap") - outfile.create_virtual_dataset( - f"PartType{ptype}/{dset}", layouts[dset], fillvalue=-999 - ) - for k, v in attrs.items(): - outfile[f"PartType{ptype}/{dset}"].attrs[k] = v + 
full_shape = list(shapes[0][f"PartType{ptype}"][dset]) + full_shape[0] = nr_parts + full_shape = tuple(full_shape) + dtype = dset_dtype[f"PartType{ptype}"][dset] + layouts[dset] = h5py.VirtualLayout(shape=full_shape, dtype=dtype) + + # Loop over input files + offset = 0 + for filename, count, shape in zip(filenames, counts, shapes): + n_part = count[f"PartType{ptype}"] + for dset in dset_attrs[f"PartType{ptype}"]: + layouts[dset][offset : offset + n_part] = h5py.VirtualSource( + filename, + f"PartType{ptype}/{dset}", + shape=shape[f"PartType{ptype}"][dset], + ) + offset += n_part - # Copy GroupNr_bound to HaloCatalogueIndex, since that is the name in SOAP - if dset == "GroupNr_bound": + # Create the virtual datasets, renaming datasets if they + # already exist in the snapshot + for dset, attrs in dset_attrs[f"PartType{ptype}"].items(): + if f"PartType{ptype}/{dset}" in outfile: + outfile.move(f"PartType{ptype}/{dset}", f"PartType{ptype}/{dset}_snap") outfile.create_virtual_dataset( - f"PartType{ptype}/HaloCatalogueIndex", - layouts["GroupNr_bound"], - fillvalue=-999, + f"PartType{ptype}/{dset}", layouts[dset], fillvalue=-999 ) - for k, v in outfile[f"PartType{ptype}/GroupNr_bound"].attrs.items(): - outfile[f"PartType{ptype}/HaloCatalogueIndex"].attrs[k] = v + for k, v in attrs.items(): + outfile[f"PartType{ptype}/{dset}"].attrs[k] = v + + # Update paths for this newly created auxiliary dataset + update_vds_paths(outfile[f"PartType{ptype}/{dset}"], auxilary_path_replacers[aux_index]) + + # Copy GroupNr_bound to HaloCatalogueIndex, since + # that is the name in SOAP + if dset == "GroupNr_bound": + outfile.create_virtual_dataset( + f"PartType{ptype}/HaloCatalogueIndex", + layouts["GroupNr_bound"], + fillvalue=-999, + ) + for k, v in outfile[f"PartType{ptype}/GroupNr_bound"].attrs.items(): + outfile[f"PartType{ptype}/HaloCatalogueIndex"].attrs[k] = v + + # Update paths for HaloCatalogueIndex too + update_vds_paths(outfile[f"PartType{ptype}/HaloCatalogueIndex"], auxilary_path_replacers[aux_index]) + + # Update paths for all original snapshot datasets + for ptype in range(7): + ptype_name = f"PartType{ptype}" + if ptype_name in outfile: + for dset_name in list(outfile[ptype_name].keys()): + dset = outfile[f"{ptype_name}/{dset_name}"] + if dset.is_virtual: + # Check if this is an auxiliary dataset (skip those, already handled) + if dset.attrs.get("Auxilary file", [0])[0] != 1: + # This is an original snapshot dataset + update_vds_paths(dset, replace_snapshot_path) # Done outfile.close() @@ -133,7 +250,6 @@ def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, snap_nr): if __name__ == "__main__": import argparse - from update_vds_paths import update_virtual_snapshot_paths # For description of parameters run the following: $ python make_virtual_snapshot.py --help parser = argparse.ArgumentParser( @@ -149,9 +265,10 @@ def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, snap_nr): help="Name of the SWIFT virtual snapshot file, e.g. snapshot_{snap_nr:04}.hdf5", ) parser.add_argument( - "membership", + "auxilary_snapshots", type=str, - help="Format string for membership files, e.g. membership_{snap_nr:04}.{file_nr}.hdf5", + nargs="+", + help="One of more format strings for auxilary files, e.g. 
membership_{snap_nr:04}.{file_nr}.hdf5", ) parser.add_argument( "output_file", @@ -176,20 +293,16 @@ def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, snap_nr): virtual_snapshot = args.virtual_snapshot.format(snap_nr=args.snap_nr) output_file = args.output_file.format(snap_nr=args.snap_nr) - # Make a new virtual snapshot with group info - make_virtual_snapshot(virtual_snapshot, [args.membership], output_file, args.snap_nr) + # We don't want to replace {file_nr} for auxilary snapshots + auxilary_snapshots = [ + filename.format_map(SafeDict({'snap_nr': args.snap_nr})) + for filename in args.auxilary_snapshots + ] - # Set file paths for datasets - abs_snapshot_dir = os.path.abspath(os.path.dirname(virtual_snapshot)) - abs_membership_dir = os.path.abspath( - os.path.dirname(args.membership.format(snap_nr=args.snap_nr, file_nr=0)) + # Make a new virtual snapshot with group info + make_virtual_snapshot( + virtual_snapshot, + auxilary_snapshots, + output_file, + absolute_paths=args.absolute_paths ) - if args.absolute_paths: - # Ensure all paths in the virtual file are absolute to avoid VDS prefix issues - # (we probably need to pick up datasets from two different directories) - update_virtual_snapshot_paths(output_file, abs_snapshot_dir, abs_membership_dir) - else: - abs_output_dir = os.path.abspath(os.path.dirname(output_file)) - rel_snapshot_dir = os.path.relpath(abs_snapshot_dir, abs_output_dir) - rel_membership_dir = os.path.relpath(abs_membership_dir, abs_output_dir) - update_virtual_snapshot_paths(output_file, rel_snapshot_dir, rel_membership_dir) diff --git a/compression/update_vds_paths.py b/compression/update_vds_paths.py deleted file mode 100644 index da24d57d..00000000 --- a/compression/update_vds_paths.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/bin/env python - -import sys -import h5py -import numpy as np -import os.path - - -def update_vds_paths(dset, modify_function): - """ - Modify the virtual paths of the specified dataset - - Note that querying the source dataspace and selection does not appear - to work (invalid pointer error from h5py) so here we assume that we're - referencing all of the source dataspace, which is correct for SWIFT - snapshots. 
- - dset: a h5py.Dataset object - modify_function: a function which takes the old path as its argument and - returns the new path - """ - - # Choose a temporary path for the new virtual dataset - path = dset.name - tmp_path = dset.name + ".__tmp__" - - # Build the creation property list for the new dataset - plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE) - for vs in dset.virtual_sources(): - bounds = vs.vspace.get_select_bounds() - if bounds is not None: - lower, upper = bounds - size = np.asarray(upper, dtype=int) - np.asarray(lower, dtype=int) + 1 - src_space = h5py.h5s.create_simple(tuple(size)) - new_name = modify_function(vs.file_name) - plist.set_virtual( - vs.vspace, new_name.encode(), vs.dset_name.encode(), src_space - ) - - # Create the new dataset - tmp_dset = h5py.h5d.create( - dset.file["/"].id, - tmp_path.encode(), - dset.id.get_type(), - dset.id.get_space(), - dcpl=plist, - ) - tmp_dset = h5py.Dataset(tmp_dset) - for attr_name in dset.attrs: - tmp_dset.attrs[attr_name] = dset.attrs[attr_name] - - # Rename the new dataset - f = dset.file - del f[path] - f[path] = f[tmp_path] - del f[tmp_path] - - -def update_virtual_snapshot_paths(filename, snapshot_dir=None, membership_dir=None): - """ - Add full paths to virtual datasets in the specified file - """ - f = h5py.File(filename, "r+") - - # Find all datasets in the file - all_datasets = [] - - def visit_datasets(name, obj): - if isinstance(obj, h5py.Dataset): - all_datasets.append(obj) - - f.visititems(visit_datasets) - - def replace_snapshot_path(old_path): - basename = os.path.basename(old_path) - return os.path.join(snapshot_dir, basename) - - def replace_membership_path(old_path): - basename = os.path.basename(old_path) - return os.path.join(membership_dir, basename) - - # Loop over datasets and update paths if necessary - for dset in all_datasets: - if dset.is_virtual: - name = dset.name.split("/")[-1] - # Check if the dataset comes from a membership file - if dset.attrs.get("Auxilary file", [0])[0] == 1: - if membership_dir is not None: - update_vds_paths(dset, replace_membership_path) - # Catch old datasets which didn't have the "Auxilary file" set - elif name in ( - "GroupNr_all", - "GroupNr_bound", - "Rank_bound", - "HaloCatalogueIndex", - "SpecificPotentialEnergies", - ): - if membership_dir is not None: - update_vds_paths(dset, replace_membership_path) - # Catch old case of FOF IDs from membership files - elif (name == "FOFGroupIDs") and ("PartType1/FOFGroupIDs_old" in f): - if membership_dir is not None: - update_vds_paths(dset, replace_membership_path) - # Data comes from the snapshot files - else: - if snapshot_dir is not None: - update_vds_paths(dset, replace_snapshot_path) - - f.close() - - -if __name__ == "__main__": - - filename = sys.argv[1] # Virtual snapshot file to update - snapshot_dir = sys.argv[2] # Directory with the real snapshot files - membership_dir = sys.argv[3] # Directory with the real membership files - - update_virtual_snapshot_paths(filename, snapshot_dir, membership_dir) From a3bde138476ae3397db52bdbe9c4a35b0d6ed6aa Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 7 Jan 2026 15:55:37 +0000 Subject: [PATCH 3/9] Virtual snapshot creation using multiple auxilary files --- compression/make_virtual_snapshot.py | 63 +++++++++++++------- scripts/COLIBRE/compress_group_membership.sh | 5 +- scripts/EAGLE.sh | 6 +- 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/compression/make_virtual_snapshot.py b/compression/make_virtual_snapshot.py index 934b92cc..0679ff7a 100644 --- 
a/compression/make_virtual_snapshot.py +++ b/compression/make_virtual_snapshot.py @@ -62,11 +62,13 @@ def update_vds_paths(dset, modify_function): del f[tmp_path] -def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, absolute_paths=False): +def make_virtual_snapshot( + snapshot, auxilary_snapshots, output_file, absolute_paths=False +): """ Given a snapshot and auxilary files, create a new virtual snapshot with all datasets combine. - + snapshot: Path to the snapshot file auxilary_snapshots: List of auxiliary file patterns output_file: Path to the output virtual snapshot @@ -86,15 +88,14 @@ def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, absolute_pa for aux in auxilary_snapshots ] abs_output_dir = os.path.abspath(os.path.dirname(output_file)) - + if absolute_paths: snapshot_dir = abs_snapshot_dir auxilary_dirs = abs_auxilary_dirs else: snapshot_dir = os.path.relpath(abs_snapshot_dir, abs_output_dir) auxilary_dirs = [ - os.path.relpath(aux_dir, abs_output_dir) - for aux_dir in abs_auxilary_dirs + os.path.relpath(aux_dir, abs_output_dir) for aux_dir in abs_auxilary_dirs ] # Create path replacement functions @@ -102,13 +103,14 @@ def make_replace_path(target_dir): def replace_path(old_path): basename = os.path.basename(old_path) return os.path.join(target_dir, basename) + return replace_path replace_snapshot_path = make_replace_path(snapshot_dir) auxilary_path_replacers = [make_replace_path(d) for d in auxilary_dirs] all_auxilary_datasets = {} - + for aux_index, auxilary in enumerate(auxilary_snapshots): # Check which datasets exist in the auxilary files @@ -143,7 +145,9 @@ def replace_path(old_path): dset_path = f"PartType{ptype}/{dset}" if dset_path in all_auxilary_datasets: other_file = all_auxilary_datasets[f"PartType{ptype}/{dset}"] - raise ValueError(f"{dset_path} is in {auxilary} and {other_file}") + raise ValueError( + f"{dset_path} is in {auxilary} and {other_file}" + ) all_auxilary_datasets[dset_path] = auxilary # Loop over input auxilary files to get dataset shapes @@ -207,7 +211,9 @@ def replace_path(old_path): # already exist in the snapshot for dset, attrs in dset_attrs[f"PartType{ptype}"].items(): if f"PartType{ptype}/{dset}" in outfile: - outfile.move(f"PartType{ptype}/{dset}", f"PartType{ptype}/{dset}_snap") + outfile.move( + f"PartType{ptype}/{dset}", f"PartType{ptype}/{dset}_snap" + ) outfile.create_virtual_dataset( f"PartType{ptype}/{dset}", layouts[dset], fillvalue=-999 ) @@ -215,9 +221,12 @@ def replace_path(old_path): outfile[f"PartType{ptype}/{dset}"].attrs[k] = v # Update paths for this newly created auxiliary dataset - update_vds_paths(outfile[f"PartType{ptype}/{dset}"], auxilary_path_replacers[aux_index]) + update_vds_paths( + outfile[f"PartType{ptype}/{dset}"], + auxilary_path_replacers[aux_index], + ) - # Copy GroupNr_bound to HaloCatalogueIndex, since + # Copy GroupNr_bound to HaloCatalogueIndex, since # that is the name in SOAP if dset == "GroupNr_bound": outfile.create_virtual_dataset( @@ -227,9 +236,12 @@ def replace_path(old_path): ) for k, v in outfile[f"PartType{ptype}/GroupNr_bound"].attrs.items(): outfile[f"PartType{ptype}/HaloCatalogueIndex"].attrs[k] = v - + # Update paths for HaloCatalogueIndex too - update_vds_paths(outfile[f"PartType{ptype}/HaloCatalogueIndex"], auxilary_path_replacers[aux_index]) + update_vds_paths( + outfile[f"PartType{ptype}/HaloCatalogueIndex"], + auxilary_path_replacers[aux_index], + ) # Update paths for all original snapshot datasets for ptype in range(7): @@ -260,25 +272,28 @@ def 
replace_path(old_path): ) ) parser.add_argument( - "virtual_snapshot", + "--virtual-snapshot", type=str, + required=True, help="Name of the SWIFT virtual snapshot file, e.g. snapshot_{snap_nr:04}.hdf5", ) parser.add_argument( - "auxilary_snapshots", + "--auxilary-snapshots", type=str, nargs="+", + required=True, help="One of more format strings for auxilary files, e.g. membership_{snap_nr:04}.{file_nr}.hdf5", ) parser.add_argument( - "output_file", + "--output-file", type=str, + required=True, help="Name of the virtual snapshot to create, e.g. membership_{snap_nr:04}.hdf5", ) parser.add_argument( - "snap_nr", + "--snap-nr", type=int, - nargs="?", + required=False, default=-1, help="Snapshot number (default: -1). Not required if snap_nr is present in filenames passed.", ) @@ -289,20 +304,24 @@ def replace_path(old_path): ) args = parser.parse_args() + print(f"Creating virtual snapshot") + for k, v in vars(args).items(): + print(f" {k}: {v}") + # Substitute snap number virtual_snapshot = args.virtual_snapshot.format(snap_nr=args.snap_nr) output_file = args.output_file.format(snap_nr=args.snap_nr) # We don't want to replace {file_nr} for auxilary snapshots auxilary_snapshots = [ - filename.format_map(SafeDict({'snap_nr': args.snap_nr})) + filename.format_map(SafeDict({"snap_nr": args.snap_nr})) for filename in args.auxilary_snapshots ] # Make a new virtual snapshot with group info make_virtual_snapshot( - virtual_snapshot, - auxilary_snapshots, - output_file, - absolute_paths=args.absolute_paths + virtual_snapshot, + auxilary_snapshots, + output_file, + absolute_paths=args.absolute_paths, ) diff --git a/scripts/COLIBRE/compress_group_membership.sh b/scripts/COLIBRE/compress_group_membership.sh index ee34965f..69587a22 100644 --- a/scripts/COLIBRE/compress_group_membership.sh +++ b/scripts/COLIBRE/compress_group_membership.sh @@ -90,7 +90,10 @@ echo "Creating virtual snapshot" snapshot="${output_dir}/${sim}/snapshots/colibre_${snapnum}/colibre_${snapnum}.hdf5" membership="${output_filename}.{file_nr}.hdf5" virtual="${outbase}/colibre_with_SOAP_membership_${snapnum}.hdf5" -python compression/make_virtual_snapshot.py $snapshot $membership $virtual +python compression/make_virtual_snapshot.py \ + --virtual-snapshot $snapshot \ + --auxilary-snapshots $membership \ + --output-file $virtual echo "Setting virtual file to be read-only" chmod a=r "${virtual}" diff --git a/scripts/EAGLE.sh b/scripts/EAGLE.sh index 8f87ef72..d8ffd65b 100755 --- a/scripts/EAGLE.sh +++ b/scripts/EAGLE.sh @@ -78,9 +78,9 @@ python "${soap_dir}/create_virtual_snapshot.py" "snap_${snap_nr}.0.hdf5" cd - python compression/make_virtual_snapshot.py \ - "${output_dir}/swift_snapshots/swift_${snap_nr}/snap_${snap_nr}.hdf5" \ - "${output_dir}/SOAP_uncompressed/membership_${snap_nr}/membership_${snap_nr}.{file_nr}.hdf5" \ - "${output_dir}/SOAP_uncompressed/snap_${snap_nr}.hdf5" \ + --virtual-snapshot "${output_dir}/swift_snapshots/swift_${snap_nr}/snap_${snap_nr}.hdf5" \ + --auxilary-snapshots "${output_dir}/SOAP_uncompressed/membership_${snap_nr}/membership_${snap_nr}.{file_nr}.hdf5" \ + --output-file "${output_dir}/SOAP_uncompressed/snap_${snap_nr}.hdf5" ######### Run SOAP From 73dc32ec4051679c0204641ed5b2115b96d3a9c1 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 7 Jan 2026 16:16:10 +0000 Subject: [PATCH 4/9] BirthHaloCatalogueIndex v1 --- misc/compute_BirthHaloCatalogueIndex.py | 247 ++++++++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100644 misc/compute_BirthHaloCatalogueIndex.py diff --git 
a/misc/compute_BirthHaloCatalogueIndex.py b/misc/compute_BirthHaloCatalogueIndex.py new file mode 100644 index 00000000..3f458f48 --- /dev/null +++ b/misc/compute_BirthHaloCatalogueIndex.py @@ -0,0 +1,247 @@ +#!/bin/env python + +""" +# TODO: Update +match_group_membership.py + +This script matches halos between different simulations run from the +same initial conditions. + +Usage: + + mpirun -- python -u misc/match_group_membership \ + --snap-basename1 SNAP_BASENAME1 \ + --snap-basename2 SNAP_BASENAME2 \ + --membership-basename1 MEMBERSHIP_BASENAME1 \ + --membership-basename2 MEMBERSHIP_BASENAME2 \ + --catalogue-filename1 CATALOGUE_FILENAME1 \ + --catalogue-filename2 CATALOGUE_FILENAME2 \ + --output-filename OUTPUT_FILENAME + +Run "python misc/match_group_membership.py -h" for a discription +of the optional arguments. + +""" + +import argparse +import datetime +import os + +from mpi4py import MPI + +comm = MPI.COMM_WORLD +comm_rank = comm.Get_rank() +comm_size = comm.Get_size() + +import h5py +import numpy as np + +import virgo.mpi.parallel_sort as psort +import virgo.mpi.parallel_hdf5 as phdf5 +from virgo.mpi.gather_array import gather_array + + +def load_particle_data(snap_basename, membership_basename, load_gas, comm): + """ + Load the particle IDs and halo membership for the particle types + we will use to match. Removes unbound particles. + """ + + particle_data = {} + + # Load particle IDs + snap_filename = snap_basename + ".{file_nr}.hdf5" + file = phdf5.MultiFile( + snap_filename, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm + ) + particle_data["PartType4/ParticleIDs"] = file.read("PartType4/ParticleIDs") + if load_gas: + particle_data["PartType0/ParticleIDs"] = file.read("PartType0/ParticleIDs") + + # Membership files don't have a header, so create a list of filenames + n_file = len(file.filenames) + membership_filenames = [f"{membership_basename}.{i}.hdf5" for i in range(n_file)] + # Load membership information + file = phdf5.MultiFile( + membership_filenames, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm + ) + particle_data["PartType4/GroupNr_bound"] = file.read("PartType4/GroupNr_bound") + if load_gas: + particle_data["PartType0/GroupNr_bound"] = file.read("PartType0/GroupNr_bound") + + # Check the two files are partitioned the same way + assert ( + particle_data["PartType4/GroupNr_bound"].shape + == particle_data["PartType4/ParticleIDs"].shape + ) + if load_gas: + assert ( + particle_data["PartType0/GroupNr_bound"].shape + == particle_data["PartType0/ParticleIds"].shape + ) + + return particle_data + + +unit_attrs = { + "Conversion factor to CGS (not including cosmological corrections)": [1.0], + "Conversion factor to physical CGS (including cosmological corrections)": [1.0], + "U_I exponent": [0.0], + "U_L exponent": [0.0], + "U_M exponent": [0.0], + "U_t exponent": [0.0], + "U_T exponent": [0.0], + "a-scale exponent": [0.0], + "h-scale exponent": [0.0], + "Property can be converted to comoving": [0], + "Value stored as physical": [1], +} + + +def mpi_print(string, comm_rank): + if comm_rank == 0: + print(string) + + +if __name__ == "__main__": + + start_time = datetime.datetime.now() + + parser = argparse.ArgumentParser( + description=("Script to calculate BirthHaloCatalogueIndex of star particles"), + ) + parser.add_argument( + "--snap-basename", + type=str, + required=True, + help=( + "The basename of the snapshot files (the snapshot " + "name without the .{file_nr}.hdf5 suffix. 
Use " + "{snap_nr:04d} instead of the snapshot number)" + ), + ) + parser.add_argument( + "--membership-basename", + type=str, + required=True, + help="The basename of the membership files", + ) + parser.add_argument( + "--output-basename", + type=str, + required=True, + help="The basename of the output files", + ) + parser.add_argument( + "--final-snap-nr", + type=int, + required=True, + help=( + # TODO: + "Snapshot at which to load the particles" + ), + ) + # TODO: Skip check gas particles + parser.add_argument( + "--calculate-PreBirthHaloCatalogueIndex", + action="store_true", + help=( + "Whether to calculate and output the subhalo halo catalogue " + "index of the gas particle that formed each star" + ), + ) + + args = parser.parse_args() + + # Log the arguments + for k, v in vars(args).items(): + mpi_print(f" {k}: {v}", comm_rank) + + final_snap_basename = args.snap_basename.format(snap_nr=args.final_snap_nr) + final_membership_basename = args.membership_basename.format( + snap_nr=args.final_snap_nr + ) + mpi_print("Loading stars from final snapshot", comm_rank) + particle_data = load_particle_data( + final_snap_basename, + final_membership_basename, + False, + comm, + ) + star_particle_ids = particle_data["PartType4/ParticleIDs"] + star_birth_ids = particle_data["PartType4/GroupNr_bound"] + star_birth_ids[:] = -99 + + # TODO: + # For the loop we need gas particles from the previous snapshot + # if args.calculate_PreBirthHaloCatalogueIndex: + # star_prebirth_ids = np.copy(star_birth_ids) + + for snap_nr in range(0, args.final_snap_nr + 1): + mpi_print(f"Loading data from snapshot {snap_nr}", comm_rank) + snap_basename = args.snap_basename.format(snap_nr=snap_nr) + membership_basename = args.membership_basename.format(snap_nr=snap_nr) + particle_data = load_particle_data( + snap_basename, + membership_basename, + args.calculate_PreBirthHaloCatalogueIndex, + comm, + ) + + # It would be quicker to make use of the BirthScaleFactors + # instead of checking all stars + idx = psort.parallel_match( + star_particle_ids[star_birth_ids == -99], + particle_data["PartType4/ParticleIDs"], + comm=comm, + ) + + new_birth_ids = psort.fetch_elements( + particle_data["PartType4/GroupNr_bound"], + idx[idx != -1], + comm=comm, + ) + + # TODO: + # if args.calculate_PreBirthHaloCatalogueIndex: + + has_new_birth_id = star_birth_ids == -99 + has_new_birth_id[has_new_birth_id] = idx != -1 + star_birth_ids[has_new_birth_id] = new_birth_ids + + # Check we found a value for every star + assert np.sum(star_birth_ids == -99) == 0 + + mpi_print("Writing output", comm_rank) + snap_file = phdf5.MultiFile( + final_snap_basename + ".{file_nr}.hdf5", + file_nr_attr=("Header", "NumFilesPerSnapshot"), + comm=comm, + ) + elements_per_file = snap_file.get_elements_per_file( + "ParticleIDs", group="PartType4" + ) + output = {"BirthHaloCatalogueIndex": star_birth_ids} + attrs = {"BirthHaloCatalogueIndex": {"Description": "TODO"}} + attrs["BirthHaloCatalogueIndex"].update(unit_attrs) + output_filename = ( + args.output_basename.format(snap_nr=args.final_snap_nr) + ".{file_nr}.hdf5" + ) + if comm_rank == 0: + output_dir = os.path.dirname(output_filename) + os.makedirs(output_dir, exist_ok=True) + comm.barrier() + + snap_file.write( + output, + elements_per_file, + filenames=output_filename, + mode="w", + group="PartType4", + attrs=attrs, + ) + + comm.barrier() + mpi_print(f"Runtime: {datetime.datetime.now() - start_time}", comm_rank) + mpi_print("Done!", comm_rank) + From 03679bbe3984641a3601da47afdac30f05087eaa Mon Sep 17 
00:00:00 2001 From: robjmcgibbon Date: Wed, 14 Jan 2026 09:55:12 +0000 Subject: [PATCH 5/9] Identify gas progenitor --- misc/compute_BirthHaloCatalogueIndex.py | 106 ++++++++++++++++-------- 1 file changed, 70 insertions(+), 36 deletions(-) diff --git a/misc/compute_BirthHaloCatalogueIndex.py b/misc/compute_BirthHaloCatalogueIndex.py index 3f458f48..5fbed1d0 100644 --- a/misc/compute_BirthHaloCatalogueIndex.py +++ b/misc/compute_BirthHaloCatalogueIndex.py @@ -1,25 +1,21 @@ #!/bin/env python """ -# TODO: Update -match_group_membership.py +compute_BirthHaloCatalogueIndex.py -This script matches halos between different simulations run from the -same initial conditions. +This script produces an auxiliary snapshot which contains the subhalo +id each star was part of when it first formed. Usage: - mpirun -- python -u misc/match_group_membership \ - --snap-basename1 SNAP_BASENAME1 \ - --snap-basename2 SNAP_BASENAME2 \ - --membership-basename1 MEMBERSHIP_BASENAME1 \ - --membership-basename2 MEMBERSHIP_BASENAME2 \ - --catalogue-filename1 CATALOGUE_FILENAME1 \ - --catalogue-filename2 CATALOGUE_FILENAME2 \ - --output-filename OUTPUT_FILENAME + mpirun -- python -u misc/compute_BirthHaloCatalogueIndex.py \ + --snap-basename SNAP_BASENAME \ + --membership-basename MEMBERSHIP_BASENAME \ + --output-basename OUTPUT_FILENAME \ + --final-snap-nr FINAL_SNAP_NR -Run "python misc/match_group_membership.py -h" for a discription -of the optional arguments. +Run "python misc/compute_BirthHaloCatalogueIndex.py -h" for a full description +of the arguments, and a list of optional arguments. """ @@ -77,12 +73,12 @@ def load_particle_data(snap_basename, membership_basename, load_gas, comm): if load_gas: assert ( particle_data["PartType0/GroupNr_bound"].shape - == particle_data["PartType0/ParticleIds"].shape + == particle_data["PartType0/ParticleIDs"].shape ) return particle_data - +# Units for the dimensionless fields we will be saving unit_attrs = { "Conversion factor to CGS (not including cosmological corrections)": [1.0], "Conversion factor to physical CGS (including cosmological corrections)": [1.0], @@ -137,11 +133,9 @@ def mpi_print(string, comm_rank): type=int, required=True, help=( - # TODO: "Snapshot at which to load the particles" ), ) - # TODO: Skip check gas particles parser.add_argument( "--calculate-PreBirthHaloCatalogueIndex", action="store_true", @@ -171,14 +165,20 @@ def mpi_print(string, comm_rank): star_particle_ids = particle_data["PartType4/ParticleIDs"] star_birth_ids = particle_data["PartType4/GroupNr_bound"] star_birth_ids[:] = -99 + star_first_snapshot = np.copy(star_birth_ids) - # TODO: - # For the loop we need gas particles from the previous snapshot - # if args.calculate_PreBirthHaloCatalogueIndex: - # star_prebirth_ids = np.copy(star_birth_ids) + if args.calculate_PreBirthHaloCatalogueIndex: + particle_data['PartType0/ParticleIDs'] = np.ones(0) + particle_data['PartType0/GroupNr_bound'] = np.ones(0) + star_prebirth_ids = np.copy(star_birth_ids) for snap_nr in range(0, args.final_snap_nr + 1): + mpi_print(f"Loading data from snapshot {snap_nr}", comm_rank) + if args.calculate_PreBirthHaloCatalogueIndex: + # We need to keep the gas IDs from snapshot N-1 + gas_particle_ids = particle_data['PartType0/ParticleIDs'] + gas_group_nr = particle_data['PartType0/GroupNr_bound'] snap_basename = args.snap_basename.format(snap_nr=snap_nr) membership_basename = args.membership_basename.format(snap_nr=snap_nr) particle_data = load_particle_data( @@ -188,6 +188,7 @@ def mpi_print(string, comm_rank): comm, ) 
+ mpi_print(f"Matching stars", comm_rank) # It would be quicker to make use of the BirthScaleFactors # instead of checking all stars idx = psort.parallel_match( @@ -202,28 +203,50 @@ def mpi_print(string, comm_rank): comm=comm, ) - # TODO: - # if args.calculate_PreBirthHaloCatalogueIndex: - has_new_birth_id = star_birth_ids == -99 has_new_birth_id[has_new_birth_id] = idx != -1 star_birth_ids[has_new_birth_id] = new_birth_ids + star_first_snapshot[has_new_birth_id] = snap_nr + + if args.calculate_PreBirthHaloCatalogueIndex: + mpi_print(f"Matching gas", comm_rank) + # Identify the gas progenitor of the newly formed stars + gas_idx = psort.parallel_match( + star_particle_ids[has_new_birth_id], + gas_particle_ids, + comm=comm, + ) + # The gas progenitor may not exist for all stars due + # to particle splitting. Note this information is + # recoverable if required by using the SplitTrees. + new_prebirth_ids = -99 * np.ones_like(new_birth_ids) + new_prebirth_ids[gas_idx != -1] = psort.fetch_elements( + gas_group_nr, + gas_idx[gas_idx != -1], + comm=comm, + ) + star_prebirth_ids[has_new_birth_id] = new_prebirth_ids # Check we found a value for every star assert np.sum(star_birth_ids == -99) == 0 - mpi_print("Writing output", comm_rank) - snap_file = phdf5.MultiFile( - final_snap_basename + ".{file_nr}.hdf5", - file_nr_attr=("Header", "NumFilesPerSnapshot"), - comm=comm, - ) - elements_per_file = snap_file.get_elements_per_file( - "ParticleIDs", group="PartType4" - ) - output = {"BirthHaloCatalogueIndex": star_birth_ids} - attrs = {"BirthHaloCatalogueIndex": {"Description": "TODO"}} + # Set up what we want to output + output = { + "BirthHaloCatalogueIndex": star_birth_ids, + "FirstSnapshot": star_first_snapshot, + } + attrs = { + "BirthHaloCatalogueIndex": {"Description": "The HaloCatalogueIndex of this particle at the first snapshot it appeared."}, + "FirstSnapshot": {"Description": "Index of the first simulation snapshot in which the star particle is present."}, + } attrs["BirthHaloCatalogueIndex"].update(unit_attrs) + attrs["FirstSnapshot"].update(unit_attrs) + if args.calculate_PreBirthHaloCatalogueIndex: + output["PreBirthHaloCatalogueIndex"] = star_prebirth_ids + attrs["PreBirthHaloCatalogueIndex"] = {"Description": "The HaloCatalogueIndex of gas prognitor at the snapshot before the star formed. 
-99 if no gas progenitor is found."} + attrs["PreBirthHaloCatalogueIndex"].update(unit_attrs) + + # Check the output directory exists output_filename = ( args.output_basename.format(snap_nr=args.final_snap_nr) + ".{file_nr}.hdf5" ) @@ -232,6 +255,16 @@ def mpi_print(string, comm_rank): os.makedirs(output_dir, exist_ok=True) comm.barrier() + # Write the output + mpi_print("Writing output", comm_rank) + snap_file = phdf5.MultiFile( + final_snap_basename + ".{file_nr}.hdf5", + file_nr_attr=("Header", "NumFilesPerSnapshot"), + comm=comm, + ) + elements_per_file = snap_file.get_elements_per_file( + "ParticleIDs", group="PartType4" + ) snap_file.write( output, elements_per_file, @@ -241,6 +274,7 @@ def mpi_print(string, comm_rank): attrs=attrs, ) + # Finished comm.barrier() mpi_print(f"Runtime: {datetime.datetime.now() - start_time}", comm_rank) mpi_print("Done!", comm_rank) From 43e709393b292e0d14a5c7f2dfa5384d8614c226 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 14 Jan 2026 09:59:23 +0000 Subject: [PATCH 6/9] Add ExSitu fraction property to SOAP --- .../particle_selection/aperture_properties.py | 19 +++ SOAP/property_table.py | 18 ++- parameter_files/COLIBRE_HYBRID.yml | 1 + parameter_files/COLIBRE_THERMAL.yml | 1 + parameter_files/ExSitu.yml | 111 ++++++++++++++++++ 5 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 parameter_files/ExSitu.yml diff --git a/SOAP/particle_selection/aperture_properties.py b/SOAP/particle_selection/aperture_properties.py index 5d0cbe4b..3a424a42 100644 --- a/SOAP/particle_selection/aperture_properties.py +++ b/SOAP/particle_selection/aperture_properties.py @@ -760,6 +760,24 @@ def TotalSNIaRate(self) -> unyt.unyt_quantity: self.star_mask_ap ].sum() + @lazy_property + def ExSituFraction(self) -> unyt.unyt_quantity: + """ + Mass fraction of bound stars that formed in a different subhalo. 
+ """ + if self.Nstar == 0: + return None + + group_nr = self.get_dataset("PartType4/GroupNr_bound")[self.star_mask_all][ + self.star_mask_ap + ] + birth_group_nr = self.get_dataset("PartType4/BirthHaloCatalogueIndex")[self.star_mask_all][ + self.star_mask_ap + ] + ex_situ = group_nr != birth_group_nr + + return self.star_mass_fraction[ex_situ].sum() + @lazy_property def bh_mask_all(self) -> NDArray[bool]: """ @@ -3788,6 +3806,7 @@ class ApertureProperties(HaloProperty): "stellar_age_mw": False, "stellar_age_lw": False, "TotalSNIaRate": False, + "ExSituFraction": False, "HydrogenMass": False, "HeliumMass": False, "MolecularHydrogenMass": False, diff --git a/SOAP/property_table.py b/SOAP/property_table.py index 583f291f..1867517a 100644 --- a/SOAP/property_table.py +++ b/SOAP/property_table.py @@ -1905,6 +1905,22 @@ class PropertyTable: output_physical=True, a_scale_exponent=0, ), + "ExSituFraction": Property( + name="ExSituFraction", + shape=1, + dtype=np.float32, + unit="dimensionless", + description="Mass fraction of bound stars that formed in a different subhalo", + lossy_compression_filter="FMantissa9", + dmo_property=True, + particle_properties=[ + "PartType4/Masses", + "PartType4/BirthHaloCatalogueIndex", + "PartType4/GroupNr_bound", + ], + output_physical=True, + a_scale_exponent=0, + ), "Mgas": Property( name="GasMass", shape=1, @@ -5134,7 +5150,7 @@ def generate_tex_files(self, output_dir: str): # standalone table file footer tailstr = "\\end{document}" - # generate the auxilary documentation files + # generate the auxiliary documentation files with open(f"{output_dir}/timestamp.tex", "w") as ofile: ofile.write(get_version_string()) with open(f"{output_dir}/table.tex", "w") as ofile: diff --git a/parameter_files/COLIBRE_HYBRID.yml b/parameter_files/COLIBRE_HYBRID.yml index 0f9b3f6e..343be68d 100644 --- a/parameter_files/COLIBRE_HYBRID.yml +++ b/parameter_files/COLIBRE_HYBRID.yml @@ -244,6 +244,7 @@ ApertureProperties: StellarCylindricalVelocityDispersionLuminosityWeighted: false StellarCylindricalVelocityDispersionVerticalLuminosityWeighted: false StellarCylindricalVelocityDispersionDiscPlaneLuminosityWeighted: false + ExSituFraction: false TotalMass: true TotalSNIaRate: true GasMassInColdDenseDiffuseMetals: diff --git a/parameter_files/COLIBRE_THERMAL.yml b/parameter_files/COLIBRE_THERMAL.yml index 403267a5..9d93c947 100644 --- a/parameter_files/COLIBRE_THERMAL.yml +++ b/parameter_files/COLIBRE_THERMAL.yml @@ -244,6 +244,7 @@ ApertureProperties: StellarCylindricalVelocityDispersionDiscPlaneLuminosityWeighted: false StellarRotationalVelocity: false StellarRotationalVelocityLuminosityWeighted: false + ExSituFraction: false TotalMass: true TotalSNIaRate: true GasMassInColdDenseDiffuseMetals: diff --git a/parameter_files/ExSitu.yml b/parameter_files/ExSitu.yml new file mode 100644 index 00000000..47adf469 --- /dev/null +++ b/parameter_files/ExSitu.yml @@ -0,0 +1,111 @@ +# Values in this section are substituted into the other sections +Parameters: + sim_dir: /cosma8/data/dp004/jlvc76/COLIBRE/ScienceRuns + output_dir: /snap8/scratch/dp004/dc-mcgi1/soap_BirthTrackId + scratch_dir: /snap8/scratch/dp004/dc-mcgi1/soap_BirthTrackId + +# Location of the Swift snapshots: +Snapshots: + filename: "{sim_dir}/{sim_name}/snapshots/colibre_{snap_nr:04d}/colibre_{snap_nr:04d}.{file_nr}.hdf5" + +# Which halo finder we're using, and base name for halo finder output files +HaloFinder: + type: HBTplus + filename: "{sim_dir}/{sim_name}/HBT-HERONS/sorted_catalogues/OrderedSubSnap_{snap_nr:03d}.hdf5" + # 
fof_filename: "{sim_dir}/{sim_name}/fof/fof_output_{snap_nr:04d}.hdf5" + # fof_radius_filename: "{sim_dir}/{sim_name}/fof/fof_output_{snap_nr:04d}.hdf5" + read_potential_energies: true + #type: VR + #filename: "{sim_dir}/halo_{snap_nr:04d}" + #type: Subfind + #filename: "{sim_dir}/snapdir_{snap_nr:03d}/snapshot_{snap_nr:03d}" + +GroupMembership: + # Where to write the group membership files + filename: "{sim_dir}/{sim_name}/SOAP-HBT/membership_{snap_nr:04d}/membership_{snap_nr:04d}.{file_nr}.hdf5" + +ExtraInput: + situ: "{sim_dir}/{sim_name}/SOAP_BirthTrackId/test_{snap_nr:04d}/test_{snap_nr:04d}.{file_nr}.hdf5" + +HaloProperties: + # Where to write the halo properties file + filename: "{output_dir}/{sim_name}/SOAP_uncompressed/halo_properties_{snap_nr:04d}.hdf5" + # Where to write temporary chunk output + chunk_dir: "{scratch_dir}/{sim_name}/SOAP-tmp/" + +ApertureProperties: + properties: + StellarMass: true + ExSituFraction: true + variations: + exclusive_50_kpc: + inclusive: false + radius_in_kpc: 50.0 +ProjectedApertureProperties: + properties: + {} + variations: + {} +SOProperties: + properties: + {} + variations: + {} +SubhaloProperties: + properties: + EncloseRadius: true + TotalMass: true + NumberOfBlackHoleParticles: true + NumberOfDarkMatterParticles: true + NumberOfGasParticles: true + NumberOfStarParticles: true +aliases: + PartType0/LastSNIIKineticFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent + PartType0/LastSNIIThermalFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent + snipshot: + PartType0/SpeciesFractions: PartType0/ReducedSpeciesFractions + PartType0/ElementMassFractions: PartType0/ReducedElementMassFractions + PartType0/LastSNIIKineticFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent + PartType0/LastSNIIThermalFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent +filters: + general: + limit: 100 + properties: + - BoundSubhalo/NumberOfGasParticles + - BoundSubhalo/NumberOfDarkMatterParticles + - BoundSubhalo/NumberOfStarParticles + - BoundSubhalo/NumberOfBlackHoleParticles + combine_properties: sum + baryon: + limit: 0 + properties: + - BoundSubhalo/NumberOfGasParticles + - BoundSubhalo/NumberOfStarParticles + combine_properties: sum + dm: + limit: 0 + properties: + - BoundSubhalo/NumberOfDarkMatterParticles + gas: + limit: 0 + properties: + - BoundSubhalo/NumberOfGasParticles + star: + limit: 0 + properties: + - BoundSubhalo/NumberOfStarParticles +defined_constants: + O_H_sun: 4.9e-4 + Fe_H_sun: 3.16e-5 + N_O_sun: 0.138 + C_O_sun: 0.549 + Mg_H_sun: 3.98e-5 +calculations: + calculate_missing_properties: false + strict_halo_copy: false + recently_heated_gas_filter: + delta_time_myr: 15 + use_AGN_delta_T: false + cold_dense_gas_filter: + maximum_temperature_K: 3.16e4 + minimum_hydrogen_number_density_cm3: 0.1 From 257b0be53c971900a0d6000c942f4bede2bcf7cd Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 14 Jan 2026 10:22:22 +0000 Subject: [PATCH 7/9] Add sbatch script --- scripts/COLIBRE/compute_birth_index.sh | 31 ++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100755 scripts/COLIBRE/compute_birth_index.sh diff --git a/scripts/COLIBRE/compute_birth_index.sh b/scripts/COLIBRE/compute_birth_index.sh new file mode 100755 index 00000000..bf777fef --- /dev/null +++ b/scripts/COLIBRE/compute_birth_index.sh @@ -0,0 +1,31 @@ +#!/bin/bash -l + +#SBATCH --cpus-per-task=1 +#SBATCH -o ./logs/birth_track_id_%j.out +#SBATCH -p cosma8 +#SBATCH -A dp004 +#SBATCH -J BirthHaloCatalogueIndex +#SBATCH --nodes=1 +#SBATCH -t 
12:00:00 +# N0752: 1 node, 2 hours +# N1504: 1 node, 12 hours +# N3008: 4 nodes, 24 hours + +set -e + +base_dir="/cosma8/data/dp004/jlvc76/COLIBRE/ScienceRuns" +sim="L0400N3008/Thermal" +output_dir="/cosma8/data/dp004/dc-mcgi1/COLIBRE/BirthHaloCatalogueIndex" +snapnum="0127" + +snap_basename="${base_dir}/${sim}/snapshots/colibre_{snap_nr:04d}/colibre_{snap_nr:04d}" +membership_basename="${base_dir}/${sim}/SOAP-HBT/membership_{snap_nr:04d}/membership_{snap_nr:04d}" +output_basename="${output_dir}/${sim}/SOAP_BirthTrackId/birth_${snapnum}/birth_${snapnum}" + +mpirun -- python misc/compute_BirthHaloCatalogueIndex.py \ + --snap-basename ${snap_basename} \ + --membership-basename ${membership_basename} \ + --output-basename ${output_basename} \ + --final-snap-nr ${snapnum} \ + --calculate-PreBirthHaloCatalogueIndex + From 2fa51f47c36a7e07628ada2d428c92c4522c5c2d Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 14 Jan 2026 13:15:44 +0000 Subject: [PATCH 8/9] Remove extra parameter file --- parameter_files/ExSitu.yml | 111 ------------------------------------- 1 file changed, 111 deletions(-) delete mode 100644 parameter_files/ExSitu.yml diff --git a/parameter_files/ExSitu.yml b/parameter_files/ExSitu.yml deleted file mode 100644 index 47adf469..00000000 --- a/parameter_files/ExSitu.yml +++ /dev/null @@ -1,111 +0,0 @@ -# Values in this section are substituted into the other sections -Parameters: - sim_dir: /cosma8/data/dp004/jlvc76/COLIBRE/ScienceRuns - output_dir: /snap8/scratch/dp004/dc-mcgi1/soap_BirthTrackId - scratch_dir: /snap8/scratch/dp004/dc-mcgi1/soap_BirthTrackId - -# Location of the Swift snapshots: -Snapshots: - filename: "{sim_dir}/{sim_name}/snapshots/colibre_{snap_nr:04d}/colibre_{snap_nr:04d}.{file_nr}.hdf5" - -# Which halo finder we're using, and base name for halo finder output files -HaloFinder: - type: HBTplus - filename: "{sim_dir}/{sim_name}/HBT-HERONS/sorted_catalogues/OrderedSubSnap_{snap_nr:03d}.hdf5" - # fof_filename: "{sim_dir}/{sim_name}/fof/fof_output_{snap_nr:04d}.hdf5" - # fof_radius_filename: "{sim_dir}/{sim_name}/fof/fof_output_{snap_nr:04d}.hdf5" - read_potential_energies: true - #type: VR - #filename: "{sim_dir}/halo_{snap_nr:04d}" - #type: Subfind - #filename: "{sim_dir}/snapdir_{snap_nr:03d}/snapshot_{snap_nr:03d}" - -GroupMembership: - # Where to write the group membership files - filename: "{sim_dir}/{sim_name}/SOAP-HBT/membership_{snap_nr:04d}/membership_{snap_nr:04d}.{file_nr}.hdf5" - -ExtraInput: - situ: "{sim_dir}/{sim_name}/SOAP_BirthTrackId/test_{snap_nr:04d}/test_{snap_nr:04d}.{file_nr}.hdf5" - -HaloProperties: - # Where to write the halo properties file - filename: "{output_dir}/{sim_name}/SOAP_uncompressed/halo_properties_{snap_nr:04d}.hdf5" - # Where to write temporary chunk output - chunk_dir: "{scratch_dir}/{sim_name}/SOAP-tmp/" - -ApertureProperties: - properties: - StellarMass: true - ExSituFraction: true - variations: - exclusive_50_kpc: - inclusive: false - radius_in_kpc: 50.0 -ProjectedApertureProperties: - properties: - {} - variations: - {} -SOProperties: - properties: - {} - variations: - {} -SubhaloProperties: - properties: - EncloseRadius: true - TotalMass: true - NumberOfBlackHoleParticles: true - NumberOfDarkMatterParticles: true - NumberOfGasParticles: true - NumberOfStarParticles: true -aliases: - PartType0/LastSNIIKineticFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent - PartType0/LastSNIIThermalFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent - snipshot: - PartType0/SpeciesFractions: 
PartType0/ReducedSpeciesFractions - PartType0/ElementMassFractions: PartType0/ReducedElementMassFractions - PartType0/LastSNIIKineticFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent - PartType0/LastSNIIThermalFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent -filters: - general: - limit: 100 - properties: - - BoundSubhalo/NumberOfGasParticles - - BoundSubhalo/NumberOfDarkMatterParticles - - BoundSubhalo/NumberOfStarParticles - - BoundSubhalo/NumberOfBlackHoleParticles - combine_properties: sum - baryon: - limit: 0 - properties: - - BoundSubhalo/NumberOfGasParticles - - BoundSubhalo/NumberOfStarParticles - combine_properties: sum - dm: - limit: 0 - properties: - - BoundSubhalo/NumberOfDarkMatterParticles - gas: - limit: 0 - properties: - - BoundSubhalo/NumberOfGasParticles - star: - limit: 0 - properties: - - BoundSubhalo/NumberOfStarParticles -defined_constants: - O_H_sun: 4.9e-4 - Fe_H_sun: 3.16e-5 - N_O_sun: 0.138 - C_O_sun: 0.549 - Mg_H_sun: 3.98e-5 -calculations: - calculate_missing_properties: false - strict_halo_copy: false - recently_heated_gas_filter: - delta_time_myr: 15 - use_AGN_delta_T: false - cold_dense_gas_filter: - maximum_temperature_K: 3.16e4 - minimum_hydrogen_number_density_cm3: 0.1 From 6a1d3984f771f3118d2385f309aa7f8dfa0586f1 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 14 Jan 2026 13:30:25 +0000 Subject: [PATCH 9/9] Format --- .../particle_selection/aperture_properties.py | 6 ++--- misc/compute_BirthHaloCatalogueIndex.py | 26 +++++++++++-------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/SOAP/particle_selection/aperture_properties.py b/SOAP/particle_selection/aperture_properties.py index 3a424a42..a14c9be1 100644 --- a/SOAP/particle_selection/aperture_properties.py +++ b/SOAP/particle_selection/aperture_properties.py @@ -771,9 +771,9 @@ def ExSituFraction(self) -> unyt.unyt_quantity: group_nr = self.get_dataset("PartType4/GroupNr_bound")[self.star_mask_all][ self.star_mask_ap ] - birth_group_nr = self.get_dataset("PartType4/BirthHaloCatalogueIndex")[self.star_mask_all][ - self.star_mask_ap - ] + birth_group_nr = self.get_dataset("PartType4/BirthHaloCatalogueIndex")[ + self.star_mask_all + ][self.star_mask_ap] ex_situ = group_nr != birth_group_nr return self.star_mass_fraction[ex_situ].sum() diff --git a/misc/compute_BirthHaloCatalogueIndex.py b/misc/compute_BirthHaloCatalogueIndex.py index 5fbed1d0..8926db16 100644 --- a/misc/compute_BirthHaloCatalogueIndex.py +++ b/misc/compute_BirthHaloCatalogueIndex.py @@ -78,6 +78,7 @@ def load_particle_data(snap_basename, membership_basename, load_gas, comm): return particle_data + # Units for the dimensionless fields we will be saving unit_attrs = { "Conversion factor to CGS (not including cosmological corrections)": [1.0], @@ -132,9 +133,7 @@ def mpi_print(string, comm_rank): "--final-snap-nr", type=int, required=True, - help=( - "Snapshot at which to load the particles" - ), + help=("Snapshot at which to load the particles"), ) parser.add_argument( "--calculate-PreBirthHaloCatalogueIndex", @@ -168,8 +167,8 @@ def mpi_print(string, comm_rank): star_first_snapshot = np.copy(star_birth_ids) if args.calculate_PreBirthHaloCatalogueIndex: - particle_data['PartType0/ParticleIDs'] = np.ones(0) - particle_data['PartType0/GroupNr_bound'] = np.ones(0) + particle_data["PartType0/ParticleIDs"] = np.ones(0) + particle_data["PartType0/GroupNr_bound"] = np.ones(0) star_prebirth_ids = np.copy(star_birth_ids) for snap_nr in range(0, args.final_snap_nr + 1): @@ -177,8 +176,8 @@ def 
mpi_print(string, comm_rank): mpi_print(f"Loading data from snapshot {snap_nr}", comm_rank) if args.calculate_PreBirthHaloCatalogueIndex: # We need to keep the gas IDs from snapshot N-1 - gas_particle_ids = particle_data['PartType0/ParticleIDs'] - gas_group_nr = particle_data['PartType0/GroupNr_bound'] + gas_particle_ids = particle_data["PartType0/ParticleIDs"] + gas_group_nr = particle_data["PartType0/GroupNr_bound"] snap_basename = args.snap_basename.format(snap_nr=snap_nr) membership_basename = args.membership_basename.format(snap_nr=snap_nr) particle_data = load_particle_data( @@ -236,14 +235,20 @@ def mpi_print(string, comm_rank): "FirstSnapshot": star_first_snapshot, } attrs = { - "BirthHaloCatalogueIndex": {"Description": "The HaloCatalogueIndex of this particle at the first snapshot it appeared."}, - "FirstSnapshot": {"Description": "Index of the first simulation snapshot in which the star particle is present."}, + "BirthHaloCatalogueIndex": { + "Description": "The HaloCatalogueIndex of this particle at the first snapshot it appeared." + }, + "FirstSnapshot": { + "Description": "Index of the first simulation snapshot in which the star particle is present." + }, } attrs["BirthHaloCatalogueIndex"].update(unit_attrs) attrs["FirstSnapshot"].update(unit_attrs) if args.calculate_PreBirthHaloCatalogueIndex: output["PreBirthHaloCatalogueIndex"] = star_prebirth_ids - attrs["PreBirthHaloCatalogueIndex"] = {"Description": "The HaloCatalogueIndex of gas prognitor at the snapshot before the star formed. -99 if no gas progenitor is found."} + attrs["PreBirthHaloCatalogueIndex"] = { + "Description": "The HaloCatalogueIndex of gas prognitor at the snapshot before the star formed. -99 if no gas progenitor is found." + } attrs["PreBirthHaloCatalogueIndex"].update(unit_attrs) # Check the output directory exists @@ -278,4 +283,3 @@ def mpi_print(string, comm_rank): comm.barrier() mpi_print(f"Runtime: {datetime.datetime.now() - start_time}", comm_rank) mpi_print("Done!", comm_rank) -
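
Note on the partial-formatting trick introduced in patch 2 and used when handling the auxilary snapshot patterns: the SafeDict subclass lets str.format_map substitute {snap_nr} while leaving {file_nr} untouched for later expansion. Below is a minimal, self-contained sketch of that behaviour (illustrative only, not part of the patch series; the value 77 is an arbitrary example snapshot number):

    class SafeDict(dict):
        def __missing__(self, key):
            # Leave unknown keys in place so they can be filled in later
            return "{" + key + "}"

    pattern = "membership_{snap_nr:04d}.{file_nr}.hdf5"
    partial = pattern.format_map(SafeDict({"snap_nr": 77}))
    print(partial)                    # membership_0077.{file_nr}.hdf5
    print(partial.format(file_nr=3))  # membership_0077.3.hdf5

This relies on the deferred placeholder carrying no format spec (as is the case for {file_nr} in the patterns above); a spec such as {file_nr:03d} would fail once the placeholder is returned as a plain string.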