From daa8ce1c6e9df2de80a9a751e8a96d0a9869efc5 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 7 Jan 2026 11:28:41 +0000 Subject: [PATCH 1/9] Virtual v1 --- compression/make_virtual_snapshot.py | 37 +++++++++++++++------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/compression/make_virtual_snapshot.py b/compression/make_virtual_snapshot.py index 5f6e37f8..dc17eca1 100644 --- a/compression/make_virtual_snapshot.py +++ b/compression/make_virtual_snapshot.py @@ -5,15 +5,24 @@ import shutil -def make_virtual_snapshot(snapshot, membership, output_file, snap_nr): +def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, snap_nr): """ - Given a FLAMINGO snapshot and group membership files, - create a new virtual snapshot with group info. + Given a snapshot and auxilary files, create + a new virtual snapshot with all datasets combine. """ - # Check which datasets exist in the membership files + # Copy the input virtual snapshot to the output + shutil.copyfile(snapshot, output_file) + + # Open the output file + outfile = h5py.File(output_file, "r+") + + # TODO: Loop + auxilary = auxilary_snapshots[0] + + # Check which datasets exist in the auxilary files # and store their attributes and datatype - filename = membership.format(file_nr=0, snap_nr=snap_nr) + filename = auxilary.format(file_nr=0, snap_nr=snap_nr) dset_attrs = {} dset_dtype = {} with h5py.File(filename, "r") as infile: @@ -26,32 +35,26 @@ def make_virtual_snapshot(snapshot, membership, output_file, snap_nr): attrs = dict(infile[f"PartType{ptype}/{dset}"].attrs) dtype = infile[f"PartType{ptype}/{dset}"].dtype - # Some membership files are missing these attributes + # Some auxilary files are missing these attributes if not "Value stored as physical" in attrs: print(f"Setting comoving attrs for PartType{ptype}/{dset}") attrs["Value stored as physical"] = [1] attrs["Property can be converted to comoving"] = [0] - # Add a flag that these are stored in the membership files + # Add a flag that these datasets are stored in the auxilary files attrs["Auxilary file"] = [1] # Store the values we need for later dset_attrs[f"PartType{ptype}"][dset] = attrs dset_dtype[f"PartType{ptype}"][dset] = dtype - # Copy the input virtual snapshot to the output - shutil.copyfile(snapshot, output_file) - - # Open the output file - outfile = h5py.File(output_file, "r+") - - # Loop over input membership files to get dataset shapes + # Loop over input auxilary files to get dataset shapes file_nr = 0 filenames = [] shapes = [] counts = [] while True: - filename = membership.format(file_nr=file_nr, snap_nr=snap_nr) + filename = auxilary.format(file_nr=file_nr, snap_nr=snap_nr) if os.path.exists(filename): filenames.append(filename) with h5py.File(filename, "r") as infile: @@ -73,7 +76,7 @@ def make_virtual_snapshot(snapshot, membership, output_file, snap_nr): break file_nr += 1 if file_nr == 0: - raise IOError(f"Failed to find files matching: {membership}") + raise IOError(f"Failed to find files matching: {auxilary}") # Loop over particle types in the output for ptype in range(7): @@ -174,7 +177,7 @@ def make_virtual_snapshot(snapshot, membership, output_file, snap_nr): output_file = args.output_file.format(snap_nr=args.snap_nr) # Make a new virtual snapshot with group info - make_virtual_snapshot(virtual_snapshot, args.membership, output_file, args.snap_nr) + make_virtual_snapshot(virtual_snapshot, [args.membership], output_file, args.snap_nr) # Set file paths for datasets abs_snapshot_dir = 
os.path.abspath(os.path.dirname(virtual_snapshot)) From 688baad5cf99671f109dba637b3de48121ee9b5d Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 7 Jan 2026 15:32:43 +0000 Subject: [PATCH 2/9] Combine update_vds_path with make_virtual_snapshot --- compression/make_virtual_snapshot.py | 353 ++++++++++++++++++--------- compression/update_vds_paths.py | 118 --------- 2 files changed, 233 insertions(+), 238 deletions(-) delete mode 100644 compression/update_vds_paths.py diff --git a/compression/make_virtual_snapshot.py b/compression/make_virtual_snapshot.py index dc17eca1..934b92cc 100644 --- a/compression/make_virtual_snapshot.py +++ b/compression/make_virtual_snapshot.py @@ -3,12 +3,74 @@ import os.path import h5py import shutil +import numpy as np -def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, snap_nr): +class SafeDict(dict): + def __missing__(self, key): + # Return the key back in braces so it remains in the string + return "{" + key + "}" + + +def update_vds_paths(dset, modify_function): + """ + Modify the virtual paths of the specified dataset + + Note that querying the source dataspace and selection does not appear + to work (invalid pointer error from h5py) so here we assume that we're + referencing all of the source dataspace, which is correct for SWIFT + snapshots. + + dset: a h5py.Dataset object + modify_function: a function which takes the old path as its argument and + returns the new path + """ + + # Choose a temporary path for the new virtual dataset + path = dset.name + tmp_path = dset.name + ".__tmp__" + + # Build the creation property list for the new dataset + plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE) + for vs in dset.virtual_sources(): + bounds = vs.vspace.get_select_bounds() + if bounds is not None: + lower, upper = bounds + size = np.asarray(upper, dtype=int) - np.asarray(lower, dtype=int) + 1 + src_space = h5py.h5s.create_simple(tuple(size)) + new_name = modify_function(vs.file_name) + plist.set_virtual( + vs.vspace, new_name.encode(), vs.dset_name.encode(), src_space + ) + + # Create the new dataset + tmp_dset = h5py.h5d.create( + dset.file["/"].id, + tmp_path.encode(), + dset.id.get_type(), + dset.id.get_space(), + dcpl=plist, + ) + tmp_dset = h5py.Dataset(tmp_dset) + for attr_name in dset.attrs: + tmp_dset.attrs[attr_name] = dset.attrs[attr_name] + + # Rename the new dataset + f = dset.file + del f[path] + f[path] = f[tmp_path] + del f[tmp_path] + + +def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, absolute_paths=False): """ Given a snapshot and auxilary files, create a new virtual snapshot with all datasets combine. 
+ + snapshot: Path to the snapshot file + auxilary_snapshots: List of auxiliary file patterns + output_file: Path to the output virtual snapshot + absolute_paths: If True, use absolute paths; if False, use relative paths """ # Copy the input virtual snapshot to the output @@ -17,114 +79,169 @@ def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, snap_nr): # Open the output file outfile = h5py.File(output_file, "r+") - # TODO: Loop - auxilary = auxilary_snapshots[0] + # Calculate directories for path updates + abs_snapshot_dir = os.path.abspath(os.path.dirname(snapshot)) + abs_auxilary_dirs = [ + os.path.abspath(os.path.dirname(aux.format(file_nr=0))) + for aux in auxilary_snapshots + ] + abs_output_dir = os.path.abspath(os.path.dirname(output_file)) + + if absolute_paths: + snapshot_dir = abs_snapshot_dir + auxilary_dirs = abs_auxilary_dirs + else: + snapshot_dir = os.path.relpath(abs_snapshot_dir, abs_output_dir) + auxilary_dirs = [ + os.path.relpath(aux_dir, abs_output_dir) + for aux_dir in abs_auxilary_dirs + ] + + # Create path replacement functions + def make_replace_path(target_dir): + def replace_path(old_path): + basename = os.path.basename(old_path) + return os.path.join(target_dir, basename) + return replace_path + + replace_snapshot_path = make_replace_path(snapshot_dir) + auxilary_path_replacers = [make_replace_path(d) for d in auxilary_dirs] + + all_auxilary_datasets = {} + + for aux_index, auxilary in enumerate(auxilary_snapshots): - # Check which datasets exist in the auxilary files - # and store their attributes and datatype - filename = auxilary.format(file_nr=0, snap_nr=snap_nr) - dset_attrs = {} - dset_dtype = {} - with h5py.File(filename, "r") as infile: + # Check which datasets exist in the auxilary files + # and store their attributes and datatype + filename = auxilary.format(file_nr=0) + dset_attrs = {} + dset_dtype = {} + with h5py.File(filename, "r") as infile: + for ptype in range(7): + if not f"PartType{ptype}" in infile: + continue + dset_attrs[f"PartType{ptype}"] = {} + dset_dtype[f"PartType{ptype}"] = {} + for dset in infile[f"PartType{ptype}"].keys(): + attrs = dict(infile[f"PartType{ptype}/{dset}"].attrs) + dtype = infile[f"PartType{ptype}/{dset}"].dtype + + # Some auxilary files are missing these attributes + if not "Value stored as physical" in attrs: + print(f"Setting comoving attrs for PartType{ptype}/{dset}") + attrs["Value stored as physical"] = [1] + attrs["Property can be converted to comoving"] = [0] + + # Add a flag that these datasets are stored in the auxilary files + attrs["Auxilary file"] = [1] + + # Store the values we need for later + dset_attrs[f"PartType{ptype}"][dset] = attrs + dset_dtype[f"PartType{ptype}"][dset] = dtype + + # Check we don't have this dataset in any of the other auxilary files + dset_path = f"PartType{ptype}/{dset}" + if dset_path in all_auxilary_datasets: + other_file = all_auxilary_datasets[f"PartType{ptype}/{dset}"] + raise ValueError(f"{dset_path} is in {auxilary} and {other_file}") + all_auxilary_datasets[dset_path] = auxilary + + # Loop over input auxilary files to get dataset shapes + file_nr = 0 + filenames = [] + shapes = [] + counts = [] + while True: + filename = auxilary.format(file_nr=file_nr) + if os.path.exists(filename): + filenames.append(filename) + with h5py.File(filename, "r") as infile: + shape = {} + count = {} + for ptype in range(7): + if f"PartType{ptype}" not in dset_attrs: + continue + shape[f"PartType{ptype}"] = {} + # Get the shape for each dataset + for dset in 
dset_attrs[f"PartType{ptype}"]: + s = infile[f"PartType{ptype}/{dset}"].shape + shape[f"PartType{ptype}"][dset] = s + # Get the number of particles in this chunk file + count[f"PartType{ptype}"] = s[0] + shapes.append(shape) + counts.append(count) + else: + break + file_nr += 1 + if file_nr == 0: + raise IOError(f"Failed to find files matching: {auxilary}") + + # Loop over particle types in the output for ptype in range(7): - if not f"PartType{ptype}" in infile: + if f"PartType{ptype}" not in dset_attrs: continue - dset_attrs[f"PartType{ptype}"] = {} - dset_dtype[f"PartType{ptype}"] = {} - for dset in infile[f"PartType{ptype}"].keys(): - attrs = dict(infile[f"PartType{ptype}/{dset}"].attrs) - dtype = infile[f"PartType{ptype}/{dset}"].dtype - - # Some auxilary files are missing these attributes - if not "Value stored as physical" in attrs: - print(f"Setting comoving attrs for PartType{ptype}/{dset}") - attrs["Value stored as physical"] = [1] - attrs["Property can be converted to comoving"] = [0] - - # Add a flag that these datasets are stored in the auxilary files - attrs["Auxilary file"] = [1] - - # Store the values we need for later - dset_attrs[f"PartType{ptype}"][dset] = attrs - dset_dtype[f"PartType{ptype}"][dset] = dtype - - # Loop over input auxilary files to get dataset shapes - file_nr = 0 - filenames = [] - shapes = [] - counts = [] - while True: - filename = auxilary.format(file_nr=file_nr, snap_nr=snap_nr) - if os.path.exists(filename): - filenames.append(filename) - with h5py.File(filename, "r") as infile: - shape = {} - count = {} - for ptype in range(7): - if f"PartType{ptype}" not in dset_attrs: - continue - shape[f"PartType{ptype}"] = {} - # Get the shape for each dataset - for dset in dset_attrs[f"PartType{ptype}"]: - s = infile[f"PartType{ptype}/{dset}"].shape - shape[f"PartType{ptype}"][dset] = s - # Get the number of particles in this chunk file - count[f"PartType{ptype}"] = s[0] - shapes.append(shape) - counts.append(count) - else: - break - file_nr += 1 - if file_nr == 0: - raise IOError(f"Failed to find files matching: {auxilary}") - - # Loop over particle types in the output - for ptype in range(7): - if f"PartType{ptype}" not in dset_attrs: - continue - - # Create virtual layout for new datasets - layouts = {} - nr_parts = sum([count[f"PartType{ptype}"] for count in counts]) - for dset in dset_attrs[f"PartType{ptype}"]: - full_shape = list(shapes[0][f"PartType{ptype}"][dset]) - full_shape[0] = nr_parts - full_shape = tuple(full_shape) - dtype = dset_dtype[f"PartType{ptype}"][dset] - layouts[dset] = h5py.VirtualLayout(shape=full_shape, dtype=dtype) - - # Loop over input files - offset = 0 - for filename, count, shape in zip(filenames, counts, shapes): - n_part = count[f"PartType{ptype}"] + + # Create virtual layout for new datasets + layouts = {} + nr_parts = sum([count[f"PartType{ptype}"] for count in counts]) for dset in dset_attrs[f"PartType{ptype}"]: - layouts[dset][offset : offset + n_part] = h5py.VirtualSource( - filename, - f"PartType{ptype}/{dset}", - shape=shape[f"PartType{ptype}"][dset], - ) - offset += n_part - - # Create the virtual datasets, renaming datasets if they - # already exist in the snapshot - for dset, attrs in dset_attrs[f"PartType{ptype}"].items(): - if f"PartType{ptype}/{dset}" in outfile: - outfile.move(f"PartType{ptype}/{dset}", f"PartType{ptype}/{dset}_snap") - outfile.create_virtual_dataset( - f"PartType{ptype}/{dset}", layouts[dset], fillvalue=-999 - ) - for k, v in attrs.items(): - outfile[f"PartType{ptype}/{dset}"].attrs[k] = v + 
full_shape = list(shapes[0][f"PartType{ptype}"][dset]) + full_shape[0] = nr_parts + full_shape = tuple(full_shape) + dtype = dset_dtype[f"PartType{ptype}"][dset] + layouts[dset] = h5py.VirtualLayout(shape=full_shape, dtype=dtype) + + # Loop over input files + offset = 0 + for filename, count, shape in zip(filenames, counts, shapes): + n_part = count[f"PartType{ptype}"] + for dset in dset_attrs[f"PartType{ptype}"]: + layouts[dset][offset : offset + n_part] = h5py.VirtualSource( + filename, + f"PartType{ptype}/{dset}", + shape=shape[f"PartType{ptype}"][dset], + ) + offset += n_part - # Copy GroupNr_bound to HaloCatalogueIndex, since that is the name in SOAP - if dset == "GroupNr_bound": + # Create the virtual datasets, renaming datasets if they + # already exist in the snapshot + for dset, attrs in dset_attrs[f"PartType{ptype}"].items(): + if f"PartType{ptype}/{dset}" in outfile: + outfile.move(f"PartType{ptype}/{dset}", f"PartType{ptype}/{dset}_snap") outfile.create_virtual_dataset( - f"PartType{ptype}/HaloCatalogueIndex", - layouts["GroupNr_bound"], - fillvalue=-999, + f"PartType{ptype}/{dset}", layouts[dset], fillvalue=-999 ) - for k, v in outfile[f"PartType{ptype}/GroupNr_bound"].attrs.items(): - outfile[f"PartType{ptype}/HaloCatalogueIndex"].attrs[k] = v + for k, v in attrs.items(): + outfile[f"PartType{ptype}/{dset}"].attrs[k] = v + + # Update paths for this newly created auxiliary dataset + update_vds_paths(outfile[f"PartType{ptype}/{dset}"], auxilary_path_replacers[aux_index]) + + # Copy GroupNr_bound to HaloCatalogueIndex, since + # that is the name in SOAP + if dset == "GroupNr_bound": + outfile.create_virtual_dataset( + f"PartType{ptype}/HaloCatalogueIndex", + layouts["GroupNr_bound"], + fillvalue=-999, + ) + for k, v in outfile[f"PartType{ptype}/GroupNr_bound"].attrs.items(): + outfile[f"PartType{ptype}/HaloCatalogueIndex"].attrs[k] = v + + # Update paths for HaloCatalogueIndex too + update_vds_paths(outfile[f"PartType{ptype}/HaloCatalogueIndex"], auxilary_path_replacers[aux_index]) + + # Update paths for all original snapshot datasets + for ptype in range(7): + ptype_name = f"PartType{ptype}" + if ptype_name in outfile: + for dset_name in list(outfile[ptype_name].keys()): + dset = outfile[f"{ptype_name}/{dset_name}"] + if dset.is_virtual: + # Check if this is an auxiliary dataset (skip those, already handled) + if dset.attrs.get("Auxilary file", [0])[0] != 1: + # This is an original snapshot dataset + update_vds_paths(dset, replace_snapshot_path) # Done outfile.close() @@ -133,7 +250,6 @@ def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, snap_nr): if __name__ == "__main__": import argparse - from update_vds_paths import update_virtual_snapshot_paths # For description of parameters run the following: $ python make_virtual_snapshot.py --help parser = argparse.ArgumentParser( @@ -149,9 +265,10 @@ def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, snap_nr): help="Name of the SWIFT virtual snapshot file, e.g. snapshot_{snap_nr:04}.hdf5", ) parser.add_argument( - "membership", + "auxilary_snapshots", type=str, - help="Format string for membership files, e.g. membership_{snap_nr:04}.{file_nr}.hdf5", + nargs="+", + help="One of more format strings for auxilary files, e.g. 
membership_{snap_nr:04}.{file_nr}.hdf5", ) parser.add_argument( "output_file", @@ -176,20 +293,16 @@ def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, snap_nr): virtual_snapshot = args.virtual_snapshot.format(snap_nr=args.snap_nr) output_file = args.output_file.format(snap_nr=args.snap_nr) - # Make a new virtual snapshot with group info - make_virtual_snapshot(virtual_snapshot, [args.membership], output_file, args.snap_nr) + # We don't want to replace {file_nr} for auxilary snapshots + auxilary_snapshots = [ + filename.format_map(SafeDict({'snap_nr': args.snap_nr})) + for filename in args.auxilary_snapshots + ] - # Set file paths for datasets - abs_snapshot_dir = os.path.abspath(os.path.dirname(virtual_snapshot)) - abs_membership_dir = os.path.abspath( - os.path.dirname(args.membership.format(snap_nr=args.snap_nr, file_nr=0)) + # Make a new virtual snapshot with group info + make_virtual_snapshot( + virtual_snapshot, + auxilary_snapshots, + output_file, + absolute_paths=args.absolute_paths ) - if args.absolute_paths: - # Ensure all paths in the virtual file are absolute to avoid VDS prefix issues - # (we probably need to pick up datasets from two different directories) - update_virtual_snapshot_paths(output_file, abs_snapshot_dir, abs_membership_dir) - else: - abs_output_dir = os.path.abspath(os.path.dirname(output_file)) - rel_snapshot_dir = os.path.relpath(abs_snapshot_dir, abs_output_dir) - rel_membership_dir = os.path.relpath(abs_membership_dir, abs_output_dir) - update_virtual_snapshot_paths(output_file, rel_snapshot_dir, rel_membership_dir) diff --git a/compression/update_vds_paths.py b/compression/update_vds_paths.py deleted file mode 100644 index da24d57d..00000000 --- a/compression/update_vds_paths.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/bin/env python - -import sys -import h5py -import numpy as np -import os.path - - -def update_vds_paths(dset, modify_function): - """ - Modify the virtual paths of the specified dataset - - Note that querying the source dataspace and selection does not appear - to work (invalid pointer error from h5py) so here we assume that we're - referencing all of the source dataspace, which is correct for SWIFT - snapshots. 
- - dset: a h5py.Dataset object - modify_function: a function which takes the old path as its argument and - returns the new path - """ - - # Choose a temporary path for the new virtual dataset - path = dset.name - tmp_path = dset.name + ".__tmp__" - - # Build the creation property list for the new dataset - plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE) - for vs in dset.virtual_sources(): - bounds = vs.vspace.get_select_bounds() - if bounds is not None: - lower, upper = bounds - size = np.asarray(upper, dtype=int) - np.asarray(lower, dtype=int) + 1 - src_space = h5py.h5s.create_simple(tuple(size)) - new_name = modify_function(vs.file_name) - plist.set_virtual( - vs.vspace, new_name.encode(), vs.dset_name.encode(), src_space - ) - - # Create the new dataset - tmp_dset = h5py.h5d.create( - dset.file["/"].id, - tmp_path.encode(), - dset.id.get_type(), - dset.id.get_space(), - dcpl=plist, - ) - tmp_dset = h5py.Dataset(tmp_dset) - for attr_name in dset.attrs: - tmp_dset.attrs[attr_name] = dset.attrs[attr_name] - - # Rename the new dataset - f = dset.file - del f[path] - f[path] = f[tmp_path] - del f[tmp_path] - - -def update_virtual_snapshot_paths(filename, snapshot_dir=None, membership_dir=None): - """ - Add full paths to virtual datasets in the specified file - """ - f = h5py.File(filename, "r+") - - # Find all datasets in the file - all_datasets = [] - - def visit_datasets(name, obj): - if isinstance(obj, h5py.Dataset): - all_datasets.append(obj) - - f.visititems(visit_datasets) - - def replace_snapshot_path(old_path): - basename = os.path.basename(old_path) - return os.path.join(snapshot_dir, basename) - - def replace_membership_path(old_path): - basename = os.path.basename(old_path) - return os.path.join(membership_dir, basename) - - # Loop over datasets and update paths if necessary - for dset in all_datasets: - if dset.is_virtual: - name = dset.name.split("/")[-1] - # Check if the dataset comes from a membership file - if dset.attrs.get("Auxilary file", [0])[0] == 1: - if membership_dir is not None: - update_vds_paths(dset, replace_membership_path) - # Catch old datasets which didn't have the "Auxilary file" set - elif name in ( - "GroupNr_all", - "GroupNr_bound", - "Rank_bound", - "HaloCatalogueIndex", - "SpecificPotentialEnergies", - ): - if membership_dir is not None: - update_vds_paths(dset, replace_membership_path) - # Catch old case of FOF IDs from membership files - elif (name == "FOFGroupIDs") and ("PartType1/FOFGroupIDs_old" in f): - if membership_dir is not None: - update_vds_paths(dset, replace_membership_path) - # Data comes from the snapshot files - else: - if snapshot_dir is not None: - update_vds_paths(dset, replace_snapshot_path) - - f.close() - - -if __name__ == "__main__": - - filename = sys.argv[1] # Virtual snapshot file to update - snapshot_dir = sys.argv[2] # Directory with the real snapshot files - membership_dir = sys.argv[3] # Directory with the real membership files - - update_virtual_snapshot_paths(filename, snapshot_dir, membership_dir) From a3bde138476ae3397db52bdbe9c4a35b0d6ed6aa Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 7 Jan 2026 15:55:37 +0000 Subject: [PATCH 3/9] Virtual snapshot creation using multiple auxilary files --- compression/make_virtual_snapshot.py | 63 +++++++++++++------- scripts/COLIBRE/compress_group_membership.sh | 5 +- scripts/EAGLE.sh | 6 +- 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/compression/make_virtual_snapshot.py b/compression/make_virtual_snapshot.py index 934b92cc..0679ff7a 100644 --- 
a/compression/make_virtual_snapshot.py +++ b/compression/make_virtual_snapshot.py @@ -62,11 +62,13 @@ def update_vds_paths(dset, modify_function): del f[tmp_path] -def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, absolute_paths=False): +def make_virtual_snapshot( + snapshot, auxilary_snapshots, output_file, absolute_paths=False +): """ Given a snapshot and auxilary files, create a new virtual snapshot with all datasets combine. - + snapshot: Path to the snapshot file auxilary_snapshots: List of auxiliary file patterns output_file: Path to the output virtual snapshot @@ -86,15 +88,14 @@ def make_virtual_snapshot(snapshot, auxilary_snapshots, output_file, absolute_pa for aux in auxilary_snapshots ] abs_output_dir = os.path.abspath(os.path.dirname(output_file)) - + if absolute_paths: snapshot_dir = abs_snapshot_dir auxilary_dirs = abs_auxilary_dirs else: snapshot_dir = os.path.relpath(abs_snapshot_dir, abs_output_dir) auxilary_dirs = [ - os.path.relpath(aux_dir, abs_output_dir) - for aux_dir in abs_auxilary_dirs + os.path.relpath(aux_dir, abs_output_dir) for aux_dir in abs_auxilary_dirs ] # Create path replacement functions @@ -102,13 +103,14 @@ def make_replace_path(target_dir): def replace_path(old_path): basename = os.path.basename(old_path) return os.path.join(target_dir, basename) + return replace_path replace_snapshot_path = make_replace_path(snapshot_dir) auxilary_path_replacers = [make_replace_path(d) for d in auxilary_dirs] all_auxilary_datasets = {} - + for aux_index, auxilary in enumerate(auxilary_snapshots): # Check which datasets exist in the auxilary files @@ -143,7 +145,9 @@ def replace_path(old_path): dset_path = f"PartType{ptype}/{dset}" if dset_path in all_auxilary_datasets: other_file = all_auxilary_datasets[f"PartType{ptype}/{dset}"] - raise ValueError(f"{dset_path} is in {auxilary} and {other_file}") + raise ValueError( + f"{dset_path} is in {auxilary} and {other_file}" + ) all_auxilary_datasets[dset_path] = auxilary # Loop over input auxilary files to get dataset shapes @@ -207,7 +211,9 @@ def replace_path(old_path): # already exist in the snapshot for dset, attrs in dset_attrs[f"PartType{ptype}"].items(): if f"PartType{ptype}/{dset}" in outfile: - outfile.move(f"PartType{ptype}/{dset}", f"PartType{ptype}/{dset}_snap") + outfile.move( + f"PartType{ptype}/{dset}", f"PartType{ptype}/{dset}_snap" + ) outfile.create_virtual_dataset( f"PartType{ptype}/{dset}", layouts[dset], fillvalue=-999 ) @@ -215,9 +221,12 @@ def replace_path(old_path): outfile[f"PartType{ptype}/{dset}"].attrs[k] = v # Update paths for this newly created auxiliary dataset - update_vds_paths(outfile[f"PartType{ptype}/{dset}"], auxilary_path_replacers[aux_index]) + update_vds_paths( + outfile[f"PartType{ptype}/{dset}"], + auxilary_path_replacers[aux_index], + ) - # Copy GroupNr_bound to HaloCatalogueIndex, since + # Copy GroupNr_bound to HaloCatalogueIndex, since # that is the name in SOAP if dset == "GroupNr_bound": outfile.create_virtual_dataset( @@ -227,9 +236,12 @@ def replace_path(old_path): ) for k, v in outfile[f"PartType{ptype}/GroupNr_bound"].attrs.items(): outfile[f"PartType{ptype}/HaloCatalogueIndex"].attrs[k] = v - + # Update paths for HaloCatalogueIndex too - update_vds_paths(outfile[f"PartType{ptype}/HaloCatalogueIndex"], auxilary_path_replacers[aux_index]) + update_vds_paths( + outfile[f"PartType{ptype}/HaloCatalogueIndex"], + auxilary_path_replacers[aux_index], + ) # Update paths for all original snapshot datasets for ptype in range(7): @@ -260,25 +272,28 @@ def 
replace_path(old_path): ) ) parser.add_argument( - "virtual_snapshot", + "--virtual-snapshot", type=str, + required=True, help="Name of the SWIFT virtual snapshot file, e.g. snapshot_{snap_nr:04}.hdf5", ) parser.add_argument( - "auxilary_snapshots", + "--auxilary-snapshots", type=str, nargs="+", + required=True, help="One of more format strings for auxilary files, e.g. membership_{snap_nr:04}.{file_nr}.hdf5", ) parser.add_argument( - "output_file", + "--output-file", type=str, + required=True, help="Name of the virtual snapshot to create, e.g. membership_{snap_nr:04}.hdf5", ) parser.add_argument( - "snap_nr", + "--snap-nr", type=int, - nargs="?", + required=False, default=-1, help="Snapshot number (default: -1). Not required if snap_nr is present in filenames passed.", ) @@ -289,20 +304,24 @@ def replace_path(old_path): ) args = parser.parse_args() + print(f"Creating virtual snapshot") + for k, v in vars(args).items(): + print(f" {k}: {v}") + # Substitute snap number virtual_snapshot = args.virtual_snapshot.format(snap_nr=args.snap_nr) output_file = args.output_file.format(snap_nr=args.snap_nr) # We don't want to replace {file_nr} for auxilary snapshots auxilary_snapshots = [ - filename.format_map(SafeDict({'snap_nr': args.snap_nr})) + filename.format_map(SafeDict({"snap_nr": args.snap_nr})) for filename in args.auxilary_snapshots ] # Make a new virtual snapshot with group info make_virtual_snapshot( - virtual_snapshot, - auxilary_snapshots, - output_file, - absolute_paths=args.absolute_paths + virtual_snapshot, + auxilary_snapshots, + output_file, + absolute_paths=args.absolute_paths, ) diff --git a/scripts/COLIBRE/compress_group_membership.sh b/scripts/COLIBRE/compress_group_membership.sh index ee34965f..69587a22 100644 --- a/scripts/COLIBRE/compress_group_membership.sh +++ b/scripts/COLIBRE/compress_group_membership.sh @@ -90,7 +90,10 @@ echo "Creating virtual snapshot" snapshot="${output_dir}/${sim}/snapshots/colibre_${snapnum}/colibre_${snapnum}.hdf5" membership="${output_filename}.{file_nr}.hdf5" virtual="${outbase}/colibre_with_SOAP_membership_${snapnum}.hdf5" -python compression/make_virtual_snapshot.py $snapshot $membership $virtual +python compression/make_virtual_snapshot.py \ + --virtual-snapshot $snapshot \ + --auxilary-snapshots $membership \ + --output-file $virtual echo "Setting virtual file to be read-only" chmod a=r "${virtual}" diff --git a/scripts/EAGLE.sh b/scripts/EAGLE.sh index 8f87ef72..d8ffd65b 100755 --- a/scripts/EAGLE.sh +++ b/scripts/EAGLE.sh @@ -78,9 +78,9 @@ python "${soap_dir}/create_virtual_snapshot.py" "snap_${snap_nr}.0.hdf5" cd - python compression/make_virtual_snapshot.py \ - "${output_dir}/swift_snapshots/swift_${snap_nr}/snap_${snap_nr}.hdf5" \ - "${output_dir}/SOAP_uncompressed/membership_${snap_nr}/membership_${snap_nr}.{file_nr}.hdf5" \ - "${output_dir}/SOAP_uncompressed/snap_${snap_nr}.hdf5" \ + --virtual-snapshot "${output_dir}/swift_snapshots/swift_${snap_nr}/snap_${snap_nr}.hdf5" \ + --auxilary-snapshots "${output_dir}/SOAP_uncompressed/membership_${snap_nr}/membership_${snap_nr}.{file_nr}.hdf5" \ + --output-file "${output_dir}/SOAP_uncompressed/snap_${snap_nr}.hdf5" ######### Run SOAP From 73dc32ec4051679c0204641ed5b2115b96d3a9c1 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 7 Jan 2026 16:16:10 +0000 Subject: [PATCH 4/9] BirthHaloCatalogueIndex v1 --- misc/compute_BirthHaloCatalogueIndex.py | 247 ++++++++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100644 misc/compute_BirthHaloCatalogueIndex.py diff --git 
a/misc/compute_BirthHaloCatalogueIndex.py b/misc/compute_BirthHaloCatalogueIndex.py new file mode 100644 index 00000000..3f458f48 --- /dev/null +++ b/misc/compute_BirthHaloCatalogueIndex.py @@ -0,0 +1,247 @@ +#!/bin/env python + +""" +# TODO: Update +match_group_membership.py + +This script matches halos between different simulations run from the +same initial conditions. + +Usage: + + mpirun -- python -u misc/match_group_membership \ + --snap-basename1 SNAP_BASENAME1 \ + --snap-basename2 SNAP_BASENAME2 \ + --membership-basename1 MEMBERSHIP_BASENAME1 \ + --membership-basename2 MEMBERSHIP_BASENAME2 \ + --catalogue-filename1 CATALOGUE_FILENAME1 \ + --catalogue-filename2 CATALOGUE_FILENAME2 \ + --output-filename OUTPUT_FILENAME + +Run "python misc/match_group_membership.py -h" for a discription +of the optional arguments. + +""" + +import argparse +import datetime +import os + +from mpi4py import MPI + +comm = MPI.COMM_WORLD +comm_rank = comm.Get_rank() +comm_size = comm.Get_size() + +import h5py +import numpy as np + +import virgo.mpi.parallel_sort as psort +import virgo.mpi.parallel_hdf5 as phdf5 +from virgo.mpi.gather_array import gather_array + + +def load_particle_data(snap_basename, membership_basename, load_gas, comm): + """ + Load the particle IDs and halo membership for the particle types + we will use to match. Removes unbound particles. + """ + + particle_data = {} + + # Load particle IDs + snap_filename = snap_basename + ".{file_nr}.hdf5" + file = phdf5.MultiFile( + snap_filename, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm + ) + particle_data["PartType4/ParticleIDs"] = file.read("PartType4/ParticleIDs") + if load_gas: + particle_data["PartType0/ParticleIDs"] = file.read("PartType0/ParticleIDs") + + # Membership files don't have a header, so create a list of filenames + n_file = len(file.filenames) + membership_filenames = [f"{membership_basename}.{i}.hdf5" for i in range(n_file)] + # Load membership information + file = phdf5.MultiFile( + membership_filenames, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm + ) + particle_data["PartType4/GroupNr_bound"] = file.read("PartType4/GroupNr_bound") + if load_gas: + particle_data["PartType0/GroupNr_bound"] = file.read("PartType0/GroupNr_bound") + + # Check the two files are partitioned the same way + assert ( + particle_data["PartType4/GroupNr_bound"].shape + == particle_data["PartType4/ParticleIDs"].shape + ) + if load_gas: + assert ( + particle_data["PartType0/GroupNr_bound"].shape + == particle_data["PartType0/ParticleIds"].shape + ) + + return particle_data + + +unit_attrs = { + "Conversion factor to CGS (not including cosmological corrections)": [1.0], + "Conversion factor to physical CGS (including cosmological corrections)": [1.0], + "U_I exponent": [0.0], + "U_L exponent": [0.0], + "U_M exponent": [0.0], + "U_t exponent": [0.0], + "U_T exponent": [0.0], + "a-scale exponent": [0.0], + "h-scale exponent": [0.0], + "Property can be converted to comoving": [0], + "Value stored as physical": [1], +} + + +def mpi_print(string, comm_rank): + if comm_rank == 0: + print(string) + + +if __name__ == "__main__": + + start_time = datetime.datetime.now() + + parser = argparse.ArgumentParser( + description=("Script to calculate BirthHaloCatalogueIndex of star particles"), + ) + parser.add_argument( + "--snap-basename", + type=str, + required=True, + help=( + "The basename of the snapshot files (the snapshot " + "name without the .{file_nr}.hdf5 suffix. 
Use " + "{snap_nr:04d} instead of the snapshot number)" + ), + ) + parser.add_argument( + "--membership-basename", + type=str, + required=True, + help="The basename of the membership files", + ) + parser.add_argument( + "--output-basename", + type=str, + required=True, + help="The basename of the output files", + ) + parser.add_argument( + "--final-snap-nr", + type=int, + required=True, + help=( + # TODO: + "Snapshot at which to load the particles" + ), + ) + # TODO: Skip check gas particles + parser.add_argument( + "--calculate-PreBirthHaloCatalogueIndex", + action="store_true", + help=( + "Whether to calculate and output the subhalo halo catalogue " + "index of the gas particle that formed each star" + ), + ) + + args = parser.parse_args() + + # Log the arguments + for k, v in vars(args).items(): + mpi_print(f" {k}: {v}", comm_rank) + + final_snap_basename = args.snap_basename.format(snap_nr=args.final_snap_nr) + final_membership_basename = args.membership_basename.format( + snap_nr=args.final_snap_nr + ) + mpi_print("Loading stars from final snapshot", comm_rank) + particle_data = load_particle_data( + final_snap_basename, + final_membership_basename, + False, + comm, + ) + star_particle_ids = particle_data["PartType4/ParticleIDs"] + star_birth_ids = particle_data["PartType4/GroupNr_bound"] + star_birth_ids[:] = -99 + + # TODO: + # For the loop we need gas particles from the previous snapshot + # if args.calculate_PreBirthHaloCatalogueIndex: + # star_prebirth_ids = np.copy(star_birth_ids) + + for snap_nr in range(0, args.final_snap_nr + 1): + mpi_print(f"Loading data from snapshot {snap_nr}", comm_rank) + snap_basename = args.snap_basename.format(snap_nr=snap_nr) + membership_basename = args.membership_basename.format(snap_nr=snap_nr) + particle_data = load_particle_data( + snap_basename, + membership_basename, + args.calculate_PreBirthHaloCatalogueIndex, + comm, + ) + + # It would be quicker to make use of the BirthScaleFactors + # instead of checking all stars + idx = psort.parallel_match( + star_particle_ids[star_birth_ids == -99], + particle_data["PartType4/ParticleIDs"], + comm=comm, + ) + + new_birth_ids = psort.fetch_elements( + particle_data["PartType4/GroupNr_bound"], + idx[idx != -1], + comm=comm, + ) + + # TODO: + # if args.calculate_PreBirthHaloCatalogueIndex: + + has_new_birth_id = star_birth_ids == -99 + has_new_birth_id[has_new_birth_id] = idx != -1 + star_birth_ids[has_new_birth_id] = new_birth_ids + + # Check we found a value for every star + assert np.sum(star_birth_ids == -99) == 0 + + mpi_print("Writing output", comm_rank) + snap_file = phdf5.MultiFile( + final_snap_basename + ".{file_nr}.hdf5", + file_nr_attr=("Header", "NumFilesPerSnapshot"), + comm=comm, + ) + elements_per_file = snap_file.get_elements_per_file( + "ParticleIDs", group="PartType4" + ) + output = {"BirthHaloCatalogueIndex": star_birth_ids} + attrs = {"BirthHaloCatalogueIndex": {"Description": "TODO"}} + attrs["BirthHaloCatalogueIndex"].update(unit_attrs) + output_filename = ( + args.output_basename.format(snap_nr=args.final_snap_nr) + ".{file_nr}.hdf5" + ) + if comm_rank == 0: + output_dir = os.path.dirname(output_filename) + os.makedirs(output_dir, exist_ok=True) + comm.barrier() + + snap_file.write( + output, + elements_per_file, + filenames=output_filename, + mode="w", + group="PartType4", + attrs=attrs, + ) + + comm.barrier() + mpi_print(f"Runtime: {datetime.datetime.now() - start_time}", comm_rank) + mpi_print("Done!", comm_rank) + From 03679bbe3984641a3601da47afdac30f05087eaa Mon Sep 17 
00:00:00 2001 From: robjmcgibbon Date: Wed, 14 Jan 2026 09:55:12 +0000 Subject: [PATCH 5/9] Identify gas progenitor --- misc/compute_BirthHaloCatalogueIndex.py | 106 ++++++++++++++++-------- 1 file changed, 70 insertions(+), 36 deletions(-) diff --git a/misc/compute_BirthHaloCatalogueIndex.py b/misc/compute_BirthHaloCatalogueIndex.py index 3f458f48..5fbed1d0 100644 --- a/misc/compute_BirthHaloCatalogueIndex.py +++ b/misc/compute_BirthHaloCatalogueIndex.py @@ -1,25 +1,21 @@ #!/bin/env python """ -# TODO: Update -match_group_membership.py +compute_BirthHaloCatalogueIndex.py -This script matches halos between different simulations run from the -same initial conditions. +This script produces an auxiliary snapshot which contains the subhalo +id each star was part of when it first formed. Usage: - mpirun -- python -u misc/match_group_membership \ - --snap-basename1 SNAP_BASENAME1 \ - --snap-basename2 SNAP_BASENAME2 \ - --membership-basename1 MEMBERSHIP_BASENAME1 \ - --membership-basename2 MEMBERSHIP_BASENAME2 \ - --catalogue-filename1 CATALOGUE_FILENAME1 \ - --catalogue-filename2 CATALOGUE_FILENAME2 \ - --output-filename OUTPUT_FILENAME + mpirun -- python -u misc/compute_BirthHaloCatalogueIndex.py \ + --snap-basename SNAP_BASENAME \ + --membership-basename MEMBERSHIP_BASENAME \ + --output-basename OUTPUT_FILENAME \ + --final-snap-nr FINAL_SNAP_NR -Run "python misc/match_group_membership.py -h" for a discription -of the optional arguments. +Run "python misc/compute_BirthHaloCatalogueIndex.py -h" for a full description +of the arguments, and a list of optional arguments. """ @@ -77,12 +73,12 @@ def load_particle_data(snap_basename, membership_basename, load_gas, comm): if load_gas: assert ( particle_data["PartType0/GroupNr_bound"].shape - == particle_data["PartType0/ParticleIds"].shape + == particle_data["PartType0/ParticleIDs"].shape ) return particle_data - +# Units for the dimensionless fields we will be saving unit_attrs = { "Conversion factor to CGS (not including cosmological corrections)": [1.0], "Conversion factor to physical CGS (including cosmological corrections)": [1.0], @@ -137,11 +133,9 @@ def mpi_print(string, comm_rank): type=int, required=True, help=( - # TODO: "Snapshot at which to load the particles" ), ) - # TODO: Skip check gas particles parser.add_argument( "--calculate-PreBirthHaloCatalogueIndex", action="store_true", @@ -171,14 +165,20 @@ def mpi_print(string, comm_rank): star_particle_ids = particle_data["PartType4/ParticleIDs"] star_birth_ids = particle_data["PartType4/GroupNr_bound"] star_birth_ids[:] = -99 + star_first_snapshot = np.copy(star_birth_ids) - # TODO: - # For the loop we need gas particles from the previous snapshot - # if args.calculate_PreBirthHaloCatalogueIndex: - # star_prebirth_ids = np.copy(star_birth_ids) + if args.calculate_PreBirthHaloCatalogueIndex: + particle_data['PartType0/ParticleIDs'] = np.ones(0) + particle_data['PartType0/GroupNr_bound'] = np.ones(0) + star_prebirth_ids = np.copy(star_birth_ids) for snap_nr in range(0, args.final_snap_nr + 1): + mpi_print(f"Loading data from snapshot {snap_nr}", comm_rank) + if args.calculate_PreBirthHaloCatalogueIndex: + # We need to keep the gas IDs from snapshot N-1 + gas_particle_ids = particle_data['PartType0/ParticleIDs'] + gas_group_nr = particle_data['PartType0/GroupNr_bound'] snap_basename = args.snap_basename.format(snap_nr=snap_nr) membership_basename = args.membership_basename.format(snap_nr=snap_nr) particle_data = load_particle_data( @@ -188,6 +188,7 @@ def mpi_print(string, comm_rank): comm, ) 
+ mpi_print(f"Matching stars", comm_rank) # It would be quicker to make use of the BirthScaleFactors # instead of checking all stars idx = psort.parallel_match( @@ -202,28 +203,50 @@ def mpi_print(string, comm_rank): comm=comm, ) - # TODO: - # if args.calculate_PreBirthHaloCatalogueIndex: - has_new_birth_id = star_birth_ids == -99 has_new_birth_id[has_new_birth_id] = idx != -1 star_birth_ids[has_new_birth_id] = new_birth_ids + star_first_snapshot[has_new_birth_id] = snap_nr + + if args.calculate_PreBirthHaloCatalogueIndex: + mpi_print(f"Matching gas", comm_rank) + # Identify the gas progenitor of the newly formed stars + gas_idx = psort.parallel_match( + star_particle_ids[has_new_birth_id], + gas_particle_ids, + comm=comm, + ) + # The gas progenitor may not exist for all stars due + # to particle splitting. Note this information is + # recoverable if required by using the SplitTrees. + new_prebirth_ids = -99 * np.ones_like(new_birth_ids) + new_prebirth_ids[gas_idx != -1] = psort.fetch_elements( + gas_group_nr, + gas_idx[gas_idx != -1], + comm=comm, + ) + star_prebirth_ids[has_new_birth_id] = new_prebirth_ids # Check we found a value for every star assert np.sum(star_birth_ids == -99) == 0 - mpi_print("Writing output", comm_rank) - snap_file = phdf5.MultiFile( - final_snap_basename + ".{file_nr}.hdf5", - file_nr_attr=("Header", "NumFilesPerSnapshot"), - comm=comm, - ) - elements_per_file = snap_file.get_elements_per_file( - "ParticleIDs", group="PartType4" - ) - output = {"BirthHaloCatalogueIndex": star_birth_ids} - attrs = {"BirthHaloCatalogueIndex": {"Description": "TODO"}} + # Set up what we want to output + output = { + "BirthHaloCatalogueIndex": star_birth_ids, + "FirstSnapshot": star_first_snapshot, + } + attrs = { + "BirthHaloCatalogueIndex": {"Description": "The HaloCatalogueIndex of this particle at the first snapshot it appeared."}, + "FirstSnapshot": {"Description": "Index of the first simulation snapshot in which the star particle is present."}, + } attrs["BirthHaloCatalogueIndex"].update(unit_attrs) + attrs["FirstSnapshot"].update(unit_attrs) + if args.calculate_PreBirthHaloCatalogueIndex: + output["PreBirthHaloCatalogueIndex"] = star_prebirth_ids + attrs["PreBirthHaloCatalogueIndex"] = {"Description": "The HaloCatalogueIndex of gas prognitor at the snapshot before the star formed. 
-99 if no gas progenitor is found."} + attrs["PreBirthHaloCatalogueIndex"].update(unit_attrs) + + # Check the output directory exists output_filename = ( args.output_basename.format(snap_nr=args.final_snap_nr) + ".{file_nr}.hdf5" ) @@ -232,6 +255,16 @@ def mpi_print(string, comm_rank): os.makedirs(output_dir, exist_ok=True) comm.barrier() + # Write the output + mpi_print("Writing output", comm_rank) + snap_file = phdf5.MultiFile( + final_snap_basename + ".{file_nr}.hdf5", + file_nr_attr=("Header", "NumFilesPerSnapshot"), + comm=comm, + ) + elements_per_file = snap_file.get_elements_per_file( + "ParticleIDs", group="PartType4" + ) snap_file.write( output, elements_per_file, @@ -241,6 +274,7 @@ def mpi_print(string, comm_rank): attrs=attrs, ) + # Finished comm.barrier() mpi_print(f"Runtime: {datetime.datetime.now() - start_time}", comm_rank) mpi_print("Done!", comm_rank) From 43e709393b292e0d14a5c7f2dfa5384d8614c226 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 14 Jan 2026 09:59:23 +0000 Subject: [PATCH 6/9] Add ExSitu fraction property to SOAP --- .../particle_selection/aperture_properties.py | 19 +++ SOAP/property_table.py | 18 ++- parameter_files/COLIBRE_HYBRID.yml | 1 + parameter_files/COLIBRE_THERMAL.yml | 1 + parameter_files/ExSitu.yml | 111 ++++++++++++++++++ 5 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 parameter_files/ExSitu.yml diff --git a/SOAP/particle_selection/aperture_properties.py b/SOAP/particle_selection/aperture_properties.py index 5d0cbe4b..3a424a42 100644 --- a/SOAP/particle_selection/aperture_properties.py +++ b/SOAP/particle_selection/aperture_properties.py @@ -760,6 +760,24 @@ def TotalSNIaRate(self) -> unyt.unyt_quantity: self.star_mask_ap ].sum() + @lazy_property + def ExSituFraction(self) -> unyt.unyt_quantity: + """ + Mass fraction of bound stars that formed in a different subhalo. 
+ """ + if self.Nstar == 0: + return None + + group_nr = self.get_dataset("PartType4/GroupNr_bound")[self.star_mask_all][ + self.star_mask_ap + ] + birth_group_nr = self.get_dataset("PartType4/BirthHaloCatalogueIndex")[self.star_mask_all][ + self.star_mask_ap + ] + ex_situ = group_nr != birth_group_nr + + return self.star_mass_fraction[ex_situ].sum() + @lazy_property def bh_mask_all(self) -> NDArray[bool]: """ @@ -3788,6 +3806,7 @@ class ApertureProperties(HaloProperty): "stellar_age_mw": False, "stellar_age_lw": False, "TotalSNIaRate": False, + "ExSituFraction": False, "HydrogenMass": False, "HeliumMass": False, "MolecularHydrogenMass": False, diff --git a/SOAP/property_table.py b/SOAP/property_table.py index 583f291f..1867517a 100644 --- a/SOAP/property_table.py +++ b/SOAP/property_table.py @@ -1905,6 +1905,22 @@ class PropertyTable: output_physical=True, a_scale_exponent=0, ), + "ExSituFraction": Property( + name="ExSituFraction", + shape=1, + dtype=np.float32, + unit="dimensionless", + description="Mass fraction of bound stars that formed in a different subhalo", + lossy_compression_filter="FMantissa9", + dmo_property=True, + particle_properties=[ + "PartType4/Masses", + "PartType4/BirthHaloCatalogueIndex", + "PartType4/GroupNr_bound", + ], + output_physical=True, + a_scale_exponent=0, + ), "Mgas": Property( name="GasMass", shape=1, @@ -5134,7 +5150,7 @@ def generate_tex_files(self, output_dir: str): # standalone table file footer tailstr = "\\end{document}" - # generate the auxilary documentation files + # generate the auxiliary documentation files with open(f"{output_dir}/timestamp.tex", "w") as ofile: ofile.write(get_version_string()) with open(f"{output_dir}/table.tex", "w") as ofile: diff --git a/parameter_files/COLIBRE_HYBRID.yml b/parameter_files/COLIBRE_HYBRID.yml index 0f9b3f6e..343be68d 100644 --- a/parameter_files/COLIBRE_HYBRID.yml +++ b/parameter_files/COLIBRE_HYBRID.yml @@ -244,6 +244,7 @@ ApertureProperties: StellarCylindricalVelocityDispersionLuminosityWeighted: false StellarCylindricalVelocityDispersionVerticalLuminosityWeighted: false StellarCylindricalVelocityDispersionDiscPlaneLuminosityWeighted: false + ExSituFraction: false TotalMass: true TotalSNIaRate: true GasMassInColdDenseDiffuseMetals: diff --git a/parameter_files/COLIBRE_THERMAL.yml b/parameter_files/COLIBRE_THERMAL.yml index 403267a5..9d93c947 100644 --- a/parameter_files/COLIBRE_THERMAL.yml +++ b/parameter_files/COLIBRE_THERMAL.yml @@ -244,6 +244,7 @@ ApertureProperties: StellarCylindricalVelocityDispersionDiscPlaneLuminosityWeighted: false StellarRotationalVelocity: false StellarRotationalVelocityLuminosityWeighted: false + ExSituFraction: false TotalMass: true TotalSNIaRate: true GasMassInColdDenseDiffuseMetals: diff --git a/parameter_files/ExSitu.yml b/parameter_files/ExSitu.yml new file mode 100644 index 00000000..47adf469 --- /dev/null +++ b/parameter_files/ExSitu.yml @@ -0,0 +1,111 @@ +# Values in this section are substituted into the other sections +Parameters: + sim_dir: /cosma8/data/dp004/jlvc76/COLIBRE/ScienceRuns + output_dir: /snap8/scratch/dp004/dc-mcgi1/soap_BirthTrackId + scratch_dir: /snap8/scratch/dp004/dc-mcgi1/soap_BirthTrackId + +# Location of the Swift snapshots: +Snapshots: + filename: "{sim_dir}/{sim_name}/snapshots/colibre_{snap_nr:04d}/colibre_{snap_nr:04d}.{file_nr}.hdf5" + +# Which halo finder we're using, and base name for halo finder output files +HaloFinder: + type: HBTplus + filename: "{sim_dir}/{sim_name}/HBT-HERONS/sorted_catalogues/OrderedSubSnap_{snap_nr:03d}.hdf5" + # 
fof_filename: "{sim_dir}/{sim_name}/fof/fof_output_{snap_nr:04d}.hdf5" + # fof_radius_filename: "{sim_dir}/{sim_name}/fof/fof_output_{snap_nr:04d}.hdf5" + read_potential_energies: true + #type: VR + #filename: "{sim_dir}/halo_{snap_nr:04d}" + #type: Subfind + #filename: "{sim_dir}/snapdir_{snap_nr:03d}/snapshot_{snap_nr:03d}" + +GroupMembership: + # Where to write the group membership files + filename: "{sim_dir}/{sim_name}/SOAP-HBT/membership_{snap_nr:04d}/membership_{snap_nr:04d}.{file_nr}.hdf5" + +ExtraInput: + situ: "{sim_dir}/{sim_name}/SOAP_BirthTrackId/test_{snap_nr:04d}/test_{snap_nr:04d}.{file_nr}.hdf5" + +HaloProperties: + # Where to write the halo properties file + filename: "{output_dir}/{sim_name}/SOAP_uncompressed/halo_properties_{snap_nr:04d}.hdf5" + # Where to write temporary chunk output + chunk_dir: "{scratch_dir}/{sim_name}/SOAP-tmp/" + +ApertureProperties: + properties: + StellarMass: true + ExSituFraction: true + variations: + exclusive_50_kpc: + inclusive: false + radius_in_kpc: 50.0 +ProjectedApertureProperties: + properties: + {} + variations: + {} +SOProperties: + properties: + {} + variations: + {} +SubhaloProperties: + properties: + EncloseRadius: true + TotalMass: true + NumberOfBlackHoleParticles: true + NumberOfDarkMatterParticles: true + NumberOfGasParticles: true + NumberOfStarParticles: true +aliases: + PartType0/LastSNIIKineticFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent + PartType0/LastSNIIThermalFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent + snipshot: + PartType0/SpeciesFractions: PartType0/ReducedSpeciesFractions + PartType0/ElementMassFractions: PartType0/ReducedElementMassFractions + PartType0/LastSNIIKineticFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent + PartType0/LastSNIIThermalFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent +filters: + general: + limit: 100 + properties: + - BoundSubhalo/NumberOfGasParticles + - BoundSubhalo/NumberOfDarkMatterParticles + - BoundSubhalo/NumberOfStarParticles + - BoundSubhalo/NumberOfBlackHoleParticles + combine_properties: sum + baryon: + limit: 0 + properties: + - BoundSubhalo/NumberOfGasParticles + - BoundSubhalo/NumberOfStarParticles + combine_properties: sum + dm: + limit: 0 + properties: + - BoundSubhalo/NumberOfDarkMatterParticles + gas: + limit: 0 + properties: + - BoundSubhalo/NumberOfGasParticles + star: + limit: 0 + properties: + - BoundSubhalo/NumberOfStarParticles +defined_constants: + O_H_sun: 4.9e-4 + Fe_H_sun: 3.16e-5 + N_O_sun: 0.138 + C_O_sun: 0.549 + Mg_H_sun: 3.98e-5 +calculations: + calculate_missing_properties: false + strict_halo_copy: false + recently_heated_gas_filter: + delta_time_myr: 15 + use_AGN_delta_T: false + cold_dense_gas_filter: + maximum_temperature_K: 3.16e4 + minimum_hydrogen_number_density_cm3: 0.1 From 257b0be53c971900a0d6000c942f4bede2bcf7cd Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 14 Jan 2026 10:22:22 +0000 Subject: [PATCH 7/9] Add sbatch script --- scripts/COLIBRE/compute_birth_index.sh | 31 ++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100755 scripts/COLIBRE/compute_birth_index.sh diff --git a/scripts/COLIBRE/compute_birth_index.sh b/scripts/COLIBRE/compute_birth_index.sh new file mode 100755 index 00000000..bf777fef --- /dev/null +++ b/scripts/COLIBRE/compute_birth_index.sh @@ -0,0 +1,31 @@ +#!/bin/bash -l + +#SBATCH --cpus-per-task=1 +#SBATCH -o ./logs/birth_track_id_%j.out +#SBATCH -p cosma8 +#SBATCH -A dp004 +#SBATCH -J BirthHaloCatalogueIndex +#SBATCH --nodes=1 +#SBATCH -t 
12:00:00 +# N0752: 1 node, 2 hours +# N1504: 1 node, 12 hours +# N3008: 4 nodes, 24 hours + +set -e + +base_dir="/cosma8/data/dp004/jlvc76/COLIBRE/ScienceRuns" +sim="L0400N3008/Thermal" +output_dir="/cosma8/data/dp004/dc-mcgi1/COLIBRE/BirthHaloCatalogueIndex" +snapnum="0127" + +snap_basename="${base_dir}/${sim}/snapshots/colibre_{snap_nr:04d}/colibre_{snap_nr:04d}" +membership_basename="${base_dir}/${sim}/SOAP-HBT/membership_{snap_nr:04d}/membership_{snap_nr:04d}" +output_basename="${output_dir}/${sim}/SOAP_BirthTrackId/birth_${snapnum}/birth_${snapnum}" + +mpirun -- python misc/compute_BirthHaloCatalogueIndex.py \ + --snap-basename ${snap_basename} \ + --membership-basename ${membership_basename} \ + --output-basename ${output_basename} \ + --final-snap-nr ${snapnum} \ + --calculate-PreBirthHaloCatalogueIndex + From 2fa51f47c36a7e07628ada2d428c92c4522c5c2d Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 14 Jan 2026 13:15:44 +0000 Subject: [PATCH 8/9] Remove extra parameter file --- parameter_files/ExSitu.yml | 111 ------------------------------------- 1 file changed, 111 deletions(-) delete mode 100644 parameter_files/ExSitu.yml diff --git a/parameter_files/ExSitu.yml b/parameter_files/ExSitu.yml deleted file mode 100644 index 47adf469..00000000 --- a/parameter_files/ExSitu.yml +++ /dev/null @@ -1,111 +0,0 @@ -# Values in this section are substituted into the other sections -Parameters: - sim_dir: /cosma8/data/dp004/jlvc76/COLIBRE/ScienceRuns - output_dir: /snap8/scratch/dp004/dc-mcgi1/soap_BirthTrackId - scratch_dir: /snap8/scratch/dp004/dc-mcgi1/soap_BirthTrackId - -# Location of the Swift snapshots: -Snapshots: - filename: "{sim_dir}/{sim_name}/snapshots/colibre_{snap_nr:04d}/colibre_{snap_nr:04d}.{file_nr}.hdf5" - -# Which halo finder we're using, and base name for halo finder output files -HaloFinder: - type: HBTplus - filename: "{sim_dir}/{sim_name}/HBT-HERONS/sorted_catalogues/OrderedSubSnap_{snap_nr:03d}.hdf5" - # fof_filename: "{sim_dir}/{sim_name}/fof/fof_output_{snap_nr:04d}.hdf5" - # fof_radius_filename: "{sim_dir}/{sim_name}/fof/fof_output_{snap_nr:04d}.hdf5" - read_potential_energies: true - #type: VR - #filename: "{sim_dir}/halo_{snap_nr:04d}" - #type: Subfind - #filename: "{sim_dir}/snapdir_{snap_nr:03d}/snapshot_{snap_nr:03d}" - -GroupMembership: - # Where to write the group membership files - filename: "{sim_dir}/{sim_name}/SOAP-HBT/membership_{snap_nr:04d}/membership_{snap_nr:04d}.{file_nr}.hdf5" - -ExtraInput: - situ: "{sim_dir}/{sim_name}/SOAP_BirthTrackId/test_{snap_nr:04d}/test_{snap_nr:04d}.{file_nr}.hdf5" - -HaloProperties: - # Where to write the halo properties file - filename: "{output_dir}/{sim_name}/SOAP_uncompressed/halo_properties_{snap_nr:04d}.hdf5" - # Where to write temporary chunk output - chunk_dir: "{scratch_dir}/{sim_name}/SOAP-tmp/" - -ApertureProperties: - properties: - StellarMass: true - ExSituFraction: true - variations: - exclusive_50_kpc: - inclusive: false - radius_in_kpc: 50.0 -ProjectedApertureProperties: - properties: - {} - variations: - {} -SOProperties: - properties: - {} - variations: - {} -SubhaloProperties: - properties: - EncloseRadius: true - TotalMass: true - NumberOfBlackHoleParticles: true - NumberOfDarkMatterParticles: true - NumberOfGasParticles: true - NumberOfStarParticles: true -aliases: - PartType0/LastSNIIKineticFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent - PartType0/LastSNIIThermalFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent - snipshot: - PartType0/SpeciesFractions: 
PartType0/ReducedSpeciesFractions - PartType0/ElementMassFractions: PartType0/ReducedElementMassFractions - PartType0/LastSNIIKineticFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent - PartType0/LastSNIIThermalFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent -filters: - general: - limit: 100 - properties: - - BoundSubhalo/NumberOfGasParticles - - BoundSubhalo/NumberOfDarkMatterParticles - - BoundSubhalo/NumberOfStarParticles - - BoundSubhalo/NumberOfBlackHoleParticles - combine_properties: sum - baryon: - limit: 0 - properties: - - BoundSubhalo/NumberOfGasParticles - - BoundSubhalo/NumberOfStarParticles - combine_properties: sum - dm: - limit: 0 - properties: - - BoundSubhalo/NumberOfDarkMatterParticles - gas: - limit: 0 - properties: - - BoundSubhalo/NumberOfGasParticles - star: - limit: 0 - properties: - - BoundSubhalo/NumberOfStarParticles -defined_constants: - O_H_sun: 4.9e-4 - Fe_H_sun: 3.16e-5 - N_O_sun: 0.138 - C_O_sun: 0.549 - Mg_H_sun: 3.98e-5 -calculations: - calculate_missing_properties: false - strict_halo_copy: false - recently_heated_gas_filter: - delta_time_myr: 15 - use_AGN_delta_T: false - cold_dense_gas_filter: - maximum_temperature_K: 3.16e4 - minimum_hydrogen_number_density_cm3: 0.1 From 6a1d3984f771f3118d2385f309aa7f8dfa0586f1 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 14 Jan 2026 13:30:25 +0000 Subject: [PATCH 9/9] Format --- .../particle_selection/aperture_properties.py | 6 ++--- misc/compute_BirthHaloCatalogueIndex.py | 26 +++++++++++-------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/SOAP/particle_selection/aperture_properties.py b/SOAP/particle_selection/aperture_properties.py index 3a424a42..a14c9be1 100644 --- a/SOAP/particle_selection/aperture_properties.py +++ b/SOAP/particle_selection/aperture_properties.py @@ -771,9 +771,9 @@ def ExSituFraction(self) -> unyt.unyt_quantity: group_nr = self.get_dataset("PartType4/GroupNr_bound")[self.star_mask_all][ self.star_mask_ap ] - birth_group_nr = self.get_dataset("PartType4/BirthHaloCatalogueIndex")[self.star_mask_all][ - self.star_mask_ap - ] + birth_group_nr = self.get_dataset("PartType4/BirthHaloCatalogueIndex")[ + self.star_mask_all + ][self.star_mask_ap] ex_situ = group_nr != birth_group_nr return self.star_mass_fraction[ex_situ].sum() diff --git a/misc/compute_BirthHaloCatalogueIndex.py b/misc/compute_BirthHaloCatalogueIndex.py index 5fbed1d0..8926db16 100644 --- a/misc/compute_BirthHaloCatalogueIndex.py +++ b/misc/compute_BirthHaloCatalogueIndex.py @@ -78,6 +78,7 @@ def load_particle_data(snap_basename, membership_basename, load_gas, comm): return particle_data + # Units for the dimensionless fields we will be saving unit_attrs = { "Conversion factor to CGS (not including cosmological corrections)": [1.0], @@ -132,9 +133,7 @@ def mpi_print(string, comm_rank): "--final-snap-nr", type=int, required=True, - help=( - "Snapshot at which to load the particles" - ), + help=("Snapshot at which to load the particles"), ) parser.add_argument( "--calculate-PreBirthHaloCatalogueIndex", @@ -168,8 +167,8 @@ def mpi_print(string, comm_rank): star_first_snapshot = np.copy(star_birth_ids) if args.calculate_PreBirthHaloCatalogueIndex: - particle_data['PartType0/ParticleIDs'] = np.ones(0) - particle_data['PartType0/GroupNr_bound'] = np.ones(0) + particle_data["PartType0/ParticleIDs"] = np.ones(0) + particle_data["PartType0/GroupNr_bound"] = np.ones(0) star_prebirth_ids = np.copy(star_birth_ids) for snap_nr in range(0, args.final_snap_nr + 1): @@ -177,8 +176,8 @@ def 
mpi_print(string, comm_rank): mpi_print(f"Loading data from snapshot {snap_nr}", comm_rank) if args.calculate_PreBirthHaloCatalogueIndex: # We need to keep the gas IDs from snapshot N-1 - gas_particle_ids = particle_data['PartType0/ParticleIDs'] - gas_group_nr = particle_data['PartType0/GroupNr_bound'] + gas_particle_ids = particle_data["PartType0/ParticleIDs"] + gas_group_nr = particle_data["PartType0/GroupNr_bound"] snap_basename = args.snap_basename.format(snap_nr=snap_nr) membership_basename = args.membership_basename.format(snap_nr=snap_nr) particle_data = load_particle_data( @@ -236,14 +235,20 @@ def mpi_print(string, comm_rank): "FirstSnapshot": star_first_snapshot, } attrs = { - "BirthHaloCatalogueIndex": {"Description": "The HaloCatalogueIndex of this particle at the first snapshot it appeared."}, - "FirstSnapshot": {"Description": "Index of the first simulation snapshot in which the star particle is present."}, + "BirthHaloCatalogueIndex": { + "Description": "The HaloCatalogueIndex of this particle at the first snapshot it appeared." + }, + "FirstSnapshot": { + "Description": "Index of the first simulation snapshot in which the star particle is present." + }, } attrs["BirthHaloCatalogueIndex"].update(unit_attrs) attrs["FirstSnapshot"].update(unit_attrs) if args.calculate_PreBirthHaloCatalogueIndex: output["PreBirthHaloCatalogueIndex"] = star_prebirth_ids - attrs["PreBirthHaloCatalogueIndex"] = {"Description": "The HaloCatalogueIndex of gas prognitor at the snapshot before the star formed. -99 if no gas progenitor is found."} + attrs["PreBirthHaloCatalogueIndex"] = { + "Description": "The HaloCatalogueIndex of gas prognitor at the snapshot before the star formed. -99 if no gas progenitor is found." + } attrs["PreBirthHaloCatalogueIndex"].update(unit_attrs) # Check the output directory exists @@ -278,4 +283,3 @@ def mpi_print(string, comm_rank): comm.barrier() mpi_print(f"Runtime: {datetime.datetime.now() - start_time}", comm_rank) mpi_print("Done!", comm_rank) -
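
Note on the partial-formatting trick introduced in patch 2 and used when handling the auxilary snapshot patterns: the SafeDict subclass lets str.format_map substitute {snap_nr} while leaving {file_nr} untouched for later expansion. Below is a minimal, self-contained sketch of that behaviour (illustrative only, not part of the patch series; the value 77 is an arbitrary example snapshot number):

    class SafeDict(dict):
        def __missing__(self, key):
            # Leave unknown keys in place so they can be filled in later
            return "{" + key + "}"

    pattern = "membership_{snap_nr:04d}.{file_nr}.hdf5"
    partial = pattern.format_map(SafeDict({"snap_nr": 77}))
    print(partial)                    # membership_0077.{file_nr}.hdf5
    print(partial.format(file_nr=3))  # membership_0077.3.hdf5

This relies on the deferred placeholder carrying no format spec (as is the case for {file_nr} in the patterns above); a spec such as {file_nr:03d} would fail once the placeholder is returned as a plain string.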