From 572f4bf9a39ff0648f5a19025715d5c73e52a8d4 Mon Sep 17 00:00:00 2001 From: rettigl Date: Tue, 27 Jan 2026 21:56:53 +0100 Subject: [PATCH 1/9] adaption to accepted nexus definitions --- src/sed/calibrator/momentum.py | 13 +++++++------ src/sed/config/NXmpes_config.json | 2 +- tests/calibrator/test_momentum.py | 24 ++++++++++++------------ 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/sed/calibrator/momentum.py b/src/sed/calibrator/momentum.py index 16088945..dd87bb6e 100644 --- a/src/sed/calibrator/momentum.py +++ b/src/sed/calibrator/momentum.py @@ -1832,11 +1832,11 @@ def gather_correction_metadata(self) -> dict: metadata["registration"]["creation_date"] = datetime.now() metadata["registration"]["applied"] = True metadata["registration"]["depends_on"] = ( - "/entry/process/registration/transformations/rot_z" + "/entry/registration/transformations/rot_z" if "angle" in metadata["registration"] and metadata["registration"]["angle"] - else "/entry/process/registration/transformations/trans_y" + else "/entry/registration/transformations/trans_y" if "xtrans" in metadata["registration"] and metadata["registration"]["xtrans"] - else "/entry/process/registration/transformations/trans_x" + else "/entry/registration/transformations/trans_x" if "ytrans" in metadata["registration"] and metadata["registration"]["ytrans"] else "." ) @@ -1860,7 +1860,7 @@ def gather_correction_metadata(self) -> dict: [0.0, 1.0, 0.0], ) metadata["registration"]["trans_y"]["depends_on"] = ( - "/entry/process/registration/transformations/trans_x" + "/entry/registration/transformations/trans_x" if "ytrans" in metadata["registration"] and metadata["registration"]["ytrans"] else "." 
) @@ -1875,10 +1875,11 @@ def gather_correction_metadata(self) -> dict: metadata["registration"]["rot_z"]["offset"] = np.concatenate( (metadata["registration"]["center"], [0.0]), ) + metadata["registration"]["rot_z"]["offset_units"] = "pixel" metadata["registration"]["rot_z"]["depends_on"] = ( - "/entry/process/registration/transformations/trans_y" + "/entry/registration/transformations/trans_y" if "xtrans" in metadata["registration"] and metadata["registration"]["xtrans"] - else "/entry/process/registration/transformations/trans_x" + else "/entry/registration/transformations/trans_x" if "ytrans" in metadata["registration"] and metadata["registration"]["ytrans"] else "." ) diff --git a/src/sed/config/NXmpes_config.json b/src/sed/config/NXmpes_config.json index 17e7b1cb..5211d674 100644 --- a/src/sed/config/NXmpes_config.json +++ b/src/sed/config/NXmpes_config.json @@ -296,7 +296,7 @@ }, "/ENTRY/REGISTRATION[registration]": { "applied": "!@attrs:metadata/momentum_correction/registration/applied", - "depends_on": "/entry/process/registration/transformations/rot_z", + "depends_on": "/entry/registration/transformations/rot_z", "TRANSFORMATIONS[transformations]": { "AXISNAME[trans_x]": "@attrs:metadata/momentum_correction/registration/trans_x/value", "AXISNAME[trans_x]/@transformation_type": "translation", diff --git a/tests/calibrator/test_momentum.py b/tests/calibrator/test_momentum.py index dcddaa85..cfde93da 100644 --- a/tests/calibrator/test_momentum.py +++ b/tests/calibrator/test_momentum.py @@ -239,44 +239,44 @@ def test_apply_correction() -> None: ] depends_on_list = [ { - "root": "/entry/process/registration/transformations/trans_x", + "root": "/entry/registration/transformations/trans_x", "axes": {"trans_x": "."}, }, { - "root": "/entry/process/registration/transformations/trans_y", + "root": "/entry/registration/transformations/trans_y", "axes": {"trans_y": "."}, }, { - "root": "/entry/process/registration/transformations/rot_z", + "root": 
"/entry/registration/transformations/rot_z", "axes": {"rot_z": "."}, }, { - "root": "/entry/process/registration/transformations/trans_y", + "root": "/entry/registration/transformations/trans_y", "axes": { "trans_x": ".", - "trans_y": "/entry/process/registration/transformations/trans_x", + "trans_y": "/entry/registration/transformations/trans_x", }, }, { - "root": "/entry/process/registration/transformations/rot_z", + "root": "/entry/registration/transformations/rot_z", "axes": { "trans_x": ".", - "rot_z": "/entry/process/registration/transformations/trans_x", + "rot_z": "/entry/registration/transformations/trans_x", }, }, { - "root": "/entry/process/registration/transformations/rot_z", + "root": "/entry/registration/transformations/rot_z", "axes": { "trans_y": ".", - "rot_z": "/entry/process/registration/transformations/trans_y", + "rot_z": "/entry/registration/transformations/trans_y", }, }, { - "root": "/entry/process/registration/transformations/rot_z", + "root": "/entry/registration/transformations/rot_z", "axes": { "trans_x": ".", - "trans_y": "/entry/process/registration/transformations/trans_x", - "rot_z": "/entry/process/registration/transformations/trans_y", + "trans_y": "/entry/registration/transformations/trans_x", + "rot_z": "/entry/registration/transformations/trans_y", }, }, ] From 0204c7c3a7df874c1048e15c9e4aaffda529c524 Mon Sep 17 00:00:00 2001 From: rettigl Date: Tue, 27 Jan 2026 22:55:25 +0100 Subject: [PATCH 2/9] fix tests for pandas>=3 --- tests/test_dfops.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_dfops.py b/tests/test_dfops.py index 3a6482cd..b3945787 100644 --- a/tests/test_dfops.py +++ b/tests/test_dfops.py @@ -160,7 +160,7 @@ def swap(x, y): def test_forward_fill_lazy_sparse_nans() -> None: """test that a lazy forward fill works as expected with sparse nans""" t_df = df.copy() - t_df["energy"][::2] = np.nan + t_df.iloc[::2, 2] = np.nan t_dask_df = ddf.from_pandas(t_df, 
npartitions=N_PARTITIONS) t_dask_df = forward_fill_lazy(t_dask_df, "energy", before="max") t_df = t_df.ffill() @@ -170,7 +170,7 @@ def test_forward_fill_lazy_sparse_nans() -> None: def test_forward_fill_lazy_full_partition_nans() -> None: """test that a lazy forward fill works as expected with a full partition of nans""" t_df = df.copy() - t_df["energy"][5:25] = np.nan + t_df.iloc[5:25, 2] = np.nan t_dask_df = ddf.from_pandas(t_df, npartitions=N_PARTITIONS) t_dask_df = forward_fill_lazy(t_dask_df, "energy", before="max") t_df = t_df.ffill() @@ -182,7 +182,7 @@ def test_forward_fill_lazy_consecutive_full_partition_nans() -> None: full of nans """ t_df = df.copy() - t_df["energy"][5:35] = np.nan + t_df.iloc[5:35, 2] = np.nan t_dask_df = ddf.from_pandas(t_df, npartitions=N_PARTITIONS) t_dask_df = forward_fill_lazy(t_dask_df, "energy", before="max") t_df = t_df.ffill() @@ -192,7 +192,7 @@ def test_forward_fill_lazy_consecutive_full_partition_nans() -> None: def test_forward_fill_lazy_wrong_parameters() -> None: """test that a lazy forward fill fails as expected on wrong parameters""" t_df = df.copy() - t_df["energy"][5:35] = np.nan + t_df.iloc[5:35, 2] = np.nan t_dask_df = ddf.from_pandas(t_df, npartitions=N_PARTITIONS) with pytest.raises(TypeError): t_dask_df = forward_fill_lazy(t_dask_df, "energy", before="wrong parameter") @@ -201,7 +201,7 @@ def test_forward_fill_lazy_wrong_parameters() -> None: def test_forward_fill_lazy_compute() -> None: """test that a lazy forward fill works as expected with compute=True""" t_df = df.copy() - t_df["energy"][5:35] = np.nan + t_df.iloc[5:35, 2] = np.nan t_dask_df = ddf.from_pandas(t_df, npartitions=N_PARTITIONS) t_dask_df_comp = forward_fill_lazy(t_dask_df, "energy", before="max", compute_lengths=True) t_dask_df_nocomp = forward_fill_lazy(t_dask_df, "energy", before="max", compute_lengths=False) @@ -212,7 +212,7 @@ def test_forward_fill_lazy_keep_head_nans() -> None: """test that a lazy forward fill works as expected with missing 
values at the beginning of the dataframe""" t_df = df.copy() - t_df["energy"][:5] = np.nan + t_df.iloc[:5, 2] = np.nan t_dask_df = ddf.from_pandas(t_df, npartitions=N_PARTITIONS) t_df = forward_fill_lazy(t_dask_df, "energy", before="max").compute() assert np.all(np.isnan(t_df["energy"][:5])) @@ -238,7 +238,7 @@ def test_forward_fill_lazy_wrong_channels() -> None: def test_forward_fill_lazy_multiple_iterations() -> None: """test that a lazy forward fill works as expected with multiple iterations""" t_df = df.copy() - t_df["energy"][5:35] = np.nan + t_df.loc[5:35, "energy"] = np.nan t_dask_df = ddf.from_pandas(t_df, npartitions=N_PARTITIONS) t_dask_df = forward_fill_lazy(t_dask_df, "energy", before="max", iterations=5) t_df = t_df.ffill() From 1563bd3f764ac4a7636665a5981163e2881365f5 Mon Sep 17 00:00:00 2001 From: rettigl Date: Sat, 31 Jan 2026 23:50:47 +0100 Subject: [PATCH 3/9] create copy in bin_partition to avoid modifying input data --- src/sed/binning/binning.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/sed/binning/binning.py b/src/sed/binning/binning.py index e6884538..ecd9b215 100644 --- a/src/sed/binning/binning.py +++ b/src/sed/binning/binning.py @@ -121,11 +121,14 @@ def bin_partition( # convert bin centers to bin edges: if all(isinstance(x, np.ndarray) for x in bins): - bins = cast(list[np.ndarray], bins) + # create a copy to avoid modifying input data + bins = list(bins) for i, bin_centers in enumerate(bins): bins[i] = bin_centers_to_bin_edges(bin_centers) else: bins = cast(list[int], bins) + # create a copy to avoid modifying input data + ranges = list(ranges) # shift ranges by half a bin size to align the bin centers to the given ranges, # as the histogram functions interpret the ranges as limits for the edges. 
for i, nbins in enumerate(bins): From 31fed8853d92b83c0f8788d45b14be5dadf086bb Mon Sep 17 00:00:00 2001 From: rettigl Date: Sat, 31 Jan 2026 23:52:09 +0100 Subject: [PATCH 4/9] remove limit on bokeh --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c2875329..6df968c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "bokeh>=2.4.2,<3.7.0", + "bokeh>=2.4.2", "dask>=2021.12.0,<2024.8", "elabapi-python>=5.0,<5.2", "fastdtw>=0.3.4", From e1021703dd11ba1967e6d49c1cff843da54b8f74 Mon Sep 17 00:00:00 2001 From: rettigl Date: Sun, 1 Feb 2026 12:44:35 +0100 Subject: [PATCH 5/9] fix linting --- src/sed/binning/binning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sed/binning/binning.py b/src/sed/binning/binning.py index ecd9b215..04a399cb 100644 --- a/src/sed/binning/binning.py +++ b/src/sed/binning/binning.py @@ -122,7 +122,7 @@ def bin_partition( # convert bin centers to bin edges: if all(isinstance(x, np.ndarray) for x in bins): # create a copy to avoid modifying input data - bins = list(bins) + bins = list(cast(list[np.ndarray], bins)) for i, bin_centers in enumerate(bins): bins[i] = bin_centers_to_bin_edges(bin_centers) else: From 973d6ddf53c15338d0c7ee51a6c149cf66f63256 Mon Sep 17 00:00:00 2001 From: rettigl Date: Sun, 1 Feb 2026 13:01:44 +0100 Subject: [PATCH 6/9] changes for new bokeh versions --- src/sed/calibrator/energy.py | 4 ++-- src/sed/calibrator/momentum.py | 7 ++++--- src/sed/diagnostics.py | 3 ++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/sed/calibrator/energy.py b/src/sed/calibrator/energy.py index b5d055dd..ec34483c 100644 --- a/src/sed/calibrator/energy.py +++ b/src/sed/calibrator/energy.py @@ -734,8 +734,8 @@ def view( title=ttl, width=figsize[0] * 100, height=figsize[1] * 100, - tooltips=ttp, ) + fig.hover.tooltips = ttp # Plotting the main
traces for itr, color in zip(range(len(traces)), colors): trace = traces[itr, :] @@ -790,7 +790,7 @@ def view( if show_legend: fig.legend.location = kwds.pop("legend_location", "top_right") fig.legend.spacing = 0 - fig.legend.padding = 2 + fig.legend.padding = 2 # type: ignore pbk.show(fig) diff --git a/src/sed/calibrator/momentum.py b/src/sed/calibrator/momentum.py index dd87bb6e..dd65536d 100644 --- a/src/sed/calibrator/momentum.py +++ b/src/sed/calibrator/momentum.py @@ -21,6 +21,7 @@ import xarray as xr from bokeh.colors import RGB from bokeh.io import output_notebook +from bokeh.models import Range1d from bokeh.palettes import Category10 as ColorCycle from IPython.display import display from joblib import delayed @@ -1408,10 +1409,10 @@ def view( fig = pbk.figure( width=figsize[0] * 100, height=figsize[1] * 100, - tooltips=ttp, - x_range=(0, num_rows), - y_range=(0, num_cols), + x_range=Range1d(0, num_rows), + y_range=Range1d(0, num_cols), ) + fig.hover.tooltips = ttp fig.image( image=[image.T], x=0, diff --git a/src/sed/diagnostics.py b/src/sed/diagnostics.py index 4f44b1f7..a25e63aa 100644 --- a/src/sed/diagnostics.py +++ b/src/sed/diagnostics.py @@ -34,7 +34,8 @@ def plot_single_hist( """ ttp = kwds.pop("tooltip", [("(x, y)", "($x, $y)")]) - fig = pbk.figure(background_fill_color="white", tooltips=ttp) + fig = pbk.figure(background_fill_color="white") + fig.hover.tooltips = ttp fig.quad( top=histvals, bottom=0, From 958f336255d7ea078639d960afabb7c2d0d864a5 Mon Sep 17 00:00:00 2001 From: rettigl Date: Sun, 1 Feb 2026 12:26:13 +0100 Subject: [PATCH 7/9] fix dld sector alignment for newer versions of dask --- src/sed/calibrator/energy.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/sed/calibrator/energy.py b/src/sed/calibrator/energy.py index ec34483c..2c766c07 100644 --- a/src/sed/calibrator/energy.py +++ b/src/sed/calibrator/energy.py @@ -1520,12 +1520,17 @@ def align_dld_sectors( ) tof_column = tof_column or 
self.tof_column - # align the 8s sectors - sector_delays_arr = dask.array.from_array(sector_delays) + # align the 8 sectors + # Use a local NumPy array and vectorized indexing per partition. Creating a + # dask.array and indexing it with per-partition numpy indices is expensive + # because it builds additional dask graphs. Using np.take on a NumPy array + # inside the partition function keeps the work local and fast. def align_sector(x): - val = x[tof_column] - sector_delays_arr[x[sector_id_column].values.astype(int)] - return val.astype(np.float32) + # ensure integer indices and use np.take for fast vectorized lookup + idx = x[sector_id_column].to_numpy(dtype=int) + shifted = x[tof_column].to_numpy(dtype=float) - np.take(sector_delays, idx) + return dask.dataframe.from_array(shifted.astype(np.float32)) df[tof_column] = df.map_partitions(align_sector, meta=(tof_column, np.float32)) metadata: dict[str, Any] = { From 969181bc7abd313292703bebbd197a3132721ac5 Mon Sep 17 00:00:00 2001 From: rettigl Date: Sun, 1 Feb 2026 12:39:02 +0100 Subject: [PATCH 8/9] update dask and pandas --- pyproject.toml | 4 ++-- src/sed/__init__.py | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6df968c8..07accf47 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ classifiers = [ ] dependencies = [ "bokeh>=2.4.2", - "dask>=2021.12.0,<2024.8", + "dask[dataframe]>=2024.8,<2025.5", # limit due to https://github.com/dask/dask/issues/12122 "elabapi-python>=5.0,<5.2", "fastdtw>=0.3.4", "h5py>=3.6.0", @@ -41,7 +41,7 @@ dependencies = [ "natsort>=8.1.0", "numba>=0.55.1", "numpy>=2.0.0", - "pandas>=1.4.1", + "pandas>=3.0.0", "photutils<2.0", "psutil>=5.9.0", "pynxtools-mpes>=0.2.6", diff --git a/src/sed/__init__.py b/src/sed/__init__.py index 3d03ef82..e4d1d659 100644 --- a/src/sed/__init__.py +++ b/src/sed/__init__.py @@ -1,10 +1,6 @@ """sed module easy access APIs.""" import importlib.metadata -import dask - 
-dask.config.set({"dataframe.query-planning": False}) - from .core.processor import SedProcessor # noqa: E402 __version__ = importlib.metadata.version("sed-processor") From cfaeb8fa3c565ba3218f63b359fb145a93944c60 Mon Sep 17 00:00:00 2001 From: rettigl Date: Sun, 1 Feb 2026 13:07:36 +0100 Subject: [PATCH 9/9] small fixes --- pyproject.toml | 2 +- src/sed/binning/binning.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 07accf47..9f4d067a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ dependencies = [ "natsort>=8.1.0", "numba>=0.55.1", "numpy>=2.0.0", - "pandas>=3.0.0", + "pandas>=1.4.1", "photutils<2.0", "psutil>=5.9.0", "pynxtools-mpes>=0.2.6", diff --git a/src/sed/binning/binning.py b/src/sed/binning/binning.py index 04a399cb..b87785e9 100644 --- a/src/sed/binning/binning.py +++ b/src/sed/binning/binning.py @@ -495,7 +495,7 @@ def normalization_histogram_from_timed_dataframe( def apply_jitter_on_column( - df: dask.dataframe.core.DataFrame | pd.DataFrame, + df: dask.dataframe.DataFrame | pd.DataFrame, amp: float, col: str, mode: str = "uniform", @@ -503,7 +503,7 @@ def apply_jitter_on_column( """Add jittering to the column of a dataframe. Args: - df (Union[dask.dataframe.core.DataFrame, pd.DataFrame]): Dataframe to add + df (Union[dask.dataframe.DataFrame, pd.DataFrame]): Dataframe to add noise/jittering to. amp (float): Amplitude scaling for the jittering noise. col (str): Name of the column to add jittering to.