diff --git a/pyproject.toml b/pyproject.toml
index c2875329..9f4d067a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,8 +29,8 @@ classifiers = [
     "Operating System :: OS Independent",
 ]
 dependencies = [
-    "bokeh>=2.4.2,<3.7.0",
-    "dask>=2021.12.0,<2024.8",
+    "bokeh>=2.4.2",
+    "dask[dataframe]>=2024.8,<2025.5", # limit due to https://github.com/dask/dask/issues/12122
     "elabapi-python>=5.0,<5.2",
     "fastdtw>=0.3.4",
     "h5py>=3.6.0",
diff --git a/src/sed/__init__.py b/src/sed/__init__.py
index 3d03ef82..e4d1d659 100644
--- a/src/sed/__init__.py
+++ b/src/sed/__init__.py
@@ -1,10 +1,6 @@
 """sed module easy access APIs."""
 import importlib.metadata

-import dask
-
-dask.config.set({"dataframe.query-planning": False})
-
 from .core.processor import SedProcessor  # noqa: E402

 __version__ = importlib.metadata.version("sed-processor")
diff --git a/src/sed/binning/binning.py b/src/sed/binning/binning.py
index e6884538..b87785e9 100644
--- a/src/sed/binning/binning.py
+++ b/src/sed/binning/binning.py
@@ -121,11 +121,14 @@ def bin_partition(

     # convert bin centers to bin edges:
     if all(isinstance(x, np.ndarray) for x in bins):
-        bins = cast(list[np.ndarray], bins)
+        # create a copy to avoid modifying input data
+        bins = list(cast(list[np.ndarray], bins))
         for i, bin_centers in enumerate(bins):
             bins[i] = bin_centers_to_bin_edges(bin_centers)
     else:
         bins = cast(list[int], bins)
+        # create a copy to avoid modifying input data
+        ranges = list(ranges)
         # shift ranges by half a bin size to align the bin centers to the given ranges,
         # as the histogram functions interpret the ranges as limits for the edges.
         for i, nbins in enumerate(bins):
@@ -492,7 +495,7 @@


 def apply_jitter_on_column(
-    df: dask.dataframe.core.DataFrame | pd.DataFrame,
+    df: dask.dataframe.DataFrame | pd.DataFrame,
     amp: float,
     col: str,
     mode: str = "uniform",
@@ -500,7 +503,7 @@
     """Add jittering to the column of a dataframe.

     Args:
-        df (Union[dask.dataframe.core.DataFrame, pd.DataFrame]): Dataframe to add
+        df (Union[dask.dataframe.DataFrame, pd.DataFrame]): Dataframe to add
             noise/jittering to.
         amp (float): Amplitude scaling for the jittering noise.
         col (str): Name of the column to add jittering to.
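The two `list(...)` copies added to `bin_partition` close an aliasing leak: `cast()` returns the caller's own list, so the subsequent in-place writes converted the caller's bin centers (and likewise shifted the caller's `ranges`) behind its back, and a second call with the same arguments would start from already-converted values. A minimal sketch of that failure mode, with a hypothetical `centers_to_edges` standing in for `bin_centers_to_bin_edges`:

```python
import numpy as np


def centers_to_edges(centers: np.ndarray) -> np.ndarray:
    # hypothetical stand-in for bin_centers_to_bin_edges
    return np.concatenate(([centers[0] - 0.5], centers + 0.5))


def convert_inplace(bins: list) -> list:
    for i, centers in enumerate(bins):
        bins[i] = centers_to_edges(centers)  # overwrites the caller's list
    return bins


def convert_with_copy(bins: list) -> list:
    bins = list(bins)  # shallow copy, as in the patched bin_partition
    for i, centers in enumerate(bins):
        bins[i] = centers_to_edges(centers)
    return bins


user_bins = [np.array([0.0, 1.0, 2.0])]
convert_with_copy(user_bins)
assert user_bins[0].size == 3  # caller's centers untouched
convert_inplace(user_bins)
assert user_bins[0].size == 4  # caller's "centers" silently became edges
```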
diff --git a/src/sed/calibrator/energy.py b/src/sed/calibrator/energy.py
index b5d055dd..2c766c07 100644
--- a/src/sed/calibrator/energy.py
+++ b/src/sed/calibrator/energy.py
@@ -734,8 +734,8 @@ def view(
                 title=ttl,
                 width=figsize[0] * 100,
                 height=figsize[1] * 100,
-                tooltips=ttp,
             )
+            fig.hover.tooltips = ttp
             # Plotting the main traces
             for itr, color in zip(range(len(traces)), colors):
                 trace = traces[itr, :]
@@ -790,7 +790,7 @@

         if show_legend:
             fig.legend.location = kwds.pop("legend_location", "top_right")
             fig.legend.spacing = 0
-            fig.legend.padding = 2
+            fig.legend.padding = 2  # type: ignore

         pbk.show(fig)
@@ -1520,12 +1520,17 @@ def align_dld_sectors(
         )
         tof_column = tof_column or self.tof_column

-        # align the 8s sectors
-        sector_delays_arr = dask.array.from_array(sector_delays)
+        # align the 8 sectors
+        # Use a local NumPy array and vectorized indexing per partition. Creating a
+        # dask.array and indexing it with per-partition numpy indices is expensive
+        # because it builds additional dask graphs. Using np.take on a NumPy array
+        # inside the partition function keeps the work local and fast.
         def align_sector(x):
-            val = x[tof_column] - sector_delays_arr[x[sector_id_column].values.astype(int)]
-            return val.astype(np.float32)
+            # ensure integer indices and use np.take for fast vectorized lookup
+            idx = x[sector_id_column].to_numpy(dtype=int)
+            shifted = x[tof_column].to_numpy(dtype=float) - np.take(sector_delays, idx)
+            return pd.Series(shifted.astype(np.float32), index=x.index)

         df[tof_column] = df.map_partitions(align_sector, meta=(tof_column, np.float32))

         metadata: dict[str, Any] = {
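The rewritten `align_sector` stays entirely within one partition: it receives a plain pandas DataFrame, performs the delay lookup with `np.take` on a local NumPy array, and hands back a `pd.Series` on the partition's index so that `map_partitions` can assemble the column promised by `meta`. A minimal standalone sketch of the same pattern; the column names and delay values here are invented for illustration:

```python
import dask.dataframe as dd
import numpy as np
import pandas as pd

# invented per-sector delays, for illustration only
sector_delays = np.array([0.0, 0.25, -0.25, 0.1])

pdf = pd.DataFrame({"tof": np.arange(8.0), "sector": [0, 1, 2, 3, 0, 1, 2, 3]})
ddf = dd.from_pandas(pdf, npartitions=2)


def align_sector(part: pd.DataFrame) -> pd.Series:
    # plain NumPy work inside the partition: no extra dask graph is built
    idx = part["sector"].to_numpy(dtype=int)
    shifted = part["tof"].to_numpy(dtype=float) - np.take(sector_delays, idx)
    # return a Series on the partition's index, matching the declared meta
    return pd.Series(shifted.astype(np.float32), index=part.index)


ddf["tof"] = ddf.map_partitions(align_sector, meta=("tof", np.float32))
print(ddf.compute())
```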
diff --git a/src/sed/calibrator/momentum.py b/src/sed/calibrator/momentum.py
index 16088945..dd65536d 100644
--- a/src/sed/calibrator/momentum.py
+++ b/src/sed/calibrator/momentum.py
@@ -21,6 +21,7 @@
 import xarray as xr
 from bokeh.colors import RGB
 from bokeh.io import output_notebook
+from bokeh.models import Range1d
 from bokeh.palettes import Category10 as ColorCycle
 from IPython.display import display
 from joblib import delayed
@@ -1408,10 +1409,10 @@ def view(
             fig = pbk.figure(
                 width=figsize[0] * 100,
                 height=figsize[1] * 100,
-                tooltips=ttp,
-                x_range=(0, num_rows),
-                y_range=(0, num_cols),
+                x_range=Range1d(0, num_rows),
+                y_range=Range1d(0, num_cols),
             )
+            fig.hover.tooltips = ttp
             fig.image(
                 image=[image.T],
                 x=0,
@@ -1832,11 +1833,11 @@ def gather_correction_metadata(self) -> dict:
             metadata["registration"]["creation_date"] = datetime.now()
             metadata["registration"]["applied"] = True
             metadata["registration"]["depends_on"] = (
-                "/entry/process/registration/transformations/rot_z"
+                "/entry/registration/transformations/rot_z"
                 if "angle" in metadata["registration"] and metadata["registration"]["angle"]
-                else "/entry/process/registration/transformations/trans_y"
+                else "/entry/registration/transformations/trans_y"
                 if "xtrans" in metadata["registration"] and metadata["registration"]["xtrans"]
-                else "/entry/process/registration/transformations/trans_x"
+                else "/entry/registration/transformations/trans_x"
                 if "ytrans" in metadata["registration"] and metadata["registration"]["ytrans"]
                 else "."
             )
@@ -1860,7 +1861,7 @@
                 [0.0, 1.0, 0.0],
             )
             metadata["registration"]["trans_y"]["depends_on"] = (
-                "/entry/process/registration/transformations/trans_x"
+                "/entry/registration/transformations/trans_x"
                 if "ytrans" in metadata["registration"] and metadata["registration"]["ytrans"]
                 else "."
             )
@@ -1875,10 +1876,11 @@
             metadata["registration"]["rot_z"]["offset"] = np.concatenate(
                 (metadata["registration"]["center"], [0.0]),
             )
+            metadata["registration"]["rot_z"]["offset_units"] = "pixel"
             metadata["registration"]["rot_z"]["depends_on"] = (
-                "/entry/process/registration/transformations/trans_y"
+                "/entry/registration/transformations/trans_y"
                 if "xtrans" in metadata["registration"] and metadata["registration"]["xtrans"]
-                else "/entry/process/registration/transformations/trans_x"
+                else "/entry/registration/transformations/trans_x"
                 if "ytrans" in metadata["registration"] and metadata["registration"]["ytrans"]
                 else "."
             )
diff --git a/src/sed/config/NXmpes_config.json b/src/sed/config/NXmpes_config.json
index 17e7b1cb..5211d674 100644
--- a/src/sed/config/NXmpes_config.json
+++ b/src/sed/config/NXmpes_config.json
@@ -296,7 +296,7 @@
   },
   "/ENTRY/REGISTRATION[registration]": {
     "applied": "!@attrs:metadata/momentum_correction/registration/applied",
-    "depends_on": "/entry/process/registration/transformations/rot_z",
+    "depends_on": "/entry/registration/transformations/rot_z",
     "TRANSFORMATIONS[transformations]": {
       "AXISNAME[trans_x]": "@attrs:metadata/momentum_correction/registration/trans_x/value",
       "AXISNAME[trans_x]/@transformation_type": "translation",
diff --git a/src/sed/diagnostics.py b/src/sed/diagnostics.py
index 4f44b1f7..a25e63aa 100644
--- a/src/sed/diagnostics.py
+++ b/src/sed/diagnostics.py
@@ -34,7 +34,8 @@ def plot_single_hist(
     """
     ttp = kwds.pop("tooltip", [("(x, y)", "($x, $y)")])

-    fig = pbk.figure(background_fill_color="white", tooltips=ttp)
+    fig = pbk.figure(background_fill_color="white")
+    fig.hover.tooltips = ttp
     fig.quad(
         top=histvals,
         bottom=0,
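The Bokeh updates in `energy.py`, `momentum.py`, and `diagnostics.py` above all make the same two substitutions: explicit `Range1d` objects instead of `(min, max)` tuples, and tooltips attached to the hover tool after the figure is built instead of through the `tooltips` keyword of `pbk.figure`. A standalone sketch of that pattern, using an explicit `HoverTool` where the patch uses the `fig.hover` shortcut (data and sizes are made up):

```python
from bokeh.models import HoverTool, Range1d
from bokeh.plotting import figure, show

fig = figure(
    width=400,
    height=400,
    x_range=Range1d(0, 100),  # explicit Range1d instead of a (0, 100) tuple
    y_range=Range1d(0, 100),
)
# attach tooltips after construction instead of figure(tooltips=...)
fig.add_tools(HoverTool(tooltips=[("(x, y)", "($x, $y)")]))
fig.scatter(x=[10, 50, 90], y=[20, 60, 80])
show(fig)
```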
diff --git a/tests/calibrator/test_momentum.py b/tests/calibrator/test_momentum.py
index dcddaa85..cfde93da 100644
--- a/tests/calibrator/test_momentum.py
+++ b/tests/calibrator/test_momentum.py
@@ -239,44 +239,44 @@ def test_apply_correction() -> None:
     ]
     depends_on_list = [
         {
-            "root": "/entry/process/registration/transformations/trans_x",
+            "root": "/entry/registration/transformations/trans_x",
             "axes": {"trans_x": "."},
         },
         {
-            "root": "/entry/process/registration/transformations/trans_y",
+            "root": "/entry/registration/transformations/trans_y",
             "axes": {"trans_y": "."},
         },
         {
-            "root": "/entry/process/registration/transformations/rot_z",
+            "root": "/entry/registration/transformations/rot_z",
             "axes": {"rot_z": "."},
         },
         {
-            "root": "/entry/process/registration/transformations/trans_y",
+            "root": "/entry/registration/transformations/trans_y",
             "axes": {
                 "trans_x": ".",
-                "trans_y": "/entry/process/registration/transformations/trans_x",
+                "trans_y": "/entry/registration/transformations/trans_x",
             },
         },
         {
-            "root": "/entry/process/registration/transformations/rot_z",
+            "root": "/entry/registration/transformations/rot_z",
             "axes": {
                 "trans_x": ".",
-                "rot_z": "/entry/process/registration/transformations/trans_x",
+                "rot_z": "/entry/registration/transformations/trans_x",
             },
         },
         {
-            "root": "/entry/process/registration/transformations/rot_z",
+            "root": "/entry/registration/transformations/rot_z",
             "axes": {
                 "trans_y": ".",
-                "rot_z": "/entry/process/registration/transformations/trans_y",
+                "rot_z": "/entry/registration/transformations/trans_y",
             },
         },
         {
-            "root": "/entry/process/registration/transformations/rot_z",
+            "root": "/entry/registration/transformations/rot_z",
             "axes": {
                 "trans_x": ".",
-                "trans_y": "/entry/process/registration/transformations/trans_x",
-                "rot_z": "/entry/process/registration/transformations/trans_y",
+                "trans_y": "/entry/registration/transformations/trans_x",
+                "rot_z": "/entry/registration/transformations/trans_y",
             },
         },
     ]
diff --git a/tests/test_dfops.py b/tests/test_dfops.py
index 3a6482cd..b3945787 100644
--- a/tests/test_dfops.py
+++ b/tests/test_dfops.py
@@ -160,7 +160,7 @@ def swap(x, y):
 def test_forward_fill_lazy_sparse_nans() -> None:
     """test that a lazy forward fill works as expected with sparse nans"""
     t_df = df.copy()
-    t_df["energy"][::2] = np.nan
+    t_df.iloc[::2, 2] = np.nan
     t_dask_df = ddf.from_pandas(t_df, npartitions=N_PARTITIONS)
     t_dask_df = forward_fill_lazy(t_dask_df, "energy", before="max")
     t_df = t_df.ffill()
@@ -170,7 +170,7 @@
 def test_forward_fill_lazy_full_partition_nans() -> None:
     """test that a lazy forward fill works as expected with a full partition of nans"""
     t_df = df.copy()
-    t_df["energy"][5:25] = np.nan
+    t_df.iloc[5:25, 2] = np.nan
     t_dask_df = ddf.from_pandas(t_df, npartitions=N_PARTITIONS)
     t_dask_df = forward_fill_lazy(t_dask_df, "energy", before="max")
     t_df = t_df.ffill()
@@ -182,7 +182,7 @@ def test_forward_fill_lazy_consecutive_full_partition_nans() -> None:
     """test that a lazy forward fill works as expected with multiple consecutive partitions
     full of nans
     """
     t_df = df.copy()
-    t_df["energy"][5:35] = np.nan
+    t_df.iloc[5:35, 2] = np.nan
     t_dask_df = ddf.from_pandas(t_df, npartitions=N_PARTITIONS)
     t_dask_df = forward_fill_lazy(t_dask_df, "energy", before="max")
     t_df = t_df.ffill()
@@ -192,7 +192,7 @@
 def test_forward_fill_lazy_wrong_parameters() -> None:
     """test that a lazy forward fill fails as expected on wrong parameters"""
     t_df = df.copy()
-    t_df["energy"][5:35] = np.nan
+    t_df.iloc[5:35, 2] = np.nan
     t_dask_df = ddf.from_pandas(t_df, npartitions=N_PARTITIONS)
     with pytest.raises(TypeError):
         t_dask_df = forward_fill_lazy(t_dask_df, "energy", before="wrong parameter")
@@ -201,7 +201,7 @@
 def test_forward_fill_lazy_compute() -> None:
     """test that a lazy forward fill works as expected with compute=True"""
     t_df = df.copy()
-    t_df["energy"][5:35] = np.nan
+    t_df.iloc[5:35, 2] = np.nan
     t_dask_df = ddf.from_pandas(t_df, npartitions=N_PARTITIONS)
     t_dask_df_comp = forward_fill_lazy(t_dask_df, "energy", before="max", compute_lengths=True)
     t_dask_df_nocomp = forward_fill_lazy(t_dask_df, "energy", before="max", compute_lengths=False)
@@ -212,7 +212,7 @@ def test_forward_fill_lazy_keep_head_nans() -> None:
     """test that a lazy forward fill works as expected with missing values at the
     beginning of the dataframe"""
     t_df = df.copy()
-    t_df["energy"][:5] = np.nan
+    t_df.iloc[:5, 2] = np.nan
     t_dask_df = ddf.from_pandas(t_df, npartitions=N_PARTITIONS)
     t_df = forward_fill_lazy(t_dask_df, "energy", before="max").compute()
     assert np.all(np.isnan(t_df["energy"][:5]))
@@ -238,7 +238,7 @@ def test_forward_fill_lazy_wrong_channels() -> None:
 def test_forward_fill_lazy_multiple_iterations() -> None:
     """test that a lazy forward fill works as expected with multiple iterations"""
     t_df = df.copy()
-    t_df["energy"][5:35] = np.nan
+    t_df.loc[5:35, "energy"] = np.nan
     t_dask_df = ddf.from_pandas(t_df, npartitions=N_PARTITIONS)
     t_dask_df = forward_fill_lazy(t_dask_df, "energy", before="max", iterations=5)
     t_df = t_df.ffill()
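All of the `tests/test_dfops.py` edits replace chained-indexing assignments such as `t_df["energy"][5:35] = np.nan` with a single `.iloc` or `.loc` call (column 2 of the test dataframe is `energy`). Chained assignment writes into a temporary object, which pandas deprecates and which stops reaching the original frame once copy-on-write is active, as it is by default in pandas 3. A minimal sketch of the difference, assuming pandas >= 2.0 for the option:

```python
import numpy as np
import pandas as pd

pd.set_option("mode.copy_on_write", True)  # the pandas 3 default

df = pd.DataFrame(
    {"posx": np.arange(6.0), "posy": np.arange(6.0), "energy": np.arange(6.0)},
)

# chained assignment: the write lands on a temporary copy and is lost
# (pandas emits a ChainedAssignmentError warning here)
df["energy"][::2] = np.nan
print(df["energy"].isna().sum())  # 0

# a single indexing call modifies df directly
df.iloc[::2, 2] = np.nan
print(df["energy"].isna().sum())  # 3
```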