diff --git a/.cspell/custom-dictionary.txt b/.cspell/custom-dictionary.txt index b6f706ed..43f2eb28 100644 --- a/.cspell/custom-dictionary.txt +++ b/.cspell/custom-dictionary.txt @@ -43,6 +43,8 @@ cdeform cdeformfield cdisp centroidnn +cfel +CFEL chessy clim cmap @@ -64,6 +66,7 @@ cryo cstart cstep csvfile +cumsum custom-dictionary cval cvdist @@ -176,6 +179,7 @@ joblib jpars jupyterlab kernelspec +kmic kmodem KTOF kwds @@ -208,6 +212,8 @@ mdist meshgrid microbunch microbunches +millis +millisec mirrorutil mnpos modindex diff --git a/.sed-dev/bin/Activate.ps1 b/.sed-dev/bin/Activate.ps1 new file mode 100644 index 00000000..b49d77ba --- /dev/null +++ b/.sed-dev/bin/Activate.ps1 @@ -0,0 +1,247 @@ +<# +.Synopsis +Activate a Python virtual environment for the current PowerShell session. + +.Description +Pushes the python executable for a virtual environment to the front of the +$Env:PATH environment variable and sets the prompt to signify that you are +in a Python virtual environment. Makes use of the command line switches as +well as the `pyvenv.cfg` file values present in the virtual environment. + +.Parameter VenvDir +Path to the directory that contains the virtual environment to activate. The +default value for this is the parent of the directory that the Activate.ps1 +script is located within. + +.Parameter Prompt +The prompt prefix to display when this virtual environment is activated. By +default, this prompt is the name of the virtual environment folder (VenvDir) +surrounded by parentheses and followed by a single space (ie. '(.venv) '). + +.Example +Activate.ps1 +Activates the Python virtual environment that contains the Activate.ps1 script. + +.Example +Activate.ps1 -Verbose +Activates the Python virtual environment that contains the Activate.ps1 script, +and shows extra information about the activation as it executes. + +.Example +Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv +Activates the Python virtual environment located in the specified location. + +.Example +Activate.ps1 -Prompt "MyPython" +Activates the Python virtual environment that contains the Activate.ps1 script, +and prefixes the current prompt with the specified string (surrounded in +parentheses) while the virtual environment is active. + +.Notes +On Windows, it may be required to enable this Activate.ps1 script by setting the +execution policy for the user. You can do this by issuing the following PowerShell +command: + +PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser + +For more information on Execution Policies: +https://go.microsoft.com/fwlink/?LinkID=135170 + +#> +Param( + [Parameter(Mandatory = $false)] + [String] + $VenvDir, + [Parameter(Mandatory = $false)] + [String] + $Prompt +) + +<# Function declarations --------------------------------------------------- #> + +<# +.Synopsis +Remove all shell session elements added by the Activate script, including the +addition of the virtual environment's Python executable from the beginning of +the PATH variable. + +.Parameter NonDestructive +If present, do not remove this function from the global namespace for the +session. 
+ +#> +function global:deactivate ([switch]$NonDestructive) { + # Revert to original values + + # The prior prompt: + if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) { + Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt + Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT + } + + # The prior PYTHONHOME: + if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) { + Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME + Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME + } + + # The prior PATH: + if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) { + Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH + Remove-Item -Path Env:_OLD_VIRTUAL_PATH + } + + # Just remove the VIRTUAL_ENV altogether: + if (Test-Path -Path Env:VIRTUAL_ENV) { + Remove-Item -Path env:VIRTUAL_ENV + } + + # Just remove VIRTUAL_ENV_PROMPT altogether. + if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) { + Remove-Item -Path env:VIRTUAL_ENV_PROMPT + } + + # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether: + if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) { + Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force + } + + # Leave deactivate function in the global namespace if requested: + if (-not $NonDestructive) { + Remove-Item -Path function:deactivate + } +} + +<# +.Description +Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the +given folder, and returns them in a map. + +For each line in the pyvenv.cfg file, if that line can be parsed into exactly +two strings separated by `=` (with any amount of whitespace surrounding the =) +then it is considered a `key = value` line. The left hand string is the key, +the right hand is the value. + +If the value starts with a `'` or a `"` then the first and last character is +stripped from the value before being captured. + +.Parameter ConfigDir +Path to the directory that contains the `pyvenv.cfg` file. +#> +function Get-PyVenvConfig( + [String] + $ConfigDir +) { + Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg" + + # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue). + $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue + + # An empty map will be returned if no config file is found. + $pyvenvConfig = @{ } + + if ($pyvenvConfigPath) { + + Write-Verbose "File exists, parse `key = value` lines" + $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath + + $pyvenvConfigContent | ForEach-Object { + $keyval = $PSItem -split "\s*=\s*", 2 + if ($keyval[0] -and $keyval[1]) { + $val = $keyval[1] + + # Remove extraneous quotations around a string value. 
+ if ("'""".Contains($val.Substring(0, 1))) { + $val = $val.Substring(1, $val.Length - 2) + } + + $pyvenvConfig[$keyval[0]] = $val + Write-Verbose "Adding Key: '$($keyval[0])'='$val'" + } + } + } + return $pyvenvConfig +} + + +<# Begin Activate script --------------------------------------------------- #> + +# Determine the containing directory of this script +$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition +$VenvExecDir = Get-Item -Path $VenvExecPath + +Write-Verbose "Activation script is located in path: '$VenvExecPath'" +Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)" +Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)" + +# Set values required in priority: CmdLine, ConfigFile, Default +# First, get the location of the virtual environment, it might not be +# VenvExecDir if specified on the command line. +if ($VenvDir) { + Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values" +} +else { + Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir." + $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/") + Write-Verbose "VenvDir=$VenvDir" +} + +# Next, read the `pyvenv.cfg` file to determine any required value such +# as `prompt`. +$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir + +# Next, set the prompt from the command line, or the config file, or +# just use the name of the virtual environment folder. +if ($Prompt) { + Write-Verbose "Prompt specified as argument, using '$Prompt'" +} +else { + Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value" + if ($pyvenvCfg -and $pyvenvCfg['prompt']) { + Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'" + $Prompt = $pyvenvCfg['prompt']; + } + else { + Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)" + Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'" + $Prompt = Split-Path -Path $venvDir -Leaf + } +} + +Write-Verbose "Prompt = '$Prompt'" +Write-Verbose "VenvDir='$VenvDir'" + +# Deactivate any currently active virtual environment, but leave the +# deactivate function in place. +deactivate -nondestructive + +# Now set the environment variable VIRTUAL_ENV, used by many tools to determine +# that there is an activated venv. 
+$env:VIRTUAL_ENV = $VenvDir + +if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) { + + Write-Verbose "Setting prompt to '$Prompt'" + + # Set the prompt to include the env name + # Make sure _OLD_VIRTUAL_PROMPT is global + function global:_OLD_VIRTUAL_PROMPT { "" } + Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT + New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt + + function global:prompt { + Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) " + _OLD_VIRTUAL_PROMPT + } + $env:VIRTUAL_ENV_PROMPT = $Prompt +} + +# Clear PYTHONHOME +if (Test-Path -Path Env:PYTHONHOME) { + Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME + Remove-Item -Path Env:PYTHONHOME +} + +# Add the venv to the PATH +Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH +$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH" diff --git a/.sed-dev/bin/activate b/.sed-dev/bin/activate new file mode 100644 index 00000000..44ec4b76 --- /dev/null +++ b/.sed-dev/bin/activate @@ -0,0 +1,63 @@ +# This file must be used with "source bin/activate" *from bash* +# you cannot run it directly + +deactivate () { + # reset old environment variables + if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then + PATH="${_OLD_VIRTUAL_PATH:-}" + export PATH + unset _OLD_VIRTUAL_PATH + fi + if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then + PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}" + export PYTHONHOME + unset _OLD_VIRTUAL_PYTHONHOME + fi + + # Call hash to forget past commands. Without forgetting + # past commands the $PATH changes we made may not be respected + hash -r 2> /dev/null + + if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then + PS1="${_OLD_VIRTUAL_PS1:-}" + export PS1 + unset _OLD_VIRTUAL_PS1 + fi + + unset VIRTUAL_ENV + unset VIRTUAL_ENV_PROMPT + if [ ! "${1:-}" = "nondestructive" ] ; then + # Self destruct! + unset -f deactivate + fi +} + +# unset irrelevant variables +deactivate nondestructive + +VIRTUAL_ENV="/home/abdelhak/sed/.sed-dev" +export VIRTUAL_ENV + +_OLD_VIRTUAL_PATH="$PATH" +PATH="$VIRTUAL_ENV/bin:$PATH" +export PATH + +# unset PYTHONHOME if set +# this will fail if PYTHONHOME is set to the empty string (which is bad anyway) +# could use `if (set -u; : $PYTHONHOME) ;` in bash +if [ -n "${PYTHONHOME:-}" ] ; then + _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}" + unset PYTHONHOME +fi + +if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then + _OLD_VIRTUAL_PS1="${PS1:-}" + PS1="(.sed-dev) ${PS1:-}" + export PS1 + VIRTUAL_ENV_PROMPT="(.sed-dev) " + export VIRTUAL_ENV_PROMPT +fi + +# Call hash to forget past commands. Without forgetting +# past commands the $PATH changes we made may not be respected +hash -r 2> /dev/null diff --git a/.sed-dev/bin/activate.csh b/.sed-dev/bin/activate.csh new file mode 100644 index 00000000..4495a1f3 --- /dev/null +++ b/.sed-dev/bin/activate.csh @@ -0,0 +1,26 @@ +# This file must be used with "source bin/activate.csh" *from csh*. +# You cannot run it directly. +# Created by Davide Di Blasi . +# Ported to Python 3.3 venv by Andrew Svetlov + +alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate' + +# Unset irrelevant variables. 
+deactivate nondestructive + +setenv VIRTUAL_ENV "/home/abdelhak/sed/.sed-dev" + +set _OLD_VIRTUAL_PATH="$PATH" +setenv PATH "$VIRTUAL_ENV/bin:$PATH" + + +set _OLD_VIRTUAL_PROMPT="$prompt" + +if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then + set prompt = "(.sed-dev) $prompt" + setenv VIRTUAL_ENV_PROMPT "(.sed-dev) " +endif + +alias pydoc python -m pydoc + +rehash diff --git a/.sed-dev/bin/activate.fish b/.sed-dev/bin/activate.fish new file mode 100644 index 00000000..5f2d1693 --- /dev/null +++ b/.sed-dev/bin/activate.fish @@ -0,0 +1,69 @@ +# This file must be used with "source /bin/activate.fish" *from fish* +# (https://fishshell.com/); you cannot run it directly. + +function deactivate -d "Exit virtual environment and return to normal shell environment" + # reset old environment variables + if test -n "$_OLD_VIRTUAL_PATH" + set -gx PATH $_OLD_VIRTUAL_PATH + set -e _OLD_VIRTUAL_PATH + end + if test -n "$_OLD_VIRTUAL_PYTHONHOME" + set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME + set -e _OLD_VIRTUAL_PYTHONHOME + end + + if test -n "$_OLD_FISH_PROMPT_OVERRIDE" + set -e _OLD_FISH_PROMPT_OVERRIDE + # prevents error when using nested fish instances (Issue #93858) + if functions -q _old_fish_prompt + functions -e fish_prompt + functions -c _old_fish_prompt fish_prompt + functions -e _old_fish_prompt + end + end + + set -e VIRTUAL_ENV + set -e VIRTUAL_ENV_PROMPT + if test "$argv[1]" != "nondestructive" + # Self-destruct! + functions -e deactivate + end +end + +# Unset irrelevant variables. +deactivate nondestructive + +set -gx VIRTUAL_ENV "/home/abdelhak/sed/.sed-dev" + +set -gx _OLD_VIRTUAL_PATH $PATH +set -gx PATH "$VIRTUAL_ENV/bin" $PATH + +# Unset PYTHONHOME if set. +if set -q PYTHONHOME + set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME + set -e PYTHONHOME +end + +if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" + # fish uses a function instead of an env var to generate the prompt. + + # Save the current fish_prompt function as the function _old_fish_prompt. + functions -c fish_prompt _old_fish_prompt + + # With the original prompt function renamed, we can override with our own. + function fish_prompt + # Save the return status of the last command. + set -l old_status $status + + # Output the venv prompt; color taken from the blue of the Python logo. + printf "%s%s%s" (set_color 4B8BBE) "(.sed-dev) " (set_color normal) + + # Restore the return status of the previous command. + echo "exit $old_status" | . + # Output the original/"old" prompt. 
+ _old_fish_prompt + end + + set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV" + set -gx VIRTUAL_ENV_PROMPT "(.sed-dev) " +end diff --git a/.sed-dev/bin/python b/.sed-dev/bin/python new file mode 120000 index 00000000..cccf4709 --- /dev/null +++ b/.sed-dev/bin/python @@ -0,0 +1 @@ +/software/mamba/2024.01/bin/python \ No newline at end of file diff --git a/.sed-dev/bin/python3 b/.sed-dev/bin/python3 new file mode 120000 index 00000000..d8654aa0 --- /dev/null +++ b/.sed-dev/bin/python3 @@ -0,0 +1 @@ +python \ No newline at end of file diff --git a/.sed-dev/bin/python3.11 b/.sed-dev/bin/python3.11 new file mode 120000 index 00000000..d8654aa0 --- /dev/null +++ b/.sed-dev/bin/python3.11 @@ -0,0 +1 @@ +python \ No newline at end of file diff --git a/.sed-dev/lib64 b/.sed-dev/lib64 new file mode 120000 index 00000000..7951405f --- /dev/null +++ b/.sed-dev/lib64 @@ -0,0 +1 @@ +lib \ No newline at end of file diff --git a/.sed-dev/pyvenv.cfg b/.sed-dev/pyvenv.cfg new file mode 100644 index 00000000..685910b6 --- /dev/null +++ b/.sed-dev/pyvenv.cfg @@ -0,0 +1,5 @@ +home = /software/mamba/2024.01/bin +include-system-site-packages = false +version = 3.11.7 +executable = /software/mamba/2024.01/bin/python3.11 +command = /software/mamba/2024.01/bin/python -m venv /home/abdelhak/sed/.sed-dev diff --git a/src/sed/config/flash_example_config.yaml b/src/sed/config/flash_example_config.yaml index 9fa598c1..21abe6b9 100644 --- a/src/sed/config/flash_example_config.yaml +++ b/src/sed/config/flash_example_config.yaml @@ -10,8 +10,6 @@ core: beamtime_id: 11019101 # the year of the beamtime year: 2023 - # the instrument used - instrument: hextof # hextof, wespe, etc # The paths to the raw and parquet data directories. If these are not # provided, the loader will try to find the data based on year beamtimeID etc # paths: @@ -32,6 +30,7 @@ core: # (Not to be changed by user) beamtime_dir: pg2: "/asap3/flash/gpfs/pg2/" + cfel: "/asap3/fs-flash-o/gpfs/hextof/" binning: # Histogram computation mode to use. @@ -60,6 +59,11 @@ dataframe: # Columns used for jitter correction jitter_cols: [dldPosX, dldPosY, dldTimeSteps] + # The index and formats of the data + index: [trainId, pulseId, electronId] + formats: [per_train, per_pulse, per_electron] + fill_formats: [per_train, per_pulse] # Channels with this format will be forward filled + # Column settings columns: x: dldPosX @@ -212,8 +216,7 @@ dataframe: # metadata collection from scicat # metadata: -# scicat_url: -# scicat_token: +# archiver_url: # The nexus collection routine shall be finalized soon for both instruments nexus: diff --git a/src/sed/config/lab_example_config.yaml b/src/sed/config/lab_example_config.yaml new file mode 100644 index 00000000..42d591e9 --- /dev/null +++ b/src/sed/config/lab_example_config.yaml @@ -0,0 +1,161 @@ +# This file contains the default configuration for the flash loader. + +core: + # defines the loader + loader: cfel + # Since this will run on maxwell most probably, we have a lot of cores at our disposal + num_cores: 10 + # the ID number of the beamtime + beamtime_id: 11021732 + # the year of the beamtime + year: 2025 + + # The paths to the raw and parquet data directories. If these are not + # provided, the loader will try to find the data based on year beamtimeID etc + paths: + # location of the raw data. + raw: "/asap3/fs-flash-o/gpfs/hextof/2025/data/11021732/raw/" + # location of the intermediate parquet files. + processed: "." + + # The beamtime directories for different DAQ systems. 
+ # (Not to be changed by user) + beamtime_dir: + pg2: "/asap3/flash/gpfs/pg2/" + cfel: "/asap3/fs-flash-o/gpfs/hextof/" + + +dataframe: + daq: fl1user3 # DAQ system name to resolve filenames/paths + ubid_offset: 5 # Offset correction to the pulseId + forward_fill_iterations: 0 # Number of iterations to fill the pulseId forward + split_sector_id_from_dld_time: True # Remove reserved bits for dldSectorID from dldTimeSteps column + sector_id_reserved_bits: 3 # Bits reserved for dldSectorID in the dldTimeSteps column + sector_delays: [0., 0., 0., 0., 0., 0., 0., 0.] # Sector delays + + first_event_time_stamp_key: /ScanParam/StartTime + ms_markers_key: /SlowData/exposure_time + millis_counter_key: /DLD/millisecCounter + + # Time and binning settings + tof_binwidth: 2.0576131995767355E-11 # Base time-of-flight bin width in seconds + tof_binning: 8 # Binning parameter for time-of-flight data + + # Columns used for jitter correction + index: [countId] + jitter_cols: [dldPosX, dldPosY, dldTimeSteps] + formats: [per_file, per_train, per_electron] + fill_formats: [per_train] # Channels with this format will be forward filled + + # Column settings + columns: + x: dldPosX + corrected_x: X + kx: kx + y: dldPosY + corrected_y: Y + ky: ky + tof: dldTimeSteps + tof_ns: dldTime + corrected_tof: tm + timestamp: timeStamp + auxiliary: dldAux + sector_id: dldSectorID + delay: delayStage + corrected_delay: pumpProbeTime + + units: + # These are the units of the columns + dldPosX: 'step' + dldPosY: 'step' + dldTimeSteps: 'step' + tof_voltage: 'V' + extractorVoltage: 'V' + extractorCurrent: 'A' + cryoTemperature: 'K' + sampleTemperature: 'K' + dldTime: 'ns' + delay: 'ps' + timeStamp: 's' + energy: 'eV' + E: 'eV' + kx: '1/A' + ky: '1/A' + + # The channels to load. + # channels have the following structure: + # : + # format: per_pulse/per_electron/per_train + # index_key: the hdf5 index key + # dataset_key: the hdf5 dataset key + # slice: int to slice a multidimensional data along axis=1. If not defined, there is no slicing + # dtype: the datatype of the data + # subChannels: further aliases for if the data is multidimensional and needs to be split in different cols + # used currently for the auxiliary channel + # : + # slice: int to slice a multidimensional data along axis=1. 
Must be defined + # dtype: the datatype of the data + + channels: + # event key + countId: + format: per_file + dataset_key: /DLD/NumOfEvents + # detector x position + dldPosX: + format: per_electron + dataset_key: /DLD/DLD/xPos + # dtype: uint32 + + # detector y position + dldPosY: + format: per_electron + dataset_key: /DLD/DLD/yPos + # dtype: uint32 + + # Detector time-of-flight channel + # if split_sector_id_from_dld_time is set to True, This this will generate + # also the dldSectorID channel + dldTimeSteps: + format: per_electron + dataset_key: /DLD/DLD/times + # dtype: uint32 + + # The auxiliary channel has a special structure where the group further contains + # a multidimensional structure so further aliases are defined below + dldAux: + format: per_train + dataset_key: "/SlowData/hextof/dld/info/Aux" + sub_channels: + sampleBias: + slice: 0 + dtype: float32 + tofVoltage: + slice: 1 + dtype: float64 + extractorVoltage: + slice: 2 + extractorCurrent: + slice: 3 + cryoTemperature: + slice: 4 + sampleTemperature: + slice: 5 + dldTimeBinSize: + slice: 15 + + vuRead: + format: per_train + dataset_key: /SlowData/hextof/logic/kmic1/Sample_VURead + + + +# metadata collection from scicat +# metadata: +# archiver_url: + +# The nexus collection routine shall be finalized soon for both instruments +# nexus: +# reader: "mpes" +# definition: "NXmpes" +# input_files: ["NXmpes_config-HEXTOF.json"] diff --git a/src/sed/core/config.py b/src/sed/core/config.py index d9c7b551..ae6b3ca7 100644 --- a/src/sed/core/config.py +++ b/src/sed/core/config.py @@ -18,7 +18,8 @@ package_dir = os.path.dirname(find_spec("sed").origin) -USER_CONFIG_PATH = user_config_path(appname="sed", appauthor="OpenCOMPES", ensure_exists=True) +USER_CONFIG_PATH = user_config_path(appname="sed", appauthor="OpenCOMPES") +USER_CONFIG_PATH.mkdir(parents=True, exist_ok=True) SYSTEM_CONFIG_PATH = ( Path(os.environ["ALLUSERSPROFILE"]).joinpath("sed") if platform.system() == "Windows" diff --git a/src/sed/core/config_model.py b/src/sed/core/config_model.py index bca9f959..738617f9 100644 --- a/src/sed/core/config_model.py +++ b/src/sed/core/config_model.py @@ -26,6 +26,7 @@ class PathsModel(BaseModel): raw: DirectoryPath processed: Optional[Union[DirectoryPath, NewPath]] = None + meta: Optional[Union[DirectoryPath, NewPath]] = None class CopyToolModel(BaseModel): @@ -58,7 +59,6 @@ class CoreModel(BaseModel): num_cores: Optional[PositiveInt] = None year: Optional[int] = None beamtime_id: Optional[Union[int, str]] = None - instrument: Optional[str] = None beamline: Optional[str] = None copy_tool: Optional[CopyToolModel] = None stream_name_prefixes: Optional[dict] = None @@ -134,6 +134,8 @@ class DataframeModel(BaseModel): # mpes specific settings first_event_time_stamp_key: Optional[str] = None ms_markers_key: Optional[str] = None + # cfel specific settings + millis_counter_key: Optional[str] = None # flash specific settings forward_fill_iterations: Optional[int] = None ubid_offset: Optional[int] = None @@ -141,6 +143,9 @@ class DataframeModel(BaseModel): sector_id_reserved_bits: Optional[int] = None sector_delays: Optional[Sequence[float]] = None daq: Optional[str] = None + index: Optional[Sequence[str]] = None + formats: Optional[Union[Sequence[str], str]] = None + fill_formats: Optional[Union[Sequence[str], str]] = None # SXP specific settings num_trains: Optional[PositiveInt] = None num_pulses: Optional[PositiveInt] = None diff --git a/src/sed/loader/cfel/__init__.py b/src/sed/loader/cfel/__init__.py new file mode 100644 index 
00000000..e69de29b diff --git a/src/sed/loader/cfel/buffer_handler.py b/src/sed/loader/cfel/buffer_handler.py new file mode 100644 index 00000000..47b47004 --- /dev/null +++ b/src/sed/loader/cfel/buffer_handler.py @@ -0,0 +1,340 @@ +from __future__ import annotations + +import time +from pathlib import Path + +import h5py +import numpy as np +import dask.dataframe as dd +from joblib import delayed +from joblib import Parallel + +from sed.core.logging import setup_logging +from sed.loader.cfel.dataframe import DataFrameCreator +from sed.loader.flash.buffer_handler import BufferFilePaths +from sed.loader.flash.buffer_handler import BufferHandler as BaseBufferHandler +from sed.loader.flash.utils import InvalidFileError +from sed.loader.flash.utils import get_channels +from sed.loader.flash.utils import get_dtypes + +logger = setup_logging("cfel_buffer_handler") + + +class BufferHandler(BaseBufferHandler): + """ + A class for handling the creation and manipulation of buffer files using DataFrameCreator. + """ + + def __init__( + self, + config: dict, + ) -> None: + """ + Initializes the BufferHandler. + + Args: + config (dict): The configuration dictionary. + """ + super().__init__(config) + + def _validate_h5_files(self, config, h5_paths: list[Path]) -> list[Path]: + valid_h5_paths = [] + for h5_path in h5_paths: + try: + dfc = DataFrameCreator(config_dataframe=config, h5_path=h5_path) + dfc.validate_channel_keys() + valid_h5_paths.append(h5_path) + except InvalidFileError as e: + logger.info(f"Skipping invalid file: {h5_path.stem}\n{e}") + + return valid_h5_paths + + # def _save_buffer_files(self, force_recreate: bool, debug: bool) -> None: + # """ + # Creates the buffer files that are missing, handling multi-file runs properly. + + # Args: + # force_recreate (bool): Flag to force recreation of buffer files. + # debug (bool): Flag to enable debug mode, which serializes the creation. + # """ + # file_sets = self.fp.file_sets_to_process(force_recreate) + # logger.info(f"Reading files: {len(file_sets)} new files of {len(self.fp)} total.") + + # if len(file_sets) == 0: + # return + + # # Sort file sets by filename to ensure proper order + # file_sets = sorted(file_sets, key=lambda x: x['raw'].name) + + # # Get base timestamp from the first file if we have multiple files + # base_timestamp = None + # if len(file_sets) > 1: + # try: + # # Find the first file (ends with _0000) + # first_file_set = None + # for file_set in file_sets: + # if file_set['raw'].stem.endswith('_0000'): + # first_file_set = file_set + # break + + # if first_file_set: + # # Create a temporary DataFrameCreator to extract base timestamp + # first_dfc = DataFrameCreator( + # config_dataframe=self._config, + # h5_path=first_file_set['raw'], + # is_first_file=True + # ) + # base_timestamp = first_dfc.get_base_timestamp() + # first_dfc.h5_file.close() # Clean up + # logger.info(f"Multi-file run detected. Base timestamp: {base_timestamp}") + # except Exception as e: + # logger.warning(f"Could not extract base timestamp: {e}. 
Processing files independently.") + # base_timestamp = None + + # n_cores = min(len(file_sets), self.n_cores) + # if n_cores > 0: + # if debug: + # for file_set in file_sets: + # is_first_file = file_set['raw'].stem.endswith('_0000') + # self._save_buffer_file(file_set, is_first_file, base_timestamp) + # else: + # # For parallel processing, we need to be careful about the order + # # Process all files in parallel with the correct parameters + # from joblib import delayed, Parallel + + # Parallel(n_jobs=n_cores, verbose=10)( + # delayed(self._save_buffer_file)( + # file_set, + # file_set['raw'].stem.endswith('_0000'), + # base_timestamp + # ) + # for file_set in file_sets + # ) + def _save_buffer_files(self, force_recreate: bool, debug: bool) -> None: + """ + Creates the buffer files that are missing, handling multi-file and single-file runs properly. + + Args: + force_recreate (bool): Flag to force recreation of buffer files. + debug (bool): Flag to enable debug mode, which serializes the creation. + """ + file_sets = self.fp.file_sets_to_process(force_recreate) + logger.info(f"Reading files: {len(file_sets)} new files of {len(self.fp)} total.") + + if not file_sets: + return + + # Sort file sets by filename to ensure deterministic order + file_sets = sorted(file_sets, key=lambda x: x["raw"].name) + + base_timestamp = None + + try: + if len(file_sets) == 1: + # Single-file run → that file IS the first file + first_file_set = file_sets[0] + logger.info( + f"Single-file run detected: {first_file_set['raw'].name}. " + "Extracting base timestamp from this file." + ) + + else: + # Multi-file run → look for _0000 + first_file_set = next( + fs for fs in file_sets + if fs["raw"].stem.endswith("_0000") + ) + logger.info( + f"Multi-file run detected. " + f"Extracting base timestamp from {first_file_set['raw'].name}" + ) + + # Create a temporary DataFrameCreator to extract base timestamp + first_dfc = DataFrameCreator( + config_dataframe=self._config, + h5_path=first_file_set["raw"], + is_first_file=True, + ) + base_timestamp = first_dfc.get_base_timestamp() + first_dfc.h5_file.close() + + logger.info(f"Base timestamp extracted: {base_timestamp}") + + except StopIteration: + logger.warning( + "Multi-file run detected but no '_0000' file found. " + "Base timestamp will not be extracted." + ) + except Exception as e: + logger.warning( + f"Could not extract base timestamp: {e}. " + "Processing files independently." + ) + + # ------------------------------------------------------- + # Calculate index offsets + # We need to read the 'index' channel (usually countId/NumOfEvents) to know the count. + # This requires a quick scan of files. + # ------------------------------------------------------- + index_offsets = {} + current_offset = 0 + + index_alias = self._config.get("index", ["countId"])[0] + try: + channel_config = self._config["channels"][index_alias] + dataset_key = channel_config["dataset_key"] + + # Prefer serial scan for safety and simplicity, though could be parallelized + # For 200 files it might take a few seconds. 
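+            # Illustrative example (hypothetical event counts): if file "..._0000"
+            # holds 1200 events and "..._0001" holds 800, the resulting offsets are
+            # {"..._0000": 0, "..._0001": 1200}, so the countId index stays globally
+            # monotonic across the per-file buffer files.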
+ logger.info("Calculating index offsets...") + for file_set in file_sets: + try: + with h5py.File(file_set["raw"], "r") as h5_file: + if dataset_key in h5_file: + + dset = h5_file[dataset_key] + # sum of all events in this file + # Use simple read if small enough + n_events = np.sum(dset) + + index_offsets[file_set["raw"].name] = int(current_offset) + current_offset += int(n_events) + else: + index_offsets[file_set["raw"].name] = int(current_offset) + except Exception as e: + logger.warning(f"Failed to read index offset from {file_set['raw'].name}: {e}") + index_offsets[file_set["raw"].name] = int(current_offset) + + logger.debug(f"Total events calculated: {current_offset}") + + except Exception as e: + logger.warning(f"Failed to calculate index offsets: {e}. Indices may reset.") + for fs in file_sets: + index_offsets[fs["raw"].name] = 0 + + # ------------------------------------------------------- + + n_cores = min(len(file_sets), self.n_cores) + if n_cores <= 0: + return + + def is_first_file(file_set) -> bool: + return ( + len(file_sets) == 1 + or file_set["raw"].stem.endswith("_0000") + ) + + if debug: + for file_set in file_sets: + self._save_buffer_file( + file_set, + is_first_file(file_set), + base_timestamp, + index_offset=index_offsets.get(file_set["raw"].name, 0), + ) + else: + # For parallel processing, we need to be careful about the order + # Process all files in parallel with the correct parameters + from joblib import Parallel, delayed + + Parallel(n_jobs=n_cores, verbose=10)( + delayed(self._save_buffer_file)( + file_set, + is_first_file(file_set), + base_timestamp, + index_offset=index_offsets.get(file_set["raw"].name, 0), + ) + for file_set in file_sets + ) + + def _save_buffer_file(self, file_set, is_first_file=True, base_timestamp=None, index_offset=0): + """ + Saves an HDF5 file to a Parquet file using the DataFrameCreator class. + + Args: + file_set: Dictionary containing file paths + is_first_file: Whether this is the first file in a multi-file run + base_timestamp: Base timestamp from the first file (for subsequent files) + index_offset: Offset to apply to the index + """ + start_time = time.time() # Add this line + paths = file_set + + dfc = DataFrameCreator( + config_dataframe=self._config, + h5_path=paths["raw"], + is_first_file=is_first_file, + base_timestamp=base_timestamp, + index_offset=index_offset + ) + df = dfc.df + + df_timed = dfc.df_timed + + # Save electron resolved dataframe + electron_channels = get_channels(self._config, "per_electron") + dtypes = get_dtypes(self._config, df.columns.values) + electron_df = df.dropna(subset=electron_channels).astype(dtypes).reset_index() + logger.debug(f"Saving electron buffer with shape: {electron_df.shape}") + electron_df.to_parquet(paths["electron"]) + + # Create and save timed dataframe + dtypes = get_dtypes(self._config, df_timed.columns.values) + timed_df = df_timed.astype(dtypes) + logger.debug(f"Saving timed buffer with shape: {timed_df.shape}") + timed_df.to_parquet(paths["timed"]) + + logger.debug(f"Processed {paths['raw'].stem} in {time.time() - start_time:.2f}s") + + def process_and_load_dataframe( + self, + h5_paths: list[Path], + folder: Path, + force_recreate: bool = False, + suffix: str = "", + debug: bool = False, + remove_invalid_files: bool = False, + filter_timed_by_electron: bool = True, + ) -> tuple[dd.DataFrame, dd.DataFrame]: + """ + Runs the buffer file creation process. + Does a schema check on the buffer files and creates them if they are missing. 
+        Performs forward filling and splits the sector ID from the DLD time lazily.
+
+        Args:
+            h5_paths (List[Path]): List of paths to H5 files.
+            folder (Path): Path to the folder for processed files.
+            force_recreate (bool): Flag to force recreation of buffer files.
+            suffix (str): Suffix for buffer file names.
+            debug (bool): Flag to enable debug mode, which serializes the creation.
+            remove_invalid_files (bool): Flag to remove invalid files.
+            filter_timed_by_electron (bool): Flag to filter timed data by valid electron events.
+
+        Returns:
+            Tuple[dd.DataFrame, dd.DataFrame]: The electron and timed dataframes.
+        """
+        self.filter_timed_by_electron = filter_timed_by_electron
+        if remove_invalid_files:
+            h5_paths = self._validate_h5_files(self._config, h5_paths)
+
+        self.fp = BufferFilePaths(h5_paths, folder, suffix)
+
+        if not force_recreate:
+            schema_set = set(
+                get_channels(self._config, formats="all", index=True, extend_aux=True)
+                + [self._config["columns"].get("timestamp")],
+            )
+            self._schema_check(self.fp["timed"], schema_set)
+
+            self._schema_check(self.fp["electron"], schema_set)
+
+        self._save_buffer_files(force_recreate, debug)
+
+        # All files were invalid and skipped: return empty (None) dataframes
+        if remove_invalid_files and not self.fp:
+            self.df = {"electron": None, "timed": None}
+            return self.df["electron"], self.df["timed"]
+
+        self._get_dataframes()
+
+        return self.df["electron"], self.df["timed"]
diff --git a/src/sed/loader/cfel/dataframe.py b/src/sed/loader/cfel/dataframe.py
new file mode 100644
index 00000000..8c6fd560
--- /dev/null
+++ b/src/sed/loader/cfel/dataframe.py
@@ -0,0 +1,354 @@
+"""
+This module creates pandas DataFrames from HDF5 files for different levels of data granularity
+[per electron, per pulse, and per train]. It efficiently handles concatenation of data from
+various channels within the HDF5 file, making use of the structured nature of the data to
+optimize join operations. This approach significantly enhances performance compared to earlier
+implementations.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import h5py
+import numpy as np
+import pandas as pd
+
+from sed.core.logging import setup_logging
+from sed.loader.flash.utils import get_channels
+from sed.loader.flash.utils import InvalidFileError
+
+logger = setup_logging("cfel_dataframe_creator")
+
+
+class DataFrameCreator:
+    """
+    A class for creating pandas DataFrames from an HDF5 file for HEXTOF lab data at CFEL.
+
+    Attributes:
+        h5_file (h5py.File): The HDF5 file object.
+        multi_index (pd.MultiIndex): The multi-index structure for the DataFrame.
+        _config (dict): The configuration dictionary for the DataFrame.
+    """
+
+    def __init__(self, config_dataframe: dict, h5_path: Path,
+                 is_first_file: bool = True, base_timestamp: pd.Timestamp = None,
+                 index_offset: int = 0) -> None:
+        """
+        Initializes the DataFrameCreator class.
+
+        Args:
+            config_dataframe (dict): The configuration dictionary with only the dataframe key.
+            h5_path (Path): Path to the h5 file.
+            is_first_file (bool): Whether this is the first file in a multi-file run.
+            base_timestamp (pd.Timestamp): Base timestamp from the first file (for subsequent files).
+            index_offset (int): Offset to apply to the index (countId) for multi-file runs.
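+
+        Illustrative example (hypothetical values): for per-point event counts
+        [3, 2, 4] read from the index channel (NumOfEvents) and index_offset=9,
+        the constructed slow-data index is [9, 12, 14], i.e. the cumulative event
+        count per acquisition point, shifted by the offset and with the last edge
+        dropped.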
+ """ + self.h5_file = h5py.File(h5_path, "r") + self._config = config_dataframe + self.is_first_file = is_first_file + self.base_timestamp = base_timestamp + self.index_offset = index_offset + + index_alias = self._config.get("index", ["countId"])[0] + + # get cumulative counts, but drop last because slow data only covers N-1 intervals + # Add index_offset + self.index = np.cumsum([0, *self.get_dataset_array(index_alias)])[:-1] + index_offset + + + def get_dataset_key(self, channel: str) -> str: + """ + Checks if 'dataset_key' exists and returns that. + + Args: + channel (str): The name of the channel. + + Returns: + str: The 'dataset_key'. + + Raises: + ValueError: If 'dataset_key' is not provided. + """ + channel_config = self._config["channels"][channel] + if "dataset_key" in channel_config: + return channel_config["dataset_key"] + error = f"For channel: {channel}, provide 'dataset_key'." + raise ValueError(error) + + def get_dataset_array( + self, + channel: str, + ) -> h5py.Dataset: + """ + Returns a numpy array for a given channel name. + + Args: + channel (str): The name of the channel. + slice_ (bool): Applies slicing on the dataset. Default is True. + + Returns: + h5py.Dataset: The channel's data as a h5py.Dataset object. + """ + # Get the data from the necessary h5 file and channel + dataset_key = self.get_dataset_key(channel) + dataset = self.h5_file[dataset_key] + + return dataset + + def get_base_timestamp(self) -> pd.Timestamp: + """ + Extracts the base timestamp from the first file to be used for subsequent files. + + Returns: + pd.Timestamp: The base timestamp from the first file. + """ + if not self.is_first_file: + raise ValueError("get_base_timestamp() should only be called on the first file") + + first_timestamp = self.h5_file[self._config.get("first_event_time_stamp_key")][0] + return pd.to_datetime(first_timestamp.decode()) + + @property + def df_electron(self) -> pd.DataFrame: + """ + Returns a pandas DataFrame for channel names of type [per electron]. + + Returns: + pd.DataFrame: The pandas DataFrame for the 'per_electron' channel's data. + """ + # Get the relevant channels and their slice index + channels = get_channels(self._config, "per_electron") + if channels == []: + return pd.DataFrame() + + series = { + channel: pd.Series( + self.get_dataset_array(channel), + index=pd.RangeIndex( + self.index_offset, + self.index_offset + len(self.get_dataset_array(channel)), + ), + ) + for channel in channels + } + dataframe = pd.concat(series, axis=1) + return dataframe.dropna() + + @property + def df_train(self) -> pd.DataFrame: + """ + Returns a pandas DataFrame for given channel names of type [per train]. + + Returns: + pd.DataFrame: The pandas DataFrame for the 'per_train' channel's data. 
+ """ + series = [] + # Get the relevant channel names + channels = get_channels(self._config, "per_train") + # auxiliary dataset (which is stored in the same dataset as other DLD channels) + aux_alias = self._config.get("aux_alias", "dldAux") + + # For each channel, a pd.Series is created and appended to the list + for channel in channels: + dataset = self.get_dataset_array(channel) + + if channel == aux_alias: + try: + sub_channels = self._config["channels"][aux_alias]["sub_channels"] + except KeyError: + raise KeyError( + f"Provide 'sub_channels' for auxiliary channel '{aux_alias}'.", + ) + for name, values in sub_channels.items(): + series.append( + pd.Series( + dataset[:, values["slice"]], + self.index,# changed together with __init__ line 52 + # works together with __init__ line 50, but has different len of TimeStamps and Index + # self.index[:-1], + name=name, + ), + ) + else: + series.append(pd.Series(dataset, self.index, name=channel))# changed together with __init__ line 52 + # works together with __init__ line 50, but has different len of TimeStamps and Index + # series.append(pd.Series(dataset, self.index[:-1], name=channel)) + # All the channels are concatenated to a single DataFrame + return pd.concat(series, axis=1) + + @property + def df_timestamp(self) -> pd.DataFrame: + """ + Generates a DataFrame of timestamps for each acquisition point. + + - Uses `first_event_time_stamp_key` from the first file as the global StartTime. + - Uses `millisecCounter` (if available) as a monotonic global time across all files. + - If `millisecCounter` is not available, uses cumulative exposure times from `ms_markers_key` + to approximate acquisition times. + - Returns timestamps as seconds since the UNIX epoch (1970-01-01). + + Returns + ------- + pd.DataFrame + DataFrame with a single column containing the computed timestamps. 
+ """ + # ------------------------------------------------------------ + # 1) Establish global StartTime (absolute origin) + # ------------------------------------------------------------ + start_time_key = self._config.get("first_event_time_stamp_key")#"/ScanParam/StartTime" + + if self.is_first_file: + if start_time_key not in self.h5_file: + raise KeyError("StartTime not found in first file") + + start_time_raw = self.h5_file[start_time_key][0] + base_timestamp = pd.to_datetime(start_time_raw.decode()) + logger.warning(f"DEBUG: Taking first file with ScanStart as a timestamp: {base_timestamp}") + + # Persist base timestamp for subsequent files + self.base_timestamp = base_timestamp + else: + if self.base_timestamp is None: + raise RuntimeError("base_timestamp not initialized (first file missing?)") + base_timestamp = self.base_timestamp + + # ------------------------------------------------------------ + # 2) Determine timing offsets + # ------------------------------------------------------------ + millis_key = self._config.get("millis_counter_key", "/DLD/millisecCounter") + exposure_key = self._config.get("ms_markers_key") + + if millis_key in self.h5_file and len(self.h5_file[millis_key]) > 0: + # Preferred: global millisecond counter + offsets = pd.to_timedelta( + np.asarray(self.h5_file[millis_key], dtype=np.float64), + unit="ms", + ) + logger.warning(f"DEBUG: MillisecCounter available, offsets: {offsets}") + + elif exposure_key in self.h5_file: + # Fallback: cumulative exposure time (seconds) + exposure = np.asarray(self.h5_file[exposure_key], dtype=np.float64) + offsets = pd.to_timedelta(np.cumsum(exposure), unit="s") + logger.warning(f"DEBUG: Using cumulative exposure, offsets: {offsets}") + + else: + raise ValueError( + "Cannot construct timestamps: neither millisecCounter nor exposure times available" + ) + + # ------------------------------------------------------------ + # 3) Construct absolute timestamps + # ------------------------------------------------------------ + timestamps = base_timestamp + offsets + + # Convert to UNIX seconds (float) + unix_seconds = (timestamps - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s") + + # ------------------------------------------------------------ + # 4) Build DataFrame + # ------------------------------------------------------------ + ts_alias = self._config["columns"].get("timestamp", "timeStamp") + df = pd.DataFrame({ts_alias: unix_seconds}, index=self.index) + + # # # Suppose df is your timestamp DataFrame + # print("DEBUG of df") + # ts_alias = "timeStamp" # or whatever your config uses + # timestamps = df[ts_alias].to_numpy() + + # # Compare lengths + # if len(timestamps) != len(df.index): + # print(f"Length mismatch: timestamps={len(timestamps)}, index={len(df.index)}") + + # # Detect NaNs (if any were introduced) + # nan_rows = df[df[ts_alias].isna()] + # print("Rows with NaN timestamps (if any):") + # print(nan_rows) + + # # Detect where timestamp differences are huge (likely artificial or missing) + # dt = np.diff(timestamps) + # threshold = np.median(dt) * 10 # e.g., 10× median interval + # anomalous_indices = np.where(dt > threshold)[0] + # print("Indices where timestamp jump is unusually large:") + # print(anomalous_indices) + + # # Optionally, see these rows in the DataFrame + # print(df.iloc[anomalous_indices]) + + return df + + # def validate_channel_keys(self) -> None: + # """ + # Validates if the dataset keys for all channels in the config exist in the h5 file. 
+ + # Raises: + # InvalidFileError: If the dataset keys are missing in the h5 file. + # """ + # invalid_channels = [] + # for channel in self._config["channels"]: + # dataset_key = self.get_dataset_key(channel) + # if dataset_key not in self.h5_file: + # invalid_channels.append(channel) + + # if invalid_channels: + # raise InvalidFileError(invalid_channels) + def validate_channel_keys(self) -> None: + """ + Validates if the dataset keys for all channels in the config exist in the h5 file. + + Raises: + InvalidFileError: If the dataset keys are missing in the h5 file. + """ + invalid_channels = [] + + for channel in self._config["channels"]: + dataset_key = self.get_dataset_key(channel) + + # missing key + if dataset_key not in self.h5_file: + invalid_channels.append(channel) + continue + + # empty dataset + dataset = self.h5_file[dataset_key] + if len(dataset) == 0: + invalid_channels.append(channel) + + if invalid_channels: + raise InvalidFileError(invalid_channels) + + + @property + def df(self) -> pd.DataFrame: + """ + Joins the 'per_electron', 'per_pulse' using concat operation, + returning a single dataframe. + + Returns: + pd.DataFrame: The combined pandas DataFrame. + """ + + self.validate_channel_keys() + df_train = self.df_train + df_timestamp = self.df_timestamp + df = pd.concat((self.df_electron, df_train, df_timestamp), axis=1) + ffill_cols = list(df_train.columns) + list(df_timestamp.columns) + df[ffill_cols] = df[ffill_cols].ffill() + df.index.name = self._config.get("index", ["countId"])[0] + return df + + @property + def df_timed(self) -> pd.DataFrame: + """ + Joins the 'per_electron', 'per_pulse' using concat operation, + returning a single dataframe. + + Returns: + pd.DataFrame: The combined pandas DataFrame. + """ + + self.validate_channel_keys() + df_train = self.df_train + df_timestamp = self.df_timestamp + df = pd.concat((self.df_electron, df_train, df_timestamp), axis=1, join="inner") + df.index.name = self._config.get("index", ["countId"])[0] + return df diff --git a/src/sed/loader/cfel/loader.py b/src/sed/loader/cfel/loader.py new file mode 100644 index 00000000..be8001c3 --- /dev/null +++ b/src/sed/loader/cfel/loader.py @@ -0,0 +1,855 @@ +""" +This module implements the cfel data loader (for hextof's lab data). +This loader currently supports hextof, wespe and instruments with similar structure. +The raw hdf5 data is combined and saved into buffer files and loaded as a dask dataframe. +The dataframe is an amalgamation of all h5 files for a combination of runs, where the NaNs are +automatically forward-filled across different files. +This can then be saved as a parquet for out-of-sed processing and reread back to access other +sed functionality. +""" +from __future__ import annotations + +import re +import time +from collections.abc import Sequence +from pathlib import Path + +import dask.dataframe as dd +import h5py +import numpy as np +import scipy.interpolate as sint +from natsort import natsorted +from typing import Sequence + +from sed.core.logging import set_verbosity +from sed.core.logging import setup_logging +from sed.loader.base.loader import BaseLoader +from sed.loader.cfel.buffer_handler import BufferHandler +from sed.loader.flash.metadata import MetadataRetriever + +import pandas as pd + +# Configure logging +logger = setup_logging("flash_loader") + + +class CFELLoader(BaseLoader): + """ + The class generates multiindexed multidimensional pandas dataframes from the new FLASH + dataformat resolved by both macro and microbunches alongside electrons. 
+ Only the read_dataframe (inherited and implemented) method is accessed by other modules. + + Args: + config (dict, optional): Config dictionary. Defaults to None. + verbose (bool, optional): Option to print out diagnostic information. + Defaults to True. + """ + + __name__ = "cfel" + + supported_file_types = ["h5"] + + def __init__(self, config: dict, verbose: bool = True) -> None: + """ + Initializes the FlashLoader. + + Args: + config (dict): Configuration dictionary. + verbose (bool, optional): Option to print out diagnostic information. + """ + super().__init__(config=config, verbose=verbose) + + set_verbosity(logger, self._verbose) + + self.instrument: str = self._config["core"].get("instrument", "hextof") # default is hextof + self.beamtime_dir: str = None + self.raw_dir: str = None + self.processed_dir: str = None + self.meta_dir: str = None + + @property + def verbose(self) -> bool: + """Accessor to the verbosity flag. + + Returns: + bool: Verbosity flag. + """ + return self._verbose + + @verbose.setter + def verbose(self, verbose: bool): + """Setter for the verbosity. + + Args: + verbose (bool): Option to turn on verbose output. Sets loglevel to INFO. + """ + self._verbose = verbose + set_verbosity(logger, self._verbose) + + def __len__(self) -> int: + """ + Returns the total number of rows in the electron resolved dataframe. + + Returns: + int: Total number of rows. + """ + try: + file_statistics = self.metadata["file_statistics"]["electron"] + except KeyError as exc: + raise KeyError("File statistics missing. Use 'read_dataframe' first.") from exc + + total_rows = sum(stats["num_rows"] for stats in file_statistics.values()) + return total_rows + + + def _initialize_dirs(self) -> None: + """ + Initializes the directories on Maxwell based on configuration. If paths is provided in + the configuration, the raw data directory and parquet data directory are taken from there. + Otherwise, the beamtime_id and year are used to locate the data directories. + The first path that has either online- or express- prefix, or the daq name is taken as the + raw data directory. + + Raises: + ValueError: If required values are missing from the configuration. + FileNotFoundError: If the raw data directories are not found. 
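+
+        Illustrative example (values taken from the example lab config): for
+        beamline "cfel", year 2025 and beamtime_id 11021732, beamtime_dir resolves
+        to /asap3/fs-flash-o/gpfs/hextof/2025/data/11021732/, raw data is searched
+        below its "raw" subfolder, and processed data is written to its "processed"
+        subfolder.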
+ """ + # Parses to locate the raw beamtime directory from config file + # Only raw_dir is necessary, processed_dir can be based on raw_dir, if not provided + if "paths" in self._config["core"]: + raw_dir = Path(self._config["core"]["paths"].get("raw", "")) + print(raw_dir) + processed_dir = Path( + self._config["core"]["paths"].get("processed", raw_dir.joinpath("processed")), + ) + meta_dir = Path( + self._config["core"]["paths"].get("meta", raw_dir.joinpath("meta")), + ) + beamtime_dir = Path(raw_dir).parent + + else: + try: + beamtime_id = self._config["core"]["beamtime_id"] + year = self._config["core"]["year"] + + except KeyError as exc: + raise ValueError( + "The beamtime_id and year are required.", + ) from exc + + beamtime_dir = Path( + self._config["core"]["beamtime_dir"][self._config["core"]["beamline"]], + ) + beamtime_dir = beamtime_dir.joinpath(f"{year}/data/{beamtime_id}/") + + # Use pathlib walk to reach the raw data directory + raw_paths: list[Path] = [] + + for path in beamtime_dir.joinpath("raw").glob("**/*"): + if path.is_dir(): + dir_name = path.name + if dir_name.startswith(("online-", "express-")): + raw_paths.append(path.joinpath(self._config["dataframe"]["daq"])) + elif dir_name == self._config["dataframe"]["daq"].upper(): + raw_paths.append(path) + + if not raw_paths: + raise FileNotFoundError("Raw data directories not found.") + + raw_dir = raw_paths[0].resolve() + + processed_dir = beamtime_dir.joinpath("processed") + meta_dir = beamtime_dir.joinpath("meta/fabtrack/") # cspell:ignore fabtrack + + processed_dir.mkdir(parents=True, exist_ok=True) + + self.beamtime_dir = str(beamtime_dir) + self.raw_dir = str(raw_dir) + self.processed_dir = str(processed_dir) + self.meta_dir = str(meta_dir) + + def _file_index(path: Path) -> int: + """ + Extract file index from filename. + Returns 0 for single-file runs. + """ + stem = path.stem # no extension + parts = stem.rsplit("_", 1) + + if len(parts) == 2 and parts[1].isdigit(): + return int(parts[1]) + + return 0 + + @property + def available_runs(self) -> list[int]: + # Get all files in raw_dir with "run" in their names + files = list(Path(self.raw_dir).glob("*run*")) + + # Extract run IDs from filenames + run_ids = set() + for file in files: + match = re.search(r"run(\d+)", file.name) + if match: + run_ids.add(int(match.group(1))) + + # Return run IDs in sorted order + return sorted(list(run_ids)) + + # def get_files_from_run_id( # type: ignore[override] + # self, + # run_id: str | int, + # folders: str | Sequence[str] = None, + # extension: str = "h5", + # ) -> list[str]: + # """ + # Returns a list of filenames for a given run located in the specified directory + # for the specified data acquisition (daq). + + # Args: + # run_id (str | int): The run identifier to locate. + # folders (str | Sequence[str], optional): The directory(ies) where the raw + # data is located. Defaults to config["core"]["base_folder"]. + # extension (str, optional): The file extension. Defaults to "h5". + + # Returns: + # list[str]: A list of path strings representing the collected file names. + + # Raises: + # FileNotFoundError: If no files are found for the given run in the directory. 
+ # """ + # # Define the stream name prefixes based on the data acquisition identifier + # stream_name_prefixes = self._config["core"].get("stream_name_prefixes") + + # if folders is None: + # folders = self._config["core"]["base_folder"] + + # if isinstance(folders, str): + # folders = [folders] + + # daq = self._config["dataframe"]["daq"] + + # # Generate the file patterns to search for in the directory + # if stream_name_prefixes: + # file_pattern = f"{stream_name_prefixes[daq]}_run{run_id}_*." + extension + # else: + # file_pattern = f"*{run_id}*." + extension + + # files: list[Path] = [] + # # Use pathlib to search for matching files in each directory + # for folder in folders: + # files.extend( + # natsorted( + # Path(folder).glob(file_pattern), + # key=lambda filename: str(filename).rsplit("_", maxsplit=1)[-1], + # ), + # ) + + # # Check if any files are found + # if not files: + # raise FileNotFoundError( + # f"No files found for run {run_id} in directory {str(folders)}", + # ) + + # # Return the list of found files + # return [str(file.resolve()) for file in files] + + def get_files_from_run_id( # type: ignore[override] + self, + run_id: str | int, + folders: str | Sequence[str] = None, + extension: str = "h5", + ) -> list[str]: + + stream_name_prefixes = self._config["core"].get("stream_name_prefixes") + + if folders is None: + folders = self._config["core"]["base_folder"] + + if isinstance(folders, str): + folders = [folders] + + daq = self._config["dataframe"]["daq"] + + if stream_name_prefixes: + file_pattern = f"{stream_name_prefixes[daq]}_run{run_id}*.{extension}" + else: + file_pattern = f"*{run_id}*.{extension}" + + def file_index(path: Path) -> int: + stem = path.stem + parts = stem.rsplit("_", 1) + if len(parts) == 2 and parts[1].isdigit(): + return int(parts[1]) + return 0 # single-file run + + files: list[Path] = [] + for folder in folders: + files.extend( + natsorted( + Path(folder).glob(file_pattern), + key=file_index, + ) + ) + + if not files: + raise FileNotFoundError( + f"No files found for run {run_id} in directory {folders}", + ) + + return [str(file.resolve()) for file in files] + + def _resolve_fids( + self, + fids: Sequence[int] | None = None, + runs: Sequence[int] | None = None, + first_files: int | None = None, + ) -> list[int]: + """ + Resolve run IDs or file IDs into a list of file indices into self.files. + Ensures consistent ordering in acquisition time. + + Parameters + ---------- + fids : Sequence[int] | None + Specific file indices to use. + runs : Sequence[int] | None + Run IDs to include. + first_files : int | None + If given, limits the result to the first N files. + + Returns + ------- + list[int] + List of file indices in acquisition order. + """ + if runs is not None: + fids_resolved = [] + for run_id in runs: + if self.raw_dir is None: + self._initialize_dirs() + files_in_run = self.get_files_from_run_id(run_id=run_id, folders=self.raw_dir) + fids_resolved.extend([self.files.index(f) for f in files_in_run]) + elif fids is not None: + fids_resolved = list(fids) + else: + fids_resolved = list(range(len(self.files))) + + if first_files is not None: + fids_resolved = fids_resolved[:first_files] + + return fids_resolved + + + def parse_scicat_metadata(self, token: str = None) -> dict: + """Uses the MetadataRetriever class to fetch metadata from scicat for each run. 
+ + Returns: + dict: Metadata dictionary + token (str, optional):: The scicat token to use for fetching metadata + """ + if "metadata" not in self._config: + return {} + + metadata_retriever = MetadataRetriever(self._config["metadata"], token) + metadata = metadata_retriever.get_metadata( + beamtime_id=self._config["core"]["beamtime_id"], + runs=self.runs, + metadata=self.metadata, + ) + + return metadata + + def parse_local_metadata(self) -> dict: + """Uses the MetadataRetriever class to fetch metadata from local folder for each run. + + Returns: + dict: Metadata dictionary + """ + if "metadata" not in self._config: + return {} + + metadata_retriever = MetadataRetriever(self._config["metadata"]) + metadata = metadata_retriever.get_local_metadata( + beamtime_id=self._config["core"]["beamtime_id"], + beamtime_dir=self.beamtime_dir, + meta_dir=self.meta_dir, + runs=self.runs, + metadata=self.metadata, + ) + + return metadata + + # ------------------------------- + # Count rate with millisecCounter + # ------------------------------- + def get_count_rate_ms( + self, + fids: Sequence[int] | None = None, + *, + mode: str = "file", # "file" or "point" + first_files: int | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + """ + Count-rate calculation using millisecCounter and NumOfEvents. + + Parameters + ---------- + fids : Sequence[int] or None + File IDs to include. Default: all. + mode : {"file", "point"} + - "point": rate per acquisition window + - "file" : one average rate per file + first_files : int or None + If given, only the first N files are used. + + Returns + ------- + rates : np.ndarray + Count rate in Hz. + times : np.ndarray + Time in seconds (window end time for point mode, last time per file for file mode) + """ + millis_key = self._config.get("millis_counter_key", "/DLD/millisecCounter") + counts_key = self._config.get("num_events_key", "/DLD/NumOfEvents") + + fids_resolved = self._resolve_fids(fids=fids, first_files=first_files) + + # ------------------------------- + # 1) Load and concatenate (for point-mode) + # ------------------------------- + ms_all = [] + counts_all = [] + file_ms_min_max = [] # store min/max per file for file-mode + file_counts_total = [] + + for fid in fids_resolved: + with h5py.File(self.files[fid], "r") as h5: + ms = np.asarray(h5[millis_key], dtype=np.float64) + c = np.asarray(h5[counts_key], dtype=np.float64) if counts_key in h5 else np.ones_like(ms) + + if len(ms) != len(c): + raise ValueError(f"Length mismatch in file {self.files[fid]}") + + ms_all.append(ms) + counts_all.append(c) + file_ms_min_max.append((ms[0], ms[-1])) + file_counts_total.append(c.sum()) + + logger.debug(f"[get_count_rate_ms] File {fid}: ms_min={ms[0]}, ms_max={ms[-1]}, counts={c.sum()}") + + # Flatten arrays for point-mode + ms_concat = np.concatenate(ms_all) + counts_concat = np.concatenate(counts_all) + + # Ensure global time order + order = np.argsort(ms_concat) + ms_concat = ms_concat[order] + counts_concat = counts_concat[order] + + # ------------------------------- + # 2) Compute point-resolved rates + # ------------------------------- + if mode == "point": + bin_size = kwds.pop("bin_size", 1) + dt = np.diff(ms_concat) * 1e-3 + if np.any(dt <= 0): + # Handle potential duplicate timestamps or jump back (should not happen with sort) + dt[dt <= 0] = 1e-6 # small epsilon + rates_point = counts_concat[1:] / dt + + if bin_size > 1: + # Apply rolling average for smoothing + rates_point = ( + pd.Series(rates_point) + .rolling(window=bin_size, center=True, min_periods=1) + 
.mean() + .values + ) + + times_point = ms_concat[1:] * 1e-3 + return rates_point, times_point + + # ------------------------------- + # 3) Compute file-resolved rates (correcting gaps) + # ------------------------------- + rates_file = [] + times_file = [] + for idx, (ms_min, ms_max) in enumerate(file_ms_min_max): + # Duration = internal file window + file_duration = ms_max - ms_min + if file_duration <= 0: + # If single point or overlapping min/max, fallback or raise? + # For single point (duration 0), rate is undefined (inf). + # Start/End timestamps usually imply a range. + # If strictly 0, we can't calculate rate. + logger.warning( + f"[get_count_rate_ms] File {fids_resolved[idx]} has duration <= 0 ({file_duration}). " + "Skipping rate calculation for this file (set to NaN).", + ) + rates_file.append(np.nan) + times_file.append((ms_min + ms_max) / 2 * 1e-3) + continue + + # print(f"Total counts: {file_counts_total[idx]}") + # print(f"File duration: {file_duration}") + rate = file_counts_total[idx] / (file_duration * 1e-3) + rates_file.append(rate) + # times_file.append(ms_max * 1e-3) # last time in file + times_file.append((ms_min + ms_max) / 2 * 1e-3) # midpoint of the file + + + logger.debug( + f"[get_count_rate_ms][file] File {fids_resolved[idx]}: ms_min={ms_min}, ms_max={ms_max}, " + f"counts={file_counts_total[idx]}, duration={file_duration} ms, rate={rate:.2f} Hz" + ) + + return np.array(rates_file), np.array(times_file) + + + # ------------------------------- + # File-based count rate + # ------------------------------- + # def get_count_rate( + # self, + # fids: Sequence[int] | None = None, + # runs: Sequence[int] | None = None, + # ) -> tuple[np.ndarray, np.ndarray]: + # """ + # Returns count rate per file using the total number of events and elapsed time. + # Calculates the count rate using the number of rows and elapsed time for each file. + # Hence the resolution is not very high, but this method is very fast. + + # Args: + # fids (Sequence[int]): A sequence of file IDs. Defaults to all files. + + # Keyword Args: + # runs: A sequence of run IDs. + + # Returns: + # tuple[np.ndarray, np.ndarray]: The count rate and elapsed time in seconds. + + # Raises: + # KeyError: If the file statistics are missing. + # """ + # fids_resolved = self._resolve_fids(fids=fids, runs=runs) + + # all_counts = [self.metadata["file_statistics"]["electron"][str(fid)]["num_rows"] for fid in fids_resolved] + # elapsed_times = [self.get_elapsed_time(fids=[fid]) for fid in fids_resolved] + # print(elapsed_times,all_counts) + + # # count_rate = np.array(all_counts) / np.array(elapsed_times) + # count_rate = np.array(all_counts) / np.array(elapsed_times).flatten() + # print(f"Count rates: {count_rate}") + # times = np.cumsum(elapsed_times) + # return count_rate, times + def get_count_rate( + self, + fids: Sequence[int] | None = None, + runs: Sequence[int] | None = None, + **kwds, + ) -> tuple[np.ndarray, np.ndarray]: + """ + Returns the count rate. By default, returns high-resolution + point-resolved rates using the millisecond counter. + + Args: + fids (Sequence[int], optional): + File IDs to include. Defaults to all files. + runs (Sequence[int], optional): + Run IDs to include. If provided, overrides `fids`. + **kwds: + Additional arguments passed to `get_count_rate_ms`. + - mode: "point" (default) or "file". 
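+                - bin_size: rolling-average window (number of points) used to smooth
+                  "point"-mode rates. Defaults to 1 (no smoothing).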
+ + Returns: + tuple[np.ndarray, np.ndarray]: + - count_rate : array of count rates in Hz + - time : array of global times in seconds since scan start + """ + mode = kwds.pop("mode", "point") + return self.get_count_rate_ms(fids=fids, mode=mode, runs=runs, **kwds) + + # ------------------------------- + # Time-resolved count rate (binned) + # ------------------------------- + def get_count_rate_time_resolved( + self, + fids: Sequence[int] | None = None, + time_bin_size: float = 1.0, + runs: Sequence[int] | None = None, + ) -> tuple[np.ndarray, np.ndarray]: + """ + Returns count rate in time bins using metadata timestamps. + Calculates the count rate over time within each file using timestamp binning. + + Args: + fids (Sequence[int]): A sequence of file IDs. Defaults to all files. + time_bin_size (float): Time bin size in seconds for rate calculation. Defaults to 1.0. + + Keyword Args: + runs: A sequence of run IDs. + + Returns: + tuple[np.ndarray, np.ndarray]: The count rate array and time array in seconds. + + Raises: + KeyError: If the file statistics are missing. + """ + fids_resolved = self._resolve_fids(fids=fids, runs=runs) + + all_rates = [] + all_times = [] + cumulative_time = 0.0 + + for fid in fids_resolved: + file_statistics = self.metadata["file_statistics"]["timed"] + time_stamp_alias = self._config["dataframe"]["columns"].get("timestamp", "timeStamp") + time_stamps = file_statistics[str(fid)]["columns"][time_stamp_alias] + + t_min = float(getattr(time_stamps["min"], "total_seconds", lambda: time_stamps["min"])()) + t_max = float(getattr(time_stamps["max"], "total_seconds", lambda: time_stamps["max"])()) + total_counts = self.metadata["file_statistics"]["electron"][str(fid)]["num_rows"] + file_duration = t_max - t_min + print(f"File duration: {file_duration}") + + n_bins = max(int(file_duration / time_bin_size), 1) + counts_per_bin = total_counts / n_bins + rate_per_bin = counts_per_bin / time_bin_size + + bin_centers = np.linspace( + cumulative_time + time_bin_size / 2, + cumulative_time + file_duration - time_bin_size / 2, + n_bins, + ) + + rates = np.full(n_bins, rate_per_bin) + all_rates.extend(rates) + all_times.extend(bin_centers) + + cumulative_time += file_duration + + return np.array(all_rates), np.array(all_times) + + def get_elapsed_time( + self, + fids: Sequence[int] | None = None, + *, + runs: Sequence[int] | None = None, + first_files: int | None = None, + aggregate: bool = False, + ) -> float | list[float]: + """ + Calculates the elapsed acquisition time. + + Uses global timestamp / millisecCounter logic established in + read_dataframe() and df_timestamp. + + Parameters + ---------- + fids : Sequence[int] | None + File IDs to include. + runs : Sequence[int] | None + Run IDs to include. + first_files : int | None + Limit to first N resolved files. + aggregate : bool + If True, return total elapsed time (s), + otherwise return per-file elapsed times. + + Returns + ------- + float | list[float] + Elapsed time(s) in seconds. + """ + + try: + file_statistics = self.metadata["file_statistics"]["timed"] + except Exception as exc: + raise KeyError( + "File statistics missing. Use 'read_dataframe' first." 
+ ) from exc + + ts_alias = self._config["dataframe"]["columns"].get( + "timestamp", + "timeStamp", + ) + + # ---------------------------- + # Resolve files consistently + # ---------------------------- + fids_resolved = self._resolve_fids( + fids=fids, + runs=runs, + first_files=first_files, + ) + + elapsed_per_file: list[float] = [] + + for fid in fids_resolved: + try: + ts_info = file_statistics[str(fid)]["columns"][ts_alias] + print(f"ts_info: {ts_info}") + dt = ts_info["max"] - ts_info["min"] + + # normalize to seconds + if hasattr(dt, "total_seconds"): + dt_s = dt.total_seconds() + else: + dt_s = float(dt) + + if dt_s < 0: + raise ValueError( + f"Negative elapsed time in file {fid}: {dt_s}" + ) + + except KeyError as exc: + filename = ( + Path(self.files[fid]).name + if fid < len(self.files) + else f"file_{fid}" + ) + raise KeyError( + f"Timestamp metadata missing in file {filename} (fid={fid}). " + "Add timestamp column and alias to config before loading." + ) from exc + + elapsed_per_file.append(dt_s) + + if aggregate: + print("aggregate is True") + return sum(elapsed_per_file) + + print(f"Elapsed time: {elapsed_per_file}") + return elapsed_per_file + + def read_dataframe( + self, + files: str | Sequence[str] = None, + folders: str | Sequence[str] = None, + runs: str | int | Sequence[str | int] = None, + ftype: str = "h5", + metadata: dict | None = None, + collect_metadata: bool = False, + **kwds, + ) -> tuple[dd.DataFrame, dd.DataFrame, dict]: + """ + Read express data from the DAQ, generating a parquet in between. + + Args: + files (str | Sequence[str], optional): File path(s) to process. Defaults to None. + folders (str | Sequence[str], optional): Path to folder(s) where files are stored + Path has priority such that if it's specified, the specified files will be ignored. + Defaults to None. + runs (str | int | Sequence[str | int], optional): Run identifier(s). + Corresponding files will be located in the location provided by ``folders``. + Takes precedence over ``files`` and ``folders``. Defaults to None. + ftype (str, optional): The file extension type. Defaults to "h5". + metadata (dict, optional): Additional metadata. Defaults to None. + collect_metadata (bool, optional): Whether to collect metadata. Defaults to False. + + Keyword Args: + detector (str, optional): The detector to use. Defaults to "". + force_recreate (bool, optional): Whether to force recreation of the buffer files. + Defaults to False. + processed_dir (str, optional): The directory to save the processed files. + Defaults to None. + debug (bool, optional): Whether to run buffer creation in serial. Defaults to False. + remove_invalid_files (bool, optional): Whether to exclude invalid files. + Defaults to False. + token (str, optional): The scicat token to use for fetching metadata. If provided, + will be saved to .env file for future use. If not provided, will check environment + variables when collect_metadata is True. + filter_timed_by_electron (bool, optional): When True, the timed dataframe will only + contain data points where valid electron events were detected. When False, all + timed data points are included regardless of electron detection. Defaults to True. + + Returns: + tuple[dd.DataFrame, dd.DataFrame, dict]: A tuple containing the concatenated DataFrame + and metadata. + + Raises: + ValueError: If neither 'runs' nor 'files'/'raw_dir' is provided. + FileNotFoundError: If the conversion fails for some files or no data is available. 
+ ValueError: If collect_metadata is True and no token is available. + """ + if metadata is None: + metadata = {} + + detector = kwds.pop("detector", "") + force_recreate = kwds.pop("force_recreate", False) + processed_dir = kwds.pop("processed_dir", None) + debug = kwds.pop("debug", False) + remove_invalid_files = kwds.pop("remove_invalid_files", False) + token = kwds.pop("token", None) + filter_timed_by_electron = kwds.pop("filter_timed_by_electron", True) + + if len(kwds) > 0: + raise ValueError(f"Unexpected keyword arguments: {kwds.keys()}") + t0 = time.time() + + self._initialize_dirs() + # Prepare a list of names for the runs to read and parquets to write + if runs is not None: + files = [] + runs_ = [str(runs)] if isinstance(runs, (str, int)) else list(map(str, runs)) + for run in runs_: + run_files = self.get_files_from_run_id( + run_id=run, + folders=self.raw_dir, + ) + files.extend(run_files) + self.runs = runs_ + super().read_dataframe(files=files, ftype=ftype) + else: + # This call takes care of files and folders. As we have converted runs into files + # already, they are just stored in the class by this call. + super().read_dataframe( + files=files, + folders=folders, + ftype=ftype, + metadata=metadata, + ) + + bh = BufferHandler( + config=self._config, + ) + + # if processed_dir is None, use self.processed_dir + processed_dir = processed_dir or self.processed_dir + processed_dir = Path(processed_dir) + + # Obtain the parquet filenames, metadata, and schema from the method + # which handles buffer file creation/reading + h5_paths = [Path(file) for file in self.files] + df, df_timed = bh.process_and_load_dataframe( + h5_paths=h5_paths, + folder=processed_dir, + force_recreate=force_recreate, + suffix=detector, + debug=debug, + remove_invalid_files=remove_invalid_files, + filter_timed_by_electron=filter_timed_by_electron, + ) + + scicat_metadata = self.parse_scicat_metadata(token) + scicat_runs = scicat_metadata.get("scientificMetadata", {}) + + if not any(scicat_runs.values()): + logger.warning("No SciCat metadata available, checking local folder") + self.metadata.update(self.parse_local_metadata()) + else: + logger.warning("Metadata taken from SciCat") + if collect_metadata: + self.metadata.update(scicat_metadata) + + self.metadata.update(bh.metadata) + + print(f"loading complete in {time.time() - t0: .2f} s") + + return df, df_timed, self.metadata + + + + +LOADER = CFELLoader diff --git a/src/sed/loader/flash/buffer_handler.py b/src/sed/loader/flash/buffer_handler.py index d56de29f..b68de4d4 100644 --- a/src/sed/loader/flash/buffer_handler.py +++ b/src/sed/loader/flash/buffer_handler.py @@ -1,13 +1,14 @@ from __future__ import annotations import os -from pathlib import Path import time +from pathlib import Path import dask.dataframe as dd import pyarrow.parquet as pq from joblib import delayed from joblib import Parallel +from pandas import MultiIndex from sed.core.dfops import forward_fill_lazy from sed.core.logging import setup_logging @@ -40,11 +41,9 @@ class BufferFilePaths: def __init__( self, - config: dict, h5_paths: list[Path], folder: Path, suffix: str, - remove_invalid_files: bool, ) -> None: """Initializes the BufferFilePaths. 
@@ -57,9 +56,6 @@ def __init__( folder = folder / "buffer" folder.mkdir(parents=True, exist_ok=True) - if remove_invalid_files: - h5_paths = self.remove_invalid_files(config, h5_paths) - self._file_paths = self._create_file_paths(h5_paths, folder, suffix) def _create_file_paths( @@ -93,18 +89,6 @@ def file_sets_to_process(self, force_recreate: bool = False) -> list[dict[str, P return self._file_paths return [file_set for file_set in self if any(not file_set[key].exists() for key in DF_TYP)] - def remove_invalid_files(self, config, h5_paths: list[Path]) -> list[Path]: - valid_h5_paths = [] - for h5_path in h5_paths: - try: - dfc = DataFrameCreator(config_dataframe=config, h5_path=h5_path) - dfc.validate_channel_keys() - valid_h5_paths.append(h5_path) - except InvalidFileError as e: - logger.info(f"Skipping invalid file: {h5_path.stem}\n{e}") - - return valid_h5_paths - class BufferHandler: """ @@ -125,14 +109,27 @@ def __init__( self.n_cores: int = config["core"].get("num_cores", os.cpu_count() - 1) self.fp: BufferFilePaths = None self.df: dict[str, dd.DataFrame] = {typ: None for typ in DF_TYP} + fill_formats = self._config.get("fill_formats", ["per_train", "per_pulse"]) self.fill_channels: list[str] = get_channels( self._config, - ["per_pulse", "per_train"], + fill_formats, extend_aux=True, ) self.metadata: dict = {} self.filter_timed_by_electron: bool = None + def _validate_h5_files(self, config, h5_paths: list[Path]) -> list[Path]: + valid_h5_paths = [] + for h5_path in h5_paths: + try: + dfc = DataFrameCreator(config_dataframe=config, h5_path=h5_path) + dfc.validate_channel_keys() + valid_h5_paths.append(h5_path) + except InvalidFileError as e: + logger.info(f"Skipping invalid file: {h5_path.stem}\n{e}") + + return valid_h5_paths + def _schema_check(self, files: list[Path], expected_schema_set: set) -> None: """ Checks the schema of the Parquet files. 
@@ -182,8 +179,7 @@ def _create_timed_dataframe(self, df: dd.DataFrame) -> dd.DataFrame: # Take all timed data rows without filtering df_timed = df[timed_channels] - # Take only first electron per event - return df_timed.loc[:, :, 0] + return df_timed def _save_buffer_file(self, paths: dict[str, Path]) -> None: """Creates the electron and timed buffer files from the raw H5 file.""" @@ -205,6 +201,12 @@ def _save_buffer_file(self, paths: dict[str, Path]) -> None: # Create and save timed dataframe df_timed = self._create_timed_dataframe(df) + # timed dataframe + if isinstance(df.index, MultiIndex): + # drop the electron channels and only take rows with the first electronId + df_timed = df[self.fill_channels].loc[:, :, 0] + else: + df_timed = df[self.fill_channels] dtypes = get_dtypes(self._config, df_timed.columns.values) timed_df = df_timed.astype(dtypes).reset_index() logger.debug(f"Saving timed buffer with shape: {timed_df.shape}") @@ -251,25 +253,26 @@ def _get_dataframes(self) -> None: filling = {} for typ in DF_TYP: # Read the parquet files into a dask dataframe - df = dd.read_parquet(self.fp[typ], calculate_divisions=True) + df = dd.read_parquet(self.fp[typ]) # , calculate_divisions=True) # Get the metadata from the parquet files file_stats[typ] = get_parquet_metadata(self.fp[typ]) # Forward fill the non-electron channels across files overlap = min(file["num_rows"] for file in file_stats[typ].values()) iterations = self._config.get("forward_fill_iterations", 2) - df = forward_fill_lazy( - df=df, - columns=self.fill_channels, - before=overlap, - iterations=iterations, - ) - # TODO: This dict should be returned by forward_fill_lazy - filling[typ] = { - "columns": self.fill_channels, - "overlap": overlap, - "iterations": iterations, - } + if iterations: + df = forward_fill_lazy( + df=df, + columns=self.fill_channels, + before=overlap, + iterations=iterations, + ) + # TODO: This dict should be returned by forward_fill_lazy + filling[typ] = { + "columns": self.fill_channels, + "overlap": overlap, + "iterations": iterations, + } self.df[typ] = df self.metadata.update({"file_statistics": file_stats, "filling": filling}) @@ -311,8 +314,11 @@ def process_and_load_dataframe( Returns: Tuple[dd.DataFrame, dd.DataFrame]: The electron and timed dataframes. 
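+
+        Example:
+            Illustrative sketch (assumes ``config`` is a parsed loader config and
+            ``h5_paths`` is a list of :class:`pathlib.Path` objects):
+
+            >>> bh = BufferHandler(config)
+            >>> df, df_timed = bh.process_and_load_dataframe(h5_paths, Path("processed"))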
""" - self.fp = BufferFilePaths(self._config, h5_paths, folder, suffix, remove_invalid_files) self.filter_timed_by_electron = filter_timed_by_electron + if remove_invalid_files: + h5_paths = self._validate_h5_files(self._config, h5_paths) + + self.fp = BufferFilePaths(h5_paths, folder, suffix) if not force_recreate: schema_set = set( diff --git a/src/sed/loader/flash/dataframe.py b/src/sed/loader/flash/dataframe.py index f50abe10..61bc6aa6 100644 --- a/src/sed/loader/flash/dataframe.py +++ b/src/sed/loader/flash/dataframe.py @@ -12,9 +12,9 @@ import numpy as np import pandas as pd +from sed.core.logging import setup_logging from sed.loader.flash.utils import get_channels from sed.loader.flash.utils import InvalidFileError -from sed.core.logging import setup_logging logger = setup_logging("flash_dataframe_creator") @@ -39,8 +39,8 @@ def __init__(self, config_dataframe: dict, h5_path: Path) -> None: """ logger.debug(f"Initializing DataFrameCreator for file: {h5_path}") self.h5_file = h5py.File(h5_path, "r") - self.multi_index = get_channels(index=True) self._config = config_dataframe + self.multi_index = get_channels(self._config, index=True) def get_index_dataset_key(self, channel: str) -> tuple[str, str]: """ diff --git a/src/sed/loader/flash/instruments.py b/src/sed/loader/flash/instruments.py deleted file mode 100644 index 8ef0146e..00000000 --- a/src/sed/loader/flash/instruments.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import annotations - -from dask import dataframe as dd - - -def wespe_convert(df: dd.DataFrame, df_timed: dd.DataFrame) -> tuple[dd.DataFrame, dd.DataFrame]: - df - df_timed - raise NotImplementedError("This function is not implemented yet.") diff --git a/src/sed/loader/flash/loader.py b/src/sed/loader/flash/loader.py index c2cf79b9..799399cb 100644 --- a/src/sed/loader/flash/loader.py +++ b/src/sed/loader/flash/loader.py @@ -1,6 +1,5 @@ """ This module implements the flash data loader. -This loader currently supports hextof, wespe and instruments with similar structure. The raw hdf5 data is combined and saved into buffer files and loaded as a dask dataframe. The dataframe is an amalgamation of all h5 files for a combination of runs, where the NaNs are automatically forward-filled across different files. @@ -21,7 +20,6 @@ from sed.core.logging import setup_logging from sed.loader.base.loader import BaseLoader from sed.loader.flash.buffer_handler import BufferHandler -from sed.loader.flash.instruments import wespe_convert from sed.loader.flash.metadata import MetadataRetriever # Configure logging @@ -225,10 +223,85 @@ def parse_metadata(self, token: str = None) -> dict: def get_count_rate( self, - fids: Sequence[int] = None, # noqa: ARG002 - **kwds, # noqa: ARG002 - ): - return None, None + fids: Sequence[int] = None, + **kwds, + ) -> tuple[np.ndarray, np.ndarray]: + """ + Calculates the count rate for the specified files. + Returns high-resolution (per-train) rates by counting electrons per trainId. + + Args: + fids (Sequence[int]): A sequence of file IDs. Defaults to all files. + **kwds: Keyword arguments. + + Returns: + tuple[np.ndarray, np.ndarray]: The count rate array (Hz) and time array (seconds). 
+ """ + import h5py + import numpy as np + import pandas as pd + + if fids is None: + fids = range(len(self.files)) + + # Get the electron channel configuration + per_electron_channels = get_channels(self._config["dataframe"], "per_electron") + if not per_electron_channels: + return None, None + + # We need the 'index_key' (trainId) for an electron channel + first_channel = per_electron_channels[0] + channel_config = self._config["dataframe"]["channels"][first_channel] + index_key = channel_config["index_key"] + + all_counts = [] + all_times = [] + + # FLASH repetition rate is usually 10Hz. + # We try to use timestamps if available, otherwise fallback to trainId gaps. + time_stamp_alias = self._config["dataframe"].get("time_stamp_alias", "timeStamp") + + # We need a reference time (t0) from the first selected file + with h5py.File(self.files[fids[0]], "r") as h5: + # Try to find a global start time if any, otherwise use relative + t0 = 0 + if time_stamp_alias in h5: + # This depends on how timestamps are stored in FLASH files + # For now, we use a simple relative time if not easily found. + pass + + for fid in fids: + with h5py.File(self.files[fid], "r") as h5: + # Read trainIds of all electron events + train_ids = np.asarray(h5[index_key]) + + if len(train_ids) == 0: + continue + + # Count electrons per train + df_counts = pd.Series(train_ids).value_counts().sort_index() + counts = df_counts.values + u_train_ids = df_counts.index.values + + # Convert trainIds to relative seconds (assuming 10Hz) + # Note: This is an approximation. A better way would be to + # use the actual timestamps of the trains. + if fid == fids[0]: + t_start_id = u_train_ids[0] + + times = (u_train_ids - t_start_id) * 0.1 + + # Rate per trainId interval (usually 0.1s) + # If we assume exactly 10Hz, duration is 0.1s + rates = counts / 0.1 + + all_counts.append(rates) + all_times.append(times) + + if not all_counts: + return None, None + + return np.concatenate(all_counts), np.concatenate(all_times) def get_elapsed_time(self, fids: Sequence[int] = None, **kwds) -> float | list[float]: # type: ignore[override] """ @@ -401,9 +474,6 @@ def read_dataframe( filter_timed_by_electron=filter_timed_by_electron, ) - if self.instrument == "wespe": - df, df_timed = wespe_convert(df, df_timed) - self.metadata.update(self.parse_metadata(token) if collect_metadata else {}) self.metadata.update(bh.metadata) diff --git a/src/sed/loader/flash/metadata.py b/src/sed/loader/flash/metadata.py index 578fa9fd..9a840a22 100644 --- a/src/sed/loader/flash/metadata.py +++ b/src/sed/loader/flash/metadata.py @@ -1,13 +1,15 @@ """ The module provides a MetadataRetriever class for retrieving metadata -from a Scicat Instance based on beamtime and run IDs. +from a Scicat instance based on beamtime and run IDs. """ from __future__ import annotations +import json +from pathlib import Path import requests +import yaml -from sed.core.config import read_env_var -from sed.core.config import save_env_var +from sed.core.config import read_env_var, save_env_var from sed.core.logging import setup_logging logger = setup_logging("flash_metadata_retriever") @@ -15,51 +17,48 @@ class MetadataRetriever: """ - A class for retrieving metadata from a Scicat instance based - on beamtime and run IDs. + Retrieves metadata from SciCat or local YAML files for a given beamtime and runs. """ def __init__(self, metadata_config: dict, token: str = None) -> None: """ Initializes the MetadataRetriever class. 
- + Args: - metadata_config (dict): Takes a dict containing at least url for the scicat instance. - token (str, optional): The token to use for fetching metadata. If provided, - will be saved to .env file for future use. + metadata_config (dict): Dict containing at least 'archiver_url' for SciCat. + token (str, optional): Token for fetching metadata. Saved to .env if provided. """ - # Token handling if token: self.token = token save_env_var("SCICAT_TOKEN", self.token) else: - # Try to load token from config or .env file self.token = read_env_var("SCICAT_TOKEN") if not self.token: raise ValueError( - "Token is required for metadata collection. Either provide a token " - "parameter or set the SCICAT_TOKEN environment variable.", + "Token is required for metadata collection. Provide a token " + "or set SCICAT_TOKEN in environment." ) self.url = metadata_config.get("archiver_url") if not self.url: - raise ValueError("No URL provided for fetching metadata from scicat.") + raise ValueError("No URL provided for fetching metadata from SciCat.") - self.headers = { - "Content-Type": "application/json", - "Accept": "application/json", - } + self.headers = {"Content-Type": "application/json", "Accept": "application/json"} + # ---------------------------- + # Remote SciCat metadata + # ---------------------------- def get_metadata( self, beamtime_id: str, runs: list, - metadata: dict = None, + metadata: dict | None = None, ) -> dict: """ - Retrieves metadata for a given beamtime ID and list of runs. - + Retrieves metadata for a beamtime and runs from SciCat. + Returns a dict with 'scientificMetadata' keyed by run ID. + Args: beamtime_id (str): The ID of the beamtime. runs (list): A list of run IDs. @@ -77,12 +76,15 @@ def get_metadata( if metadata is None: metadata = {} + all_runs_metadata: dict[str, dict] = {} + for run in runs: pid = f"{beamtime_id}/{run}" - logger.debug(f"Retrieving metadata for PID: {pid}") metadata_run = self._get_metadata_per_run(pid) - metadata.update(metadata_run) # TODO: Not correct for multiple runs + # Use 'scientificMetadata' if available, otherwise entire dict + all_runs_metadata[run] = metadata_run.get("scientificMetadata", metadata_run) + metadata["scientificMetadata"] = all_runs_metadata logger.debug(f"Retrieved metadata with {len(metadata)} entries") return metadata @@ -103,44 +105,135 @@ def _get_metadata_per_run(self, pid: str) -> dict: headers2["Authorization"] = f"Bearer {self.token}" try: - logger.debug(f"Attempting to fetch metadata with new URL format for PID: {pid}") - dataset_response = requests.get( - self._create_new_dataset_url(pid), - headers=headers2, - timeout=10, - ) - dataset_response.raise_for_status() + logger.debug(f"Fetching metadata (new URL) for PID: {pid}") + response = requests.get(self._create_new_dataset_url(pid), headers=headers2, timeout=10) + response.raise_for_status() # Check if response is an empty object because wrong url for older implementation - if not dataset_response.content: + if not response.content: logger.debug("Empty response, trying old URL format") - dataset_response = requests.get( - self._create_old_dataset_url(pid), - headers=headers2, - timeout=10, - ) + response = requests.get(self._create_old_dataset_url(pid), headers=headers2, timeout=10) # If the dataset request is successful, return the retrieved metadata # as a JSON object - return dataset_response.json() - - except requests.exceptions.RequestException as exception: - logger.warning(f"Failed to retrieve metadata for PID {pid}: {str(exception)}") - return {} # 
Return an empty dictionary for this run + return response.json() + except requests.exceptions.RequestException as e: + logger.warning(f"Failed to retrieve metadata for PID {pid}: {e}") + return {} def _create_old_dataset_url(self, pid: str) -> str: - return "{burl}/{url}/%2F{npid}".format( - burl=self.url, - url="Datasets", - npid=self._reformat_pid(pid), - ) + return f"{self.url}datasets/%2F{self._reformat_pid(pid)}" def _create_new_dataset_url(self, pid: str) -> str: - return "{burl}/{url}/{npid}".format( - burl=self.url, - url="Datasets", - npid=self._reformat_pid(pid), - ) + return f"{self.url}datasets/{self._reformat_pid(pid)}" def _reformat_pid(self, pid: str) -> str: """SciCat adds a pid-prefix + "/" but at DESY prefix = "" """ - return (pid).replace("/", "%2F") + """Replace '/' with '%2F' for SciCat PID.""" + return pid.replace("/", "%2F") + + # ---------------------------- + # Local metadata + # ---------------------------- + def get_local_metadata( + self, + beamtime_id: str, + beamtime_dir: str | Path, + meta_dir: str | Path, + runs: list, + metadata: dict | None = None, + ) -> dict: + """ + Retrieves metadata for a beamtime and runs from local YAML files. + Returns a dict with 'scientificMetadata' keyed by run ID. + + Args: + beamtime_id (str): The ID of the beamtime. + beamtime_dir (str)|Path: Beamtime directory. + meta_dir (str)|Path: Local metadata directory. + runs (list): A list of run IDs. + metadata (dict, optional): The existing metadata dictionary. + Defaults to None. + + Returns: + Dict: The updated metadata dictionary. + + Raises: + Exception: If the request to retrieve metadata fails. + """ + if metadata is None: + metadata = {} + + beamtime_metadata = self._get_beamtime_metadata(beamtime_dir, beamtime_id) + metadata.update(beamtime_metadata) + + all_runs_metadata: dict[str, dict] = {} + + for run in runs: + logger.debug(f"Retrieving local metadata for run: {run}") + run_metadata = self._get_local_metadata_per_run(meta_dir, run) + all_runs_metadata[run] = run_metadata.get("_data", {}) + + metadata["scientificMetadata"] = all_runs_metadata + logger.debug(f"Retrieved metadata with {len(metadata)} entries") + return metadata + + def _get_beamtime_metadata(self, beamtime_dir: str | Path, beamtime_id: str) -> dict: + """ + Retrieves general metadata from beamtime-metadata-{beamtime_id}.json + + Args: + beamtime_dir (str)|Path: Beamtime directory. + beamtime_id (str): The ID of the beamtime. + + Returns: + Dict: The retrieved metadata dictionary. + + Raises: + Exception: If the request to retrieve metadata fails. 
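+
+        Example:
+            For beamtime_id ``11021732`` this reads, if present,
+            ``<beamtime_dir>/beamtime-metadata-11021732.json``.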
+ """ + try: + beamtime_dir = Path(beamtime_dir) + filepath = beamtime_dir / f"beamtime-metadata-{beamtime_id}.json" + with filepath.open("r") as f: + return json.load(f) + except Exception as exc: + logger.warning(f"Failed to retrieve metadata for beamtime {beamtime_id}: {exc}") + return {} + + def _get_local_metadata_per_run(self, meta_dir: str | Path, run: str) -> dict: + """ + Retrieves metadata for a specific run from the latest YAML file: + {run}_N.yaml (highest N) or fallback to {run}.yaml + """ + try: + meta_dir = Path(meta_dir) + run = str(run) + candidates: list[tuple[int, Path]] = [] + + # Look for versioned YAML files + for path in meta_dir.glob(f"{run}_*.yaml"): + try: + version = int(path.stem.split("_")[-1]) + candidates.append((version, path)) + except ValueError: + continue + + # Fallback: unversioned single file + if not candidates: + single_file = meta_dir / f"{run}.yaml" + if single_file.exists(): + candidates.append((0, single_file)) + + if not candidates: + raise FileNotFoundError(f"No metadata files found for run {run} in {meta_dir}") + + # Pick the latest version + _, latest_path = max(candidates, key=lambda x: x[0]) + logger.info(f"Loading local metadata from {latest_path.name}") + + run_metadata = yaml.safe_load(latest_path.read_text()) + return run_metadata or {"_data": {}} + + except Exception as exc: + logger.warning(f"Failed to retrieve local metadata for run {run}: {exc}") + return {"_data": {}} diff --git a/src/sed/loader/flash/utils.py b/src/sed/loader/flash/utils.py index 85bca9a4..0f41aaaa 100644 --- a/src/sed/loader/flash/utils.py +++ b/src/sed/loader/flash/utils.py @@ -1,12 +1,6 @@ from __future__ import annotations -# TODO: move to config -MULTI_INDEX = ["trainId", "pulseId", "electronId"] -PULSE_ALIAS = MULTI_INDEX[1] -FORMATS = ["per_electron", "per_pulse", "per_train"] - - def get_channels( config_dataframe: dict = {}, formats: str | list[str] = None, @@ -29,7 +23,9 @@ def get_channels( List[str]: A list of channels with the specified format(s). """ channel_dict = config_dataframe.get("channels", {}) - aux_alias = config_dataframe.get("aux_alias", "dldAux") + index_list = config_dataframe.get("index", ["trainId", "pulseId", "electronId"]) + formats_list = config_dataframe.get("formats", ["per_train", "per_pulse", "per_electron"]) + aux_alias = channel_dict.get("auxiliary", "dldAux") # If 'formats' is a single string, convert it to a list for uniform processing. if isinstance(formats, str): @@ -39,7 +35,7 @@ def get_channels( if formats == ["all"]: channels = get_channels( config_dataframe, - FORMATS, + formats_list, index, extend_aux, ) @@ -47,24 +43,25 @@ def get_channels( channels = [] - # Include channels from multi_index if 'index' is True. + # Include channels from index_list if 'index' is True. if index: - channels.extend(MULTI_INDEX) + channels.extend(index_list) if formats: # If 'formats' is a list, check if all elements are valid. - err_msg = ( - "Invalid format. Please choose from 'per_electron', 'per_pulse', 'per_train', 'all'." - ) for format_ in formats: - if format_ not in FORMATS + ["all"]: - raise ValueError(err_msg) + if format_ not in formats_list + ["all"]: + raise ValueError( + f"Invalid format: {format_}. " f"Valid formats are: {formats_list + ['all']}", + ) # Get the available channels excluding 'pulseId'. available_channels = list(channel_dict.keys()) # pulse alias is an index and should not be included in the list of channels. 
- if PULSE_ALIAS in available_channels: - available_channels.remove(PULSE_ALIAS) + # Remove index channels if they are present in available_channels. + for channel in index_list: + if channel in available_channels: + available_channels.remove(channel) for format_ in formats: # Gather channels based on the specified format(s). @@ -75,7 +72,7 @@ def get_channels( ) # Include 'dldAuxChannels' if the format is 'per_train' and extend_aux is True. # Otherwise, include 'dldAux'. - if format_ == FORMATS[2] and aux_alias in available_channels: + if format_ == "per_train" and aux_alias in available_channels: if extend_aux: channels.extend( channel_dict[aux_alias]["sub_channels"].keys(), diff --git a/tests/data/loader/cfel/20250411_12h34m03s185_000123.h5 b/tests/data/loader/cfel/20250411_12h34m03s185_000123.h5 new file mode 100644 index 00000000..c7146891 Binary files /dev/null and b/tests/data/loader/cfel/20250411_12h34m03s185_000123.h5 differ diff --git a/tests/data/loader/cfel/config.yaml b/tests/data/loader/cfel/config.yaml new file mode 100644 index 00000000..f80b90d0 --- /dev/null +++ b/tests/data/loader/cfel/config.yaml @@ -0,0 +1,160 @@ +# This file contains the default configuration for the flash loader. + +core: + # defines the loader + loader: cfel + # Since this will run on maxwell most probably, we have a lot of cores at our disposal + num_cores: 10 + # the ID number of the beamtime + beamtime_id: 11021732 + # the year of the beamtime + year: 2025 + + # The paths to the raw and parquet data directories. If these are not + # provided, the loader will try to find the data based on year beamtimeID etc + paths: + # location of the raw data. + raw: "tests/data/loader/cfel/" + # location of the intermediate parquet files. + processed: "tests/data/loader/cfel/parquet" + + # The beamtime directories for different DAQ systems. + # (Not to be changed by user) + beamtime_dir: + pg2: "/asap3/flash/gpfs/pg2/" + cfel: "/asap3/fs-flash-o/gpfs/hextof/" + + +dataframe: + daq: fl1user3 # DAQ system name to resolve filenames/paths + ubid_offset: 5 # Offset correction to the pulseId + forward_fill_iterations: 0 # Number of iterations to fill the pulseId forward + split_sector_id_from_dld_time: True # Remove reserved bits for dldSectorID from dldTimeSteps column + sector_id_reserved_bits: 3 # Bits reserved for dldSectorID in the dldTimeSteps column + sector_delays: [0., 0., 0., 0., 0., 0., 0., 0.] 
# Sector delays + + first_event_time_stamp_key: /ScanParam/StartTime + ms_markers_key: /SlowData/exposure_time + + # Time and binning settings + tof_binwidth: 2.0576131995767355E-11 # Base time-of-flight bin width in seconds + tof_binning: 8 # Binning parameter for time-of-flight data + + # Columns used for jitter correction + index: [countId] + jitter_cols: [dldPosX, dldPosY, dldTimeSteps] + formats: [per_file, per_train, per_electron] + fill_formats: [per_train] # Channels with this format will be forward filled + + # Column settings + columns: + x: dldPosX + corrected_x: X + kx: kx + y: dldPosY + corrected_y: Y + ky: ky + tof: dldTimeSteps + tof_ns: dldTime + corrected_tof: tm + timestamp: timeStamp + auxiliary: dldAux + sector_id: dldSectorID + delay: delayStage + corrected_delay: pumpProbeTime + + units: + # These are the units of the columns + dldPosX: 'step' + dldPosY: 'step' + dldTimeSteps: 'step' + tof_voltage: 'V' + extractorVoltage: 'V' + extractorCurrent: 'A' + cryoTemperature: 'K' + sampleTemperature: 'K' + dldTime: 'ns' + delay: 'ps' + timeStamp: 's' + energy: 'eV' + E: 'eV' + kx: '1/A' + ky: '1/A' + + # The channels to load. + # channels have the following structure: + # : + # format: per_pulse/per_electron/per_train + # index_key: the hdf5 index key + # dataset_key: the hdf5 dataset key + # slice: int to slice a multidimensional data along axis=1. If not defined, there is no slicing + # dtype: the datatype of the data + # subChannels: further aliases for if the data is multidimensional and needs to be split in different cols + # used currently for the auxiliary channel + # : + # slice: int to slice a multidimensional data along axis=1. Must be defined + # dtype: the datatype of the data + + channels: + # event key + countId: + format: per_file + dataset_key: /DLD/NumOfEvents + # detector x position + dldPosX: + format: per_electron + dataset_key: /DLD/DLD/xPos + # dtype: uint32 + + # detector y position + dldPosY: + format: per_electron + dataset_key: /DLD/DLD/yPos + # dtype: uint32 + + # Detector time-of-flight channel + # if split_sector_id_from_dld_time is set to True, This this will generate + # also the dldSectorID channel + dldTimeSteps: + format: per_electron + dataset_key: /DLD/DLD/times + # dtype: uint32 + + # The auxiliary channel has a special structure where the group further contains + # a multidimensional structure so further aliases are defined below + dldAux: + format: per_train + dataset_key: "/SlowData/hextof/dld/info/Aux" + sub_channels: + sampleBias: + slice: 0 + dtype: float32 + tofVoltage: + slice: 1 + dtype: float64 + extractorVoltage: + slice: 2 + extractorCurrent: + slice: 3 + cryoTemperature: + slice: 4 + sampleTemperature: + slice: 5 + dldTimeBinSize: + slice: 15 + + vuRead: + format: per_train + dataset_key: /SlowData/hextof/logic/kmic1/Sample_VURead + + + +# metadata collection from scicat +# metadata: +# archiver_url: + +# The nexus collection routine shall be finalized soon for both instruments +# nexus: +# reader: "mpes" +# definition: "NXmpes" +# input_files: ["NXmpes_config-HEXTOF.json"] diff --git a/tests/data/loader/cfel/config2.yaml b/tests/data/loader/cfel/config2.yaml new file mode 100644 index 00000000..541830f1 --- /dev/null +++ b/tests/data/loader/cfel/config2.yaml @@ -0,0 +1,163 @@ +# This file contains the default configuration for the flash loader. 
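+# This variant of the CFEL test config points `raw` at the hextof beamtime
+# directory and defines `millis_counter_key`, which the count-rate helpers use.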
+ +core: + # defines the loader + loader: cfel + # the beamline where experiment took place + beamline: cfel + # Since this will run on maxwell most probably, we have a lot of cores at our disposal + num_cores: 10 + # the ID number of the beamtime + beamtime_id: 11021732 + # the year of the beamtime + year: 2025 + + # The paths to the raw and parquet data directories. If these are not + # provided, the loader will try to find the data based on year beamtimeID etc + paths: + # location of the raw data. + raw: "/asap3/fs-flash-o/gpfs/hextof/2025/data/11021732/raw/" + # location of the intermediate parquet files. + processed: "." + + # The beamtime directories for different DAQ systems. + # (Not to be changed by user) + beamtime_dir: + pg2: "/asap3/flash/gpfs/pg2/" + cfel: "/asap3/fs-flash-o/gpfs/hextof/" + + +dataframe: + daq: fl1user3 # DAQ system name to resolve filenames/paths + ubid_offset: 5 # Offset correction to the pulseId + forward_fill_iterations: 0 # Number of iterations to fill the pulseId forward + split_sector_id_from_dld_time: True # Remove reserved bits for dldSectorID from dldTimeSteps column + sector_id_reserved_bits: 3 # Bits reserved for dldSectorID in the dldTimeSteps column + sector_delays: [0., 0., 0., 0., 0., 0., 0., 0.] # Sector delays + + first_event_time_stamp_key: /ScanParam/StartTime + ms_markers_key: /SlowData/exposure_time + millis_counter_key: /DLD/millisecCounter + + # Time and binning settings + tof_binwidth: 2.0576131995767355E-11 # Base time-of-flight bin width in seconds + tof_binning: 8 # Binning parameter for time-of-flight data + + # Columns used for jitter correction + index: [countId] + jitter_cols: [dldPosX, dldPosY, dldTimeSteps] + formats: [per_file, per_train, per_electron] + fill_formats: [per_train] # Channels with this format will be forward filled + + # Column settings + columns: + x: dldPosX + corrected_x: X + kx: kx + y: dldPosY + corrected_y: Y + ky: ky + tof: dldTimeSteps + tof_ns: dldTime + corrected_tof: tm + timestamp: timeStamp + auxiliary: dldAux + sector_id: dldSectorID + delay: delayStage + corrected_delay: pumpProbeTime + + units: + # These are the units of the columns + dldPosX: 'step' + dldPosY: 'step' + dldTimeSteps: 'step' + tof_voltage: 'V' + extractorVoltage: 'V' + extractorCurrent: 'A' + cryoTemperature: 'K' + sampleTemperature: 'K' + dldTime: 'ns' + delay: 'ps' + timeStamp: 's' + energy: 'eV' + E: 'eV' + kx: '1/A' + ky: '1/A' + + # The channels to load. + # channels have the following structure: + # : + # format: per_pulse/per_electron/per_train + # index_key: the hdf5 index key + # dataset_key: the hdf5 dataset key + # slice: int to slice a multidimensional data along axis=1. If not defined, there is no slicing + # dtype: the datatype of the data + # subChannels: further aliases for if the data is multidimensional and needs to be split in different cols + # used currently for the auxiliary channel + # : + # slice: int to slice a multidimensional data along axis=1. 
Must be defined + # dtype: the datatype of the data + + channels: + # event key + countId: + format: per_file + dataset_key: /DLD/NumOfEvents + # detector x position + dldPosX: + format: per_electron + dataset_key: /DLD/DLD/xPos + # dtype: uint32 + + # detector y position + dldPosY: + format: per_electron + dataset_key: /DLD/DLD/yPos + # dtype: uint32 + + # Detector time-of-flight channel + # if split_sector_id_from_dld_time is set to True, This this will generate + # also the dldSectorID channel + dldTimeSteps: + format: per_electron + dataset_key: /DLD/DLD/times + # dtype: uint32 + + # The auxiliary channel has a special structure where the group further contains + # a multidimensional structure so further aliases are defined below + dldAux: + format: per_train + dataset_key: "/SlowData/hextof/dld/info/Aux" + sub_channels: + sampleBias: + slice: 0 + dtype: float32 + tofVoltage: + slice: 1 + dtype: float64 + extractorVoltage: + slice: 2 + extractorCurrent: + slice: 3 + cryoTemperature: + slice: 4 + sampleTemperature: + slice: 5 + dldTimeBinSize: + slice: 15 + + vuRead: + format: per_train + dataset_key: /SlowData/hextof/logic/kmic1/Sample_VURead + + + +# metadata collection from scicat +# metadata: +# archiver_url: + +# The nexus collection routine shall be finalized soon for both instruments +# nexus: +# reader: "mpes" +# definition: "NXmpes" +# input_files: ["NXmpes_config-HEXTOF.json"] diff --git a/tests/data/loader/flash/config.yaml b/tests/data/loader/flash/config.yaml index fbbcba25..90101c81 100644 --- a/tests/data/loader/flash/config.yaml +++ b/tests/data/loader/flash/config.yaml @@ -31,6 +31,7 @@ core: # (Not to be changed by user) beamtime_dir: pg2: "/asap3/flash/gpfs/pg2/" + cfel: "/asap3/fs-flash-o/gpfs/hextof/" dataframe: @@ -52,6 +53,10 @@ dataframe: sector_delays: [0., 0., 0., 0., 0., 0., 0., 0.] jitter_cols: ["dldPosX", "dldPosY", "dldTimeSteps"] + # The index and formats of the data + index: [trainId, pulseId, electronId] + formats: [per_train, per_pulse, per_electron] + fill_formats: [per_train, per_pulse] # Channels with this format will be forward filled columns: x: dldPosX corrected_x: X diff --git a/tests/loader/cfel/__init__.py b/tests/loader/cfel/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/loader/cfel/conftest.py b/tests/loader/cfel/conftest.py new file mode 100644 index 00000000..e11a4d0d --- /dev/null +++ b/tests/loader/cfel/conftest.py @@ -0,0 +1,92 @@ +""" This module contains fixtures for the CFEL module tests. +""" +import os +import shutil +from pathlib import Path + +import h5py +import pytest + +from sed.core.config import parse_config + +test_dir = os.path.join(os.path.dirname(__file__), "../..") +# Use CFEL config instead of FLASH config +config_path = os.path.join(test_dir, "data/loader/cfel/config2.yaml") +# Use CFEL test data paths +H5_PATH = "20250411_12h34m03s185_000123.h5" +H5_PATHS = [H5_PATH] + + +@pytest.fixture +def config(): + config_dict = parse_config( + config=config_path, + user_config=None, + system_config=None, + ) + + + return config_dict + + +@pytest.fixture(name="config_dataframe") +def fixture_config_file_dataframe() -> dict: + """Fixture providing a configuration file for CFELLoader tests. + + Returns: + dict: The parsed configuration file. + """ + return parse_config(config_path, folder_config={}, user_config={}, system_config={})[ + "dataframe" + ] + + +@pytest.fixture(name="h5_file") +def fixture_h5_file() -> h5py.File: + """Fixture providing an open h5 file. 
+ + Returns: + h5py.File: The open h5 file. + """ + return h5py.File(os.path.join(test_dir, f"data/loader/cfel/{H5_PATH}"), "r") + + +@pytest.fixture(name="h5_file_copy") +def fixture_h5_file_copy(tmp_path: Path) -> h5py.File: + """Fixture providing a copy of an open h5 file. + + Returns: + h5py.File: The open h5 file copy. + """ + # Create a copy of the h5 file in a temporary directory + original_file_path = os.path.join(test_dir, f"data/loader/cfel/{H5_PATH}") + copy_file_path = tmp_path / "copy.h5" + shutil.copyfile(original_file_path, copy_file_path) + + return h5py.File(copy_file_path, "r+") + + +@pytest.fixture(name="h5_file2_copy") +def fixture_h5_file2_copy(tmp_path: Path) -> h5py.File: + """Fixture providing a copy of an open h5 file. + + Returns: + h5py.File: The open h5 file copy. + """ + # Create a copy of the h5 file in a temporary directory + original_file_path = os.path.join(test_dir, f"data/loader/cfel/{H5_PATHS[0] if len(H5_PATHS) > 1 else H5_PATH}") # Use first file if multiple, else single file + copy_file_path = tmp_path / "copy2.h5" + shutil.copyfile(original_file_path, copy_file_path) + + # Open the copy in 'read-write' mode and return it + return h5py.File(copy_file_path, "r+") + + +@pytest.fixture(name="h5_paths") +def fixture_h5_paths() -> list[Path]: + """Fixture providing a list of h5 file paths. + + Returns: + list: A list of h5 file paths. + """ + return [Path(os.path.join(test_dir, f"data/loader/cfel/{path}")) for path in H5_PATHS] diff --git a/tests/loader/cfel/test_buffer_handler.py b/tests/loader/cfel/test_buffer_handler.py new file mode 100644 index 00000000..97af4ce6 --- /dev/null +++ b/tests/loader/cfel/test_buffer_handler.py @@ -0,0 +1,372 @@ +"""Test cases for the BufferHandler class in the Flash module.""" +from copy import deepcopy +from pathlib import Path + +import numpy as np +import pandas as pd +import pytest +from h5py import File + +from sed.loader.cfel.buffer_handler import BufferFilePaths +from sed.loader.cfel.buffer_handler import BufferHandler +from sed.loader.cfel.dataframe import DataFrameCreator +from sed.loader.cfel.loader import CFELLoader +from sed.loader.flash.utils import get_channels +from sed.loader.flash.utils import InvalidFileError + + +def create_parquet_dir(config: dict, folder: str) -> Path: + """ + Creates a directory for storing Parquet files based on the provided configuration + and folder name. + """ + + parquet_path = Path(config["core"]["paths"]["processed"]) + parquet_path = parquet_path.joinpath(folder) + parquet_path.mkdir(parents=True, exist_ok=True) + return parquet_path + + +def test_buffer_file_paths(config: dict, h5_paths: list[Path]) -> None: + """ + Test the BufferFilePath's ability to identify files that need to be read and + manage buffer file paths using a directory structure. + + This test performs several checks to ensure the BufferFilePath correctly identifies + which HDF5 files need to be read and properly manages the paths for saving buffer + files. It follows these steps: + 1. Creates a directory structure for storing buffer files and initializes the BufferHandler. + 2. Checks if the file_sets_to_process method populates the dict of missing file sets and + verify that initially, all provided files are considered missing. + 3. Checks that the paths for saving buffer files are correctly generated. + 4. Creates a single buffer file and reruns file_sets_to_process to ensure that the BufferHandler + recognizes one less missing file. + 5. 
Checks if the force_recreate parameter forces the BufferHandler to consider all files + 6. Cleans up by removing the created buffer file. + 7. Tests the handling of suffix in buffer file names (for multidetector setups) by rerunning + the checks with modified file name parameters. + """ + folder = create_parquet_dir(config, "get_files_to_read") + fp = BufferFilePaths(h5_paths, folder, suffix="") + + # check that all files are to be read + assert len(fp.file_sets_to_process()) == len(h5_paths) + # create expected paths + expected_buffer_electron_paths = [ + folder / f"buffer/electron_{Path(path).stem}" for path in h5_paths + ] + expected_buffer_timed_paths = [folder / f"buffer/timed_{Path(path).stem}" for path in h5_paths] + + # check that all buffer paths are correct + assert np.all(fp["electron"] == expected_buffer_electron_paths) + assert np.all(fp["timed"] == expected_buffer_timed_paths) + + # create a single buffer file to check if it changes + path = { + "raw": h5_paths[0], + "electron": expected_buffer_electron_paths[0], + "timed": expected_buffer_timed_paths[0], + } + bh = BufferHandler(config) + bh._save_buffer_file(path, is_first_file=True, base_timestamp=None) + + # check again for files to read and expect one less file + fp = BufferFilePaths(h5_paths, folder, suffix="") + # check that only one file is to be read + assert len(fp.file_sets_to_process()) == len(h5_paths) - 1 + + # check that both files are to be read if force_recreate is set to True + assert len(fp.file_sets_to_process(force_recreate=True)) == len(h5_paths) + + # remove buffer files + Path(path["electron"]).unlink() + Path(path["timed"]).unlink() + + # Test for adding a suffix + fp = BufferFilePaths(h5_paths, folder, "suffix") + + # expected buffer paths with prefix and suffix + for typ in ["electron", "timed"]: + expected_buffer_paths = [ + folder / "buffer" / f"{typ}_{Path(path).stem}_suffix" for path in h5_paths + ] + assert np.all(fp[typ] == expected_buffer_paths) + + +def test_buffer_schema_mismatch(config: dict, h5_paths: list[Path]) -> None: + """ + Test schema mismatch handling in BufferHandler / CFEL loader. + + Steps: + 1) Channel exists in config but NOT in HDF5 → expect InvalidFileError. + 2) Same situation, but ignored via remove_invalid_files=True → should succeed. + 3) True schema mismatch (parquet has column not in config) → expect ValueError. 
+ """ + from copy import deepcopy + + # -------------------------------------------------- + # Step 1: HDF5 missing channel → InvalidFileError + # -------------------------------------------------- + folder_step1 = create_parquet_dir(config, "schema_mismatch_step1") + config_missing_channel = deepcopy(config) + config_missing_channel["dataframe"]["channels"]["gmdTunnel2"] = { + "dataset_key": "/some/cfel/test/dataset", + "format": "per_train", + } + + with pytest.raises(InvalidFileError) as exc: + bh = BufferHandler(config_missing_channel) + bh.process_and_load_dataframe( + h5_paths=h5_paths, + folder=folder_step1, + debug=True, + force_recreate=True, # ← THIS IS REQUIRED + ) + + assert "gmdTunnel2" in str(exc.value) + + # -------------------------------------------------- + # Step 2: Same missing channel, but ignored + # All files become invalid → no buffers → FileNotFoundError + # -------------------------------------------------- + folder_step2 = create_parquet_dir(config, "schema_mismatch_step2") + + # create buffer files normally + bh_base = BufferHandler(config) + bh_base.process_and_load_dataframe( + h5_paths=h5_paths, + folder=folder_step2, + debug=True, + force_recreate=True, + ) + + # now re-run with missing channel ignored + bh_missing = BufferHandler(config_missing_channel) + bh_missing.process_and_load_dataframe( + h5_paths=h5_paths, + folder=folder_step2, + debug=True, + remove_invalid_files=True, + force_recreate=True, + ) + + # correct post-condition + assert bh_missing.df["electron"] is None + assert bh_missing.df["timed"] is None + + # -------------------------------------------------- + # Step 3: TRUE schema mismatch → ValueError + # -------------------------------------------------- + + folder_step3 = create_parquet_dir(config, "schema_mismatch_step3") + + # choose a REAL channel that exists in HDF5 + removed_channel = "dldPosX" + assert removed_channel in config["dataframe"]["channels"] + + # 1) create parquet normally (with that channel) + bh_base = BufferHandler(config) + bh_base.process_and_load_dataframe( + h5_paths=h5_paths, + folder=folder_step3, + debug=True, + force_recreate=True, + ) + + # 2) remove the channel from config + config_removed = deepcopy(config) + del config_removed["dataframe"]["channels"][removed_channel] + + # 3) reload → schema mismatch + with pytest.raises(ValueError) as exc: + bh_removed = BufferHandler(config_removed) + bh_removed.process_and_load_dataframe( + h5_paths=h5_paths, + folder=folder_step3, + debug=True, + ) + + msg = str(exc.value).lower() + assert "available channels do not match the schema" in msg + assert "missing in parquet" in msg or "missing" in msg + + +def test_save_buffer_files(config: dict, h5_paths: list[Path]) -> None: + """ + Test the BufferHandler's ability to save buffer files serially and in parallel. + + This test ensures that the BufferHandler can run both serially and in parallel, saving the + output to buffer files, and then it compares the resulting DataFrames to ensure they are + identical. This verifies that parallel processing does not affect the integrity of the data + saved. After the comparison, it cleans up by removing the created buffer files. 
+ """ + folder_serial = create_parquet_dir(config, "save_buffer_files_serial") + bh_serial = BufferHandler(config) + bh_serial.process_and_load_dataframe(h5_paths, folder_serial, debug=True) + + folder_parallel = create_parquet_dir(config, "save_buffer_files_parallel") + bh_parallel = BufferHandler(config) + bh_parallel.process_and_load_dataframe(h5_paths, folder_parallel) + + df_serial = pd.read_parquet(folder_serial) + df_parallel = pd.read_parquet(folder_parallel) + + pd.testing.assert_frame_equal(df_serial, df_parallel) + + # remove buffer files + for df_type in ["electron", "timed"]: + for path in bh_serial.fp[df_type]: + path.unlink() + for path in bh_parallel.fp[df_type]: + path.unlink() + +def test_save_buffer_files_exception( + config: dict, + h5_paths: list[Path], + h5_file_copy: File, + h5_file2_copy: File, + tmp_path: Path, +) -> None: + """Test BufferHandler exception handling for missing keys and empty datasets.""" + + folder = create_parquet_dir(config, "save_buffer_files_exception") + config_ = deepcopy(config) + + # -------------------------------------------------- + # 1) Missing dataset_key in config → ValueError + # -------------------------------------------------- + channel = "dldPosX" + del config_["dataframe"]["channels"][channel]["dataset_key"] + + with pytest.raises(ValueError): + bh = BufferHandler(config_) + bh.process_and_load_dataframe( + h5_paths, folder, debug=False + ) + + # -------------------------------------------------- + # 2) Empty dataset → InvalidFileError + # -------------------------------------------------- + config_ = deepcopy(config) + empty_channel = "testChannel" + empty_dataset_key = "test/dataset/empty/value" + + config_["dataframe"]["channels"][empty_channel] = { + "dataset_key": empty_dataset_key, + "format": "per_train", + } + + # create empty dataset in first HDF5 file + h5_file_copy.create_dataset(name=empty_dataset_key, shape=(0,)) + + # Expect InvalidFileError because dataset is empty + with pytest.raises(InvalidFileError): + bh = BufferHandler(config_) + bh.process_and_load_dataframe( + [tmp_path / "copy.h5"], + folder, + debug=False, + force_recreate=True, + ) + + # -------------------------------------------------- + # 3) remove_invalid_files=True → no error, only invalid files are skipped + # -------------------------------------------------- + # add empty dataset to second HDF5 file + h5_file2_copy.create_dataset(name=empty_dataset_key, shape=(0,)) + + bh = BufferHandler(config_) + bh.process_and_load_dataframe( + [tmp_path / "copy.h5", tmp_path / "copy2.h5"], + folder, + debug=False, + force_recreate=True, + remove_invalid_files=True, + ) + + # When all files are invalid, the DataFrames should be None + assert bh.df["electron"] is None + assert bh.df["timed"] is None + + # -------------------------------------------------- + # 4) Single invalid file → nothing valid to load + # -------------------------------------------------- + # Only provide one invalid file + bh.process_and_load_dataframe( + [tmp_path / "copy.h5"], + folder, + debug=False, + force_recreate=True, + remove_invalid_files=True, + ) + + assert bh.df["electron"] is None + assert bh.df["timed"] is None + + +def test_get_filled_dataframe(config: dict, h5_paths: list[Path]) -> None: + """Test function to verify the creation of a filled dataframe from the buffer files.""" + folder = create_parquet_dir(config, "get_filled_dataframe") + bh = BufferHandler(config) + bh.process_and_load_dataframe(h5_paths, folder) + + df = pd.read_parquet(folder) + + # The buffer 
handler's electron dataframe may have additional derived columns + # like dldSectorID that aren't in the saved parquet file + expected_columns = set(list(df.columns) + ["timeStamp", "countId", "dldSectorID"]) + assert set(bh.df["electron"].columns).issubset(expected_columns) + + # For CFEL, check that the timed dataframe contains per_train channels and timestamp + # but excludes per_electron channels (this is CFEL-specific behavior) + per_train_channels = set(get_channels(config["dataframe"], formats=["per_train"], extend_aux=True)) + per_electron_channels = set(get_channels(config["dataframe"], formats=["per_electron"])) + + timed_columns = set(bh.df["timed"].columns) + + # Timed should include per_train channels and timestamp + assert per_train_channels.issubset(timed_columns) + assert "timeStamp" in timed_columns + + # Check that we can read the data + assert len(df) > 0 + assert len(bh.df["electron"]) > 0 + assert len(bh.df["timed"]) > 0 + # remove buffer files + for df_type in ["electron", "timed"]: + for path in bh.fp[df_type]: + path.unlink() + + +def test_cfel_multi_file_handling(config: dict, h5_paths: list[Path]) -> None: + """Test CFEL's multi-file timestamp handling.""" + folder = create_parquet_dir(config, "multi_file_handling") + bh = BufferHandler(config) + + # Test that multi-file processing works with timestamp coordination + bh.process_and_load_dataframe(h5_paths=h5_paths, folder=folder, debug=True) + + # Verify that timestamps are properly coordinated across files + df = pd.read_parquet(folder) + assert "timeStamp" in df.columns # CFEL uses timeStamp, not timestamp + + # Clean up + for df_type in ["electron", "timed"]: + for path in bh.fp[df_type]: + path.unlink() + +def test_cfel_timestamp_base_handling(config: dict, h5_paths: list[Path]) -> None: + """Test CFEL's base timestamp extraction and handling.""" + if len(h5_paths) > 1: + # Test with multiple files to verify base timestamp logic + folder = create_parquet_dir(config, "timestamp_base") + bh = BufferHandler(config) + bh.process_and_load_dataframe(h5_paths=h5_paths, folder=folder, debug=True) + + # Verify processing completed successfully + assert len(bh.fp["electron"]) == len(h5_paths) + + # Clean up + for df_type in ["electron", "timed"]: + for path in bh.fp[df_type]: + path.unlink() diff --git a/tests/loader/cfel/test_cfel_loader.py b/tests/loader/cfel/test_cfel_loader.py new file mode 100644 index 00000000..1127182e --- /dev/null +++ b/tests/loader/cfel/test_cfel_loader.py @@ -0,0 +1,253 @@ +"""Tests for CFEL Loader functionality""" +from __future__ import annotations + +import os +from pathlib import Path +from typing import Literal + +import pytest + +from .test_buffer_handler import create_parquet_dir +from sed.loader.cfel.loader import CFELLoader + + +@pytest.mark.parametrize( + "sub_dir", + ["online-0/fl1user3/", "express-0/fl1user3/", "FL1USER3/"], +) +def test_initialize_dirs( + config: dict, + fs, + sub_dir: Literal["online-0/fl1user3/", "express-0/fl1user3/", "FL1USER3/"], +) -> None: + """ + Test the initialization of paths based on the configuration and directory structures. + + Args: + fs: A fixture for a fake file system. + sub_dir (Literal["online-0/fl1user3/", "express-0/fl1user3/", "FL1USER3/"]): Sub-directory. + """ + config_ = config.copy() + del config_["core"]["paths"] + config_["core"]["beamtime_id"] = "12345678" + config_["core"]["year"] = "2000" + + # Find base path of beamline from config. 
Here, we use cfel for CFEL loader + base_path = config_["core"]["beamtime_dir"]["cfel"] + expected_path = ( + Path(base_path) / config_["core"]["year"] / "data" / config_["core"]["beamtime_id"] + ) + # Create expected paths + expected_raw_path = expected_path / "raw" / sub_dir + expected_processed_path = expected_path / "processed" + + # Create a fake file system for testing + fs.create_dir(expected_raw_path) + fs.create_dir(expected_processed_path) + + # Instance of class with correct config and call initialize_dirs + fl = CFELLoader(config=config_) + fl._initialize_dirs() + assert str(expected_raw_path) == fl.raw_dir + assert str(expected_processed_path) == fl.processed_dir + + # remove beamtime_id, year and daq from config to raise error + del config_["core"]["beamtime_id"] + with pytest.raises(ValueError) as e: + fl._initialize_dirs() + assert "The beamtime_id and year are required." in str(e.value) + + +def test_initialize_dirs_filenotfound(config: dict) -> None: + """ + Test FileNotFoundError during the initialization of paths. + """ + # Test the FileNotFoundError + config_ = config.copy() + del config_["core"]["paths"] + config_["core"]["beamtime_id"] = "11111111" + config_["core"]["year"] = "2000" + + # Instance of class with correct config and call initialize_dirs + with pytest.raises(FileNotFoundError): + fl = CFELLoader(config=config_) + fl._initialize_dirs() + + +def test_save_read_parquet_cfel(config: dict) -> None: + """ + Test the functionality of saving and reading parquet files with CFELLoader. + + This test performs three main actions: + 1. First call to create and read parquet files. Verifies new files are created. + 2. Second call with the same parameters to check that it only reads from + the existing parquet files without creating new ones. It asserts that the files' modification + times remain unchanged, indicating no new files were created or existing files overwritten. + 3. Third call with `force_recreate=True` to force the recreation of parquet files. + It verifies that the files were indeed overwritten by checking that their modification + times have changed. + """ + config_ = config.copy() + data_parquet_dir = create_parquet_dir(config_, "cfel_save_read") + config_["core"]["paths"]["processed"] = data_parquet_dir + # Update the raw path to point to the CFEL test data directory + config_["core"]["paths"]["raw"] = "tests/data/loader/cfel/" + fl = CFELLoader(config=config_) + + # First call: should create and read the parquet file + df1, _, _ = fl.read_dataframe(runs=[123], force_recreate=True)#was runs = [179] + # Check if new files were created + data_parquet_dir = data_parquet_dir.joinpath("buffer") + new_files = { + file: os.path.getmtime(data_parquet_dir.joinpath(file)) + for file in os.listdir(data_parquet_dir) + } + assert new_files + + # Second call: should only read the parquet file, not create new ones + df2, _, _ = fl.read_dataframe(runs=[123]) + + # Verify no new files were created after the second call + final_files = { + file: os.path.getmtime(data_parquet_dir.joinpath(file)) + for file in os.listdir(data_parquet_dir) + } + assert ( + new_files == final_files + ), "Files were overwritten or new files were created after the second call." 
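+    # The mtime comparison above is what demonstrates the caching behaviour: the second
+    # read_dataframe call is expected to be served entirely from the existing buffer files.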
+ + # Third call: We force_recreate the parquet files + df3, _, _ = fl.read_dataframe(runs=[123], force_recreate=True) + + # Verify files were overwritten + new_files = { + file: os.path.getmtime(data_parquet_dir.joinpath(file)) + for file in os.listdir(data_parquet_dir) + } + assert new_files != final_files, "Files were not overwritten after the third call." + + # remove the parquet files + for file in new_files: + data_parquet_dir.joinpath(file).unlink() + + +def test_get_elapsed_time_fid(config: dict) -> None: + """Test get_elapsed_time method of CFELLoader class""" + # Create an instance of CFELLoader + fl = CFELLoader(config=config) + + # Mock the file_statistics and files + fl.metadata = { + "file_statistics": { + "timed": { + "0": {"columns": {"timeStamp": {"min": 10, "max": 20}}}, + "1": {"columns": {"timeStamp": {"min": 20, "max": 30}}}, + "2": {"columns": {"timeStamp": {"min": 30, "max": 40}}}, + }, + }, + } + fl.files = ["file0", "file1", "file2"] + + # ------------------------- + # Aggregate=True → sum differences + # ------------------------- + elapsed_total = fl.get_elapsed_time(fids=[0, 1], aggregate=True) + expected_total = (20 - 10) + (30 - 20) # 20 + assert elapsed_total == expected_total + + # ------------------------- + # Aggregate=False → list of per-file differences + # ------------------------- + elapsed_list = fl.get_elapsed_time(fids=[0, 1], aggregate=False) + expected_list = [(20 - 10), (30 - 20)] # [10, 10] + assert elapsed_list == expected_list + + # ------------------------- + # Test KeyError when file_statistics is missing + # ------------------------- + fl.metadata = {"something": "else"} + with pytest.raises(KeyError) as e: + fl.get_elapsed_time(fids=[0, 1]) + assert "File statistics missing. Use 'read_dataframe' first." 
in str(e.value) + + # ------------------------- + # Test KeyError when timeStamp metadata is missing for a file + # ------------------------- + fl.metadata = { + "file_statistics": { + "timed": { + "0": {}, + "1": {"columns": {"timeStamp": {"min": 20, "max": 30}}}, + }, + }, + } + with pytest.raises(KeyError) as e: + fl.get_elapsed_time(fids=[0, 1]) + assert "Timestamp metadata missing in file file0 (fid=0)" in str(e.value) + + +def test_get_elapsed_time_run(config: dict) -> None: + """Test get_elapsed_time method for runs with multiple files""" + config_ = config.copy() + data_parquet_dir = create_parquet_dir(config_, "get_elapsed_time_run") + config_["core"]["paths"]["processed"] = data_parquet_dir + config_["core"]["paths"]["raw"] = "tests/data/loader/cfel/" + + # Create an instance of CFELLoader + fl = CFELLoader(config=config_) + + # Read dataframe for run 123 + fl.read_dataframe(runs=[123]) + + # Extract expected elapsed times per file from metadata + file_stats = fl.metadata["file_statistics"]["electron"] + expected_elapsed_list = [ + file_stats[str(fid)]["columns"]["timeStamp"]["max"] + - file_stats[str(fid)]["columns"]["timeStamp"]["min"] + for fid in range(len(fl.files)) + ] + + # ------------------------- + # Aggregate=False → list of per-file elapsed times + # ------------------------- + elapsed_list = fl.get_elapsed_time(runs=[123], aggregate=False) + assert elapsed_list == expected_elapsed_list + + # ------------------------- + # Aggregate=True → sum of per-file elapsed times + # ------------------------- + elapsed_total = fl.get_elapsed_time(runs=[123], aggregate=True) + expected_total = sum(expected_elapsed_list) + assert elapsed_total == expected_total + + # ------------------------- + # Remove the parquet files created during test + # ------------------------- + buffer_dir = Path(fl.processed_dir, "buffer") + if buffer_dir.exists(): + for file in buffer_dir.iterdir(): + file.unlink() + + +def test_available_runs(monkeypatch: pytest.MonkeyPatch, config: dict) -> None: + """Test available_runs property of CFELLoader class""" + # Create an instance of CFELLoader + fl = CFELLoader(config=config) + + # Mock the raw_dir and files + fl.raw_dir = "/path/to/raw_dir" + files = [ + "run1_file1.h5", + "run3_file1.h5", + "run2_file1.h5", + "run1_file2.h5", + ] + + # Mock the glob method to return the mock files + def mock_glob(*args, **kwargs): # noqa: ARG001 + return [Path(fl.raw_dir, file) for file in files] + + monkeypatch.setattr(Path, "glob", mock_glob) + + # Test available_runs + assert fl.available_runs == [1, 2, 3] diff --git a/tests/loader/cfel/test_dataframe_creator.py b/tests/loader/cfel/test_dataframe_creator.py new file mode 100644 index 00000000..dc04d24d --- /dev/null +++ b/tests/loader/cfel/test_dataframe_creator.py @@ -0,0 +1,212 @@ +"""Tests for DataFrameCreator functionality (per_file, per_train, per_electron)""" +from pathlib import Path + +import h5py +import numpy as np +import pytest +import pandas as pd + +from sed.loader.cfel.dataframe import DataFrameCreator +from sed.loader.flash.utils import get_channels + + +def test_get_dataset_key(config_dataframe: dict, h5_paths: list[Path]) -> None: + df = DataFrameCreator(config_dataframe, h5_paths[0]) + channel = "dldPosX" + dataset_key = df.get_dataset_key(channel) + assert dataset_key == config_dataframe["channels"][channel]["dataset_key"] + + config_copy = config_dataframe.copy() + del config_copy["channels"][channel]["dataset_key"] + df2 = DataFrameCreator(config_copy, h5_paths[0]) + with 
pytest.raises(ValueError): + df2.get_dataset_key(channel) + + +def test_get_dataset_array(config_dataframe: dict, h5_paths: list[Path]) -> None: + df = DataFrameCreator(config_dataframe, h5_paths[0]) + for channel in config_dataframe["channels"]: + dset = df.get_dataset_array(channel) + assert isinstance(dset, h5py.Dataset) + assert dset.shape[0] > 0 + + +def test_df_per_file(config_dataframe: dict, h5_paths: list[Path]) -> None: + """Test per_file data (countId index)""" + df = DataFrameCreator(config_dataframe, h5_paths[0]) + per_file_channels = get_channels(config_dataframe, "per_file") + if not per_file_channels: + pytest.skip("No per_file channels in config") + + # Index should be countId + df_file = df.df # combined DataFrame includes per_file data + assert "countId" in df_file.index.names or df_file.index.name == "countId" + + # All per_file columns exist in df + for ch in per_file_channels: + assert ch in df_file.columns + + +def test_df_train(config_dataframe: dict, h5_paths: list[Path]) -> None: + """Test df_train (per_train channels)""" + df = DataFrameCreator(config_dataframe, h5_paths[0]) + per_train_channels = get_channels(config_dataframe, "per_train") + aux_alias = config_dataframe.get("aux_alias", "dldAux") + if aux_alias in config_dataframe["channels"]: + subchannels = config_dataframe["channels"][aux_alias].get("sub_channels", {}) + per_train_channels.extend(subchannels.keys()) + + if not per_train_channels: + pytest.skip("No per_train channels in config") + + df_train = df.df_train + assert isinstance(df_train, pd.DataFrame) + + # Index should be single-level trainId (because no pulseId/electronId in current code) + assert df_train.index.name == "trainId" or df_train.index.name is None + + # Columns check + assert set(df_train.columns).issubset(set(per_train_channels)) + + +def test_df_electron(config_dataframe: dict, h5_paths: list[Path]) -> None: + """Test df_electron (per_electron channels)""" + df = DataFrameCreator(config_dataframe, h5_paths[0]) + per_electron_channels = get_channels(config_dataframe, "per_electron") + if not per_electron_channels: + pytest.skip("No per-electron channels in config") + + df_elec = df.df_electron + assert isinstance(df_elec, pd.DataFrame) + + # Index can be RangeIndex (single-level) if trainId/electronId not implemented + idx = df_elec.index + assert idx is not None + # Columns + assert set(df_elec.columns).issubset(set(per_electron_channels)) + # No NaNs + assert not df_elec.isnull().values.any() + +# def test_df_electron(config_dataframe: dict, h5_paths: list[Path]) -> None: +# """Test df_electron (per_electron channels)""" +# df = DataFrameCreator(config_dataframe, h5_paths[0]) +# per_electron_channels = get_channels(config_dataframe, "per_electron") +# if not per_electron_channels: +# pytest.skip("No per-electron channels in config") + +# df_elec = df.df_electron +# assert isinstance(df_elec, pd.DataFrame) +# # MultiIndex: trainId + electronId +# idx = df_elec.index +# assert isinstance(idx, pd.MultiIndex) +# assert set(idx.names) == {"trainId", "electronId"} + +# # Columns +# assert set(df_elec.columns).issubset(set(per_electron_channels)) +# # No NaNs +# assert not df_elec.isnull().values.any() + + +def test_df_timestamp(config_dataframe: dict, h5_paths: list[Path]) -> None: + """Test timestamp DataFrame""" + df = DataFrameCreator(config_dataframe, h5_paths[0]) + ts_df = df.df_timestamp + assert isinstance(ts_df, pd.DataFrame) + ts_col = config_dataframe["columns"].get("timestamp", "timeStamp") + assert ts_col in ts_df.columns 
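+    # ts_col resolves through config_dataframe["columns"] and falls back to "timeStamp",
+    # matching the CFEL column naming used throughout these tests.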
+ # Length matches main index + assert ts_df.shape[0] == len(df.index) + + +def test_df_combined(config_dataframe: dict, h5_paths: list[Path]) -> None: + dfc = DataFrameCreator(config_dataframe, h5_paths[0]) + df = dfc.df + + assert isinstance(df, pd.DataFrame) + + df_elec = dfc.df_electron + df_train = dfc.df_train + df_ts = dfc.df_timestamp + + # 1) All electron rows must be present in the combined DF + assert df_elec.index.isin(df.index).all() + + # 2) Electron values must be unchanged (dtype upcast is OK) + pd.testing.assert_frame_equal( + df.loc[df_elec.index, df_elec.columns], + df_elec, + check_dtype=False, + ) + + # 3) Columns must be the union + expected_cols = ( + set(df_elec.columns) + | set(df_train.columns) + | set(df_ts.columns) + ) + assert set(df.columns) == expected_cols + + # 4) per_train + timestamp columns must be forward-filled + ffill_cols = list(df_train.columns) + list(df_ts.columns) + assert not df[ffill_cols].isna().any().any() + +# def test_df_combined(config_dataframe: dict, h5_paths: list[Path]) -> None: +# """Test df property (combined DataFrame)""" +# df = DataFrameCreator(config_dataframe, h5_paths[0]) +# combined = df.df +# assert isinstance(combined, pd.DataFrame) + +# # Columns = per_file + per_train + per_electron + timestamp +# expected_cols = set() +# try: +# expected_cols.update(get_channels(config_dataframe, "per_file")) +# except ValueError: +# pass +# try: +# expected_cols.update(get_channels(config_dataframe, "per_train")) +# except ValueError: +# pass +# try: +# expected_cols.update(get_channels(config_dataframe, "per_electron")) +# except ValueError: +# pass +# expected_cols.add(config_dataframe["columns"].get("timestamp", "timeStamp")) + +# # Columns in combined are subset of expected +# assert set(combined.columns).issubset(expected_cols) + + +def test_group_name_not_in_h5( + config_dataframe: dict, + h5_paths: list[Path], +) -> None: + """Test error when dataset_key does not exist in H5 file.""" + + # Pick a non-index channel + channel = next( + ch for ch in config_dataframe["channels"] + if ch != config_dataframe.get("index", ["countId"])[0] + ) + + # Deep copy only what we mutate + config = dict(config_dataframe) + config["channels"] = dict(config_dataframe["channels"]) + config["channels"][channel] = dict(config_dataframe["channels"][channel]) + + # Break ONLY this channel + config["channels"][channel]["dataset_key"] = "/this/does/not/exist" + + dfc = DataFrameCreator(config, h5_paths[0]) + + with pytest.raises(KeyError): + _ = dfc.get_dataset_array(channel) + +# def test_group_name_not_in_h5(config_dataframe: dict, h5_paths: list[Path]) -> None: +# """Test KeyError when a dataset_key is missing""" +# channel = "dldPosX" +# config = config_dataframe.copy() +# config["channels"][channel]["dataset_key"] = "non_existent_dataset" + +# df = DataFrameCreator(config, h5_paths[0]) +# with pytest.raises(KeyError): +# _ = df.df_train diff --git a/tests/loader/cfel/test_metadata.py b/tests/loader/cfel/test_metadata.py new file mode 100644 index 00000000..8431c0bb --- /dev/null +++ b/tests/loader/cfel/test_metadata.py @@ -0,0 +1,64 @@ + +import pytest +from unittest.mock import MagicMock, patch +from sed.loader.cfel.loader import CFELLoader +from sed.core.config import parse_config +import os + +# Dummy config +config = { + "core": { + "instrument": "hextof", + "beamtime_id": "12345", + "year": "2024", + "beamline": "pg2", + "beamtime_dir": {"pg2": "/tmp/beamtime"}, + "paths": {"raw": "/tmp/raw"} + }, + "dataframe": { + "daq": "fadc" + }, + 
"metadata": { + "scicat_url": "http://fake.url" + } +} + +@pytest.fixture +def loader(): + return CFELLoader(config=config) + +def test_parse_scicat_metadata(loader): + with patch("sed.loader.cfel.loader.MetadataRetriever") as MockRetriever: + instance = MockRetriever.return_value + instance.get_metadata.return_value = {"scientificMetadata": {"key": "value"}} + + loader.runs = ["1"] + meta = loader.parse_scicat_metadata(token="fake_token") + + assert meta == {"scientificMetadata": {"key": "value"}} + instance.get_metadata.assert_called_once_with( + beamtime_id="12345", + runs=["1"], + metadata={}, + ) + +def test_parse_local_metadata(loader): + with patch("sed.loader.cfel.loader.MetadataRetriever") as MockRetriever: + instance = MockRetriever.return_value + instance.get_local_metadata.return_value = {"local": "meta"} + + loader.runs = ["1"] + # Mock paths since _initialize_dirs might not be called or fail + loader.beamtime_dir = "/tmp/bt" + loader.meta_dir = "/tmp/meta" + + meta = loader.parse_local_metadata() + + assert meta == {"local": "meta"} + instance.get_local_metadata.assert_called_once_with( + beamtime_id="12345", + beamtime_dir="/tmp/bt", + meta_dir="/tmp/meta", + runs=["1"], + metadata={}, + ) diff --git a/tests/loader/flash/test_buffer_handler.py b/tests/loader/flash/test_buffer_handler.py index 3eb0e625..62c696c8 100644 --- a/tests/loader/flash/test_buffer_handler.py +++ b/tests/loader/flash/test_buffer_handler.py @@ -45,7 +45,7 @@ def test_buffer_file_paths(config: dict, h5_paths: list[Path]) -> None: the checks with modified file name parameters. """ folder = create_parquet_dir(config, "get_files_to_read") - fp = BufferFilePaths(config, h5_paths, folder, suffix="", remove_invalid_files=False) + fp = BufferFilePaths(h5_paths, folder, suffix="") # check that all files are to be read assert len(fp.file_sets_to_process()) == len(h5_paths) @@ -70,7 +70,7 @@ def test_buffer_file_paths(config: dict, h5_paths: list[Path]) -> None: bh._save_buffer_file(path) # check again for files to read and expect one less file - fp = BufferFilePaths(config, h5_paths, folder, suffix="", remove_invalid_files=False) + fp = BufferFilePaths(h5_paths, folder, suffix="") # check that only one file is to be read assert len(fp.file_sets_to_process()) == len(h5_paths) - 1 @@ -82,7 +82,7 @@ def test_buffer_file_paths(config: dict, h5_paths: list[Path]) -> None: Path(path["timed"]).unlink() # Test for adding a suffix - fp = BufferFilePaths(config, h5_paths, folder, "suffix", remove_invalid_files=False) + fp = BufferFilePaths(h5_paths, folder, "suffix") # expected buffer paths with prefix and suffix for typ in ["electron", "timed"]: diff --git a/tests/loader/flash/test_utils.py b/tests/loader/flash/test_utils.py index 929a9305..d65d8010 100644 --- a/tests/loader/flash/test_utils.py +++ b/tests/loader/flash/test_utils.py @@ -45,8 +45,8 @@ def test_get_channels_by_format(config_dataframe: dict) -> None: # Request channels for 'all' formats using a list. format_all = get_channels(ch_dict, ["all"]) - # Request index channels only. No need for channel_dict. - format_index = get_channels(index=True) + # Request index channels only. + format_index = get_channels(ch_dict, index=True) # Request 'per_electron' format and include index channels. 
format_index_electron = get_channels(ch_dict, ["per_electron"], index=True) diff --git a/tests/loader/test_loaders.py b/tests/loader/test_loaders.py index a5b357d0..da13fcad 100644 --- a/tests/loader/test_loaders.py +++ b/tests/loader/test_loaders.py @@ -22,7 +22,13 @@ test_data_dir = os.path.join(test_dir, "data") read_types = ["one_file", "files", "one_folder", "folders", "one_run", "runs"] -runs = {"generic": None, "mpes": ["30", "50"], "flash": ["43878", "43878"], "sxp": ["0016", "0016"]} +runs = { + "generic": None, + "mpes": ["30", "50"], + "flash": ["43878", "43878"], + "sxp": ["0016", "0016"], + "cfel": ["123"], +} def get_loader_name_from_loader_object(loader: BaseLoader) -> str: @@ -94,7 +100,7 @@ def test_has_correct_read_dataframe_func(loader: BaseLoader, read_type: str) -> assert callable(loader.read_dataframe) # Fix for race condition during parallel testing - if loader.__name__ in {"flash", "sxp"}: + if loader.__name__ in {"flash", "sxp", "cfel"}: config = deepcopy(loader._config) # pylint: disable=protected-access config["core"]["paths"]["processed"] = Path( config["core"]["paths"]["processed"], @@ -167,7 +173,7 @@ def test_has_correct_read_dataframe_func(loader: BaseLoader, read_type: str) -> assert loaded_dataframe.npartitions == expected_size assert isinstance(loaded_metadata, dict) - if loader.__name__ in {"flash", "sxp"}: + if loader.__name__ in {"flash", "sxp", "cfel"}: loader = cast(FlashLoader, loader) loader._initialize_dirs() for file in os.listdir(Path(loader.processed_dir, "buffer")): @@ -183,7 +189,7 @@ def test_timed_dataframe(loader: BaseLoader) -> None: """ # Fix for race condition during parallel testing - if loader.__name__ in {"flash", "sxp"}: + if loader.__name__ in {"flash", "sxp", "cfel"}: config = deepcopy(loader._config) # pylint: disable=protected-access config["core"]["paths"]["processed"] = Path( config["core"]["paths"]["processed"], @@ -201,7 +207,7 @@ def test_timed_dataframe(loader: BaseLoader) -> None: collect_metadata=False, ) if loaded_timed_dataframe is None: - if loader.__name__ in {"flash", "sxp"}: + if loader.__name__ in {"flash", "sxp", "cfel"}: loader = cast(FlashLoader, loader) loader._initialize_dirs() for file in os.listdir(Path(loader.processed_dir, "buffer")): @@ -211,7 +217,7 @@ def test_timed_dataframe(loader: BaseLoader) -> None: assert set(loaded_timed_dataframe.columns).issubset(set(loaded_dataframe.columns)) assert loaded_timed_dataframe.npartitions == loaded_dataframe.npartitions - if loader.__name__ in {"flash", "sxp"}: + if loader.__name__ in {"flash", "sxp", "cfel"}: loader = cast(FlashLoader, loader) loader._initialize_dirs() for file in os.listdir(Path(loader.processed_dir, "buffer")): @@ -227,7 +233,7 @@ def test_get_count_rate(loader: BaseLoader) -> None: """ # Fix for race condition during parallel testing - if loader.__name__ in {"flash", "sxp"}: + if loader.__name__ in {"flash", "sxp", "cfel"}: config = deepcopy(loader._config) # pylint: disable=protected-access config["core"]["paths"]["processed"] = Path( config["core"]["paths"]["processed"], @@ -246,7 +252,7 @@ def test_get_count_rate(loader: BaseLoader) -> None: ) loaded_time, loaded_countrate = loader.get_count_rate() if loaded_time is None and loaded_countrate is None: - if loader.__name__ in {"flash", "sxp"}: + if loader.__name__ in {"flash", "sxp", "cfel"}: loader = cast(FlashLoader, loader) loader._initialize_dirs() for file in os.listdir(Path(loader.processed_dir, "buffer")): @@ -261,7 +267,7 @@ def test_get_count_rate(loader: BaseLoader) -> None: with 
pytest.raises(TypeError): loader.get_count_rate(illegal_kwd=True) - if loader.__name__ in {"flash", "sxp"}: + if loader.__name__ in {"flash", "sxp", "cfel"}: loader = cast(FlashLoader, loader) loader._initialize_dirs() for file in os.listdir(Path(loader.processed_dir, "buffer")): @@ -277,7 +283,7 @@ def test_get_elapsed_time(loader: BaseLoader) -> None: """ # Fix for race condition during parallel testing - if loader.__name__ in {"flash", "sxp"}: + if loader.__name__ in {"flash", "sxp", "cfel"}: config = deepcopy(loader._config) # pylint: disable=protected-access config["core"]["paths"]["processed"] = Path( config["core"]["paths"]["processed"], @@ -311,7 +317,7 @@ def test_get_elapsed_time(loader: BaseLoader) -> None: with pytest.raises(TypeError): loader.get_elapsed_time(illegal_kwd=True) - if loader.__name__ in {"flash", "sxp"}: + if loader.__name__ in {"flash", "sxp", "cfel"}: loader = cast(FlashLoader, loader) loader._initialize_dirs() for file in os.listdir(Path(loader.processed_dir, "buffer")):
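
For readers following the get_elapsed_time tests above: the assertions only pin down the observable contract, namely that the per-file elapsed time is max(timeStamp) - min(timeStamp) taken from metadata["file_statistics"], and that aggregate=True sums these values. The sketch below restates that contract as a standalone helper under those assumptions; elapsed_time_from_stats is a hypothetical name and the dictionary layout simply mirrors what the tests mock, so it should not be read as the actual CFELLoader.get_elapsed_time implementation.

from __future__ import annotations


def elapsed_time_from_stats(
    file_statistics: dict,
    fids: list[int],
    aggregate: bool = True,
    df_key: str = "timed",
    ts_col: str = "timeStamp",
) -> float | list[float]:
    """Derive elapsed times from min/max timestamp statistics, as the tests assert."""
    elapsed = []
    for fid in fids:
        try:
            cols = file_statistics[df_key][str(fid)]["columns"][ts_col]
        except KeyError as exc:
            # The loader tests expect a KeyError when statistics are incomplete.
            raise KeyError(f"Timestamp metadata missing for fid={fid}") from exc
        elapsed.append(cols["max"] - cols["min"])
    return sum(elapsed) if aggregate else elapsed


# Usage, mirroring the mocked metadata in test_get_elapsed_time_fid:
stats = {
    "timed": {
        "0": {"columns": {"timeStamp": {"min": 10, "max": 20}}},
        "1": {"columns": {"timeStamp": {"min": 20, "max": 30}}},
    },
}
assert elapsed_time_from_stats(stats, fids=[0, 1], aggregate=True) == 20
assert elapsed_time_from_stats(stats, fids=[0, 1], aggregate=False) == [10, 10]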