diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ff3cfec..35450f84 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,9 +80,7 @@ endif () #------------------------------------------------------------------------------ # Build options #------------------------------------------------------------------------------ -# option(ENABLE_DYAD_DEBUG "Include debugging prints and logging" OFF) # This is not needed as we have CMAKE_BUILD_TYPE # This is verbose, maybe an alternate might help simplify -#option(BUILD_URPC "Build DYAD's URPC code" OFF) #option(ENABLE_PERFFLOW "Build with PerfFlow Aspect support" OFF) #option(ENABLE_UCX_DTL "Build DYAD's UCX data transport layer" OFF) diff --git a/README.md b/README.md index 5cc5551f..6fdb9e5b 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,26 @@ DYAD: DYnamic and Asynchronous Data Streamliner -DYAD aims to help sharing data files between producer and consumer job elements, -especially within an ensemble or between co-scheduled ensembles. -DYAD provides the service by two components: a FLUX module and a I/O wraper set. -DYAD transparently synchronizes file I/O between producer and consumer, and -transfers data from the producer location to the consumer location managed by the service. -Users only need to use the file path that is under the directory managed by the service. +DYAD aims to facilitate data file sharing between producer and consumer job elements, particularly within an ensemble or across co-scheduled ensembles. + +DYAD delivers this functionality through two components: a FLUX module that provides the service and a set of I/O wrappers for client-side integration. + +DYAD transparently synchronizes file access at the file level (rather than the byte level) between producers and consumers, and manages data transfer from the producer’s location to the consumer’s location. + +Users simply access files via paths located under the directory managed by the DYAD service. 
+ +### Documentation +For further information, build and refer to the documentation under `docs` + +``` +cd docs +python3 -m venv .venv +source .venv/bin/activate +pip install "Sphinx<7.0.0" myst-parser rst2pdf +make html +make pdf +``` +Then, open `index.html` under `_build/html` or DYAD.pdf under `_build/pdf` + ### License diff --git a/cmake/modules/SetupCompiler.cmake b/cmake/modules/SetupCompiler.cmake index 3b196fa2..0728f27e 100644 --- a/cmake/modules/SetupCompiler.cmake +++ b/cmake/modules/SetupCompiler.cmake @@ -96,11 +96,11 @@ endmacro() dyad_add_cxx_flags(CMAKE_CXX_FLAGS -Wall -Wextra -pedantic -Wno-unused-parameter -Wnon-virtual-dtor - -Wno-deprecated-declarations) + -Wno-deprecated-declarations -Wno-nonnull-compare) dyad_add_c_flags(CMAKE_C_FLAGS -Wall -Wextra -pedantic -Wno-unused-parameter - -Wno-deprecated-declarations) + -Wno-deprecated-declarations -Wno-nonnull-compare) if (${GLIBC_VERSION} VERSION_GREATER_EQUAL "2.19") # to suppress usleep() warning diff --git a/docs/SCA-HPCAsia26_tutorial.rst b/docs/SCA-HPCAsia26_tutorial.rst new file mode 100644 index 00000000..e473348b --- /dev/null +++ b/docs/SCA-HPCAsia26_tutorial.rst @@ -0,0 +1,11 @@ +************************************************************************** +SCA/HPCAsia 2026 · The SupercomputingAsia (SCA) Tutorial: January 26, 2026 +************************************************************************** + + +.. toctree:: + :maxdepth: 1 + + demos/SCA26/instruction + +Material for the DYAD tutorial at SCA26 can be found under ``docs/demos/SCA26``. diff --git a/docs/_static/Paper_2024_SBACPAD_DYAD.pdf b/docs/_static/Paper_2024_SBACPAD_DYAD.pdf new file mode 100644 index 00000000..ba300895 Binary files /dev/null and b/docs/_static/Paper_2024_SBACPAD_DYAD.pdf differ diff --git a/docs/conf.py b/docs/conf.py index 9fea25b8..36e097ba 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -29,8 +29,16 @@ # ones. 
extensions = [ "sphinx.ext.autosectionlabel", + 'myst_parser', + 'rst2pdf.pdfbuilder', ] +# This line explicitly tells Sphinx which parser to use for each extension +source_suffix = { + '.rst': 'restructuredtext', + '.md': 'markdown', +} + # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/docs/debugging.rst b/docs/debugging.rst new file mode 100644 index 00000000..84f30be3 --- /dev/null +++ b/docs/debugging.rst @@ -0,0 +1,96 @@ +==================================== +Common Tips for Debugging with DYAD +==================================== + +Debugging the distributed operation of multiple coordinated jobs under a batch system is quite challenging. Here are several tips. + + +Build DYAD for Debugging +======================== + +To facilitate debugging, DYAD provides several CMake options that can be enabled +at build time. + +- **For users:** Enable DYAD logging support: + + :: + + -DDYAD_LOGGER=FLUX|CPP_LOGGER -DDYAD_LOGGER_LEVEL=Debug + +- **For developers:** Treat all compiler warnings as errors: + + :: + + -DDYAD_WARNINGS_AS_ERRORS=ON + +- **For developers:** Use Clang with AddressSanitizer as needed: + + :: + + -DCMAKE_C_COMPILER=clang + -DCMAKE_CXX_COMPILER=clang++ + -DCMAKE_BUILD_TYPE=Debug + + +Runtime Logging +=============== + +Enable Flux logging when starting an instance to capture DYAD logs: + +:: + + flux start -v -o,-S,log-filename=out.txt + + +Controlling Job Standard I/O +============================ + +Flux job-related options can be used to control standard I/O behavior (see +`flux-run `_): + +- Disable output buffering: + + :: + + -u, --unbuffered + +- Label output by rank: + + :: + + -l, --label-io + +- Redirect job output streams: + + :: + + --output=, --error=, --log=, --log-stderr= + +- Use + `mustache templates `_ + for fine-controlling output.
+ + +Simulated Multi-Node Debugging +============================== + +Use a single node with a simulated multi-node setup via +``flux start --test-size=N``. In this configuration, DYAD should use different +managed paths to mimic operations on distinct nodes. + + +Common Debugging Steps +====================== + +When isolating errors in DYAD-enabled applications, the following steps are +recommended: + +- Verify environment variable propagation by running a script that prints all + DYAD-related environment variables in place of a DYAD job. +- Ensure environment variables are set consistently between producers and consumers. +- Confirm that ``DYAD_KVS_NAMESPACE`` is set and that the namespace exists in the KVS. ``flux kvs namespace list`` +- Clear any namespaces or files left over from previous runs. +- Inspect logging output to identify where a DYAD consumer may be hanging or where + a DYAD job may have crashed. +- Inspect `KVS `_ entries at both the producer and consumer as needed. ``flux kvs dir -N ${DYAD_KVS_NAMESPACE} [key]`` + diff --git a/docs/demos/SCA26/DL/dyad_torch_data_loader.py b/docs/demos/SCA26/DL/dyad_torch_data_loader.py new file mode 100644 index 00000000..12c599d2 --- /dev/null +++ b/docs/demos/SCA26/DL/dyad_torch_data_loader.py @@ -0,0 +1,241 @@ +""" + Copyright (c) 2025, UChicago Argonne, LLC + All Rights Reserved + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" +import math +import pickle +import torch +from torch.utils.data import Dataset, DataLoader +from torch.utils.data.sampler import Sampler + +from dlio_benchmark.common.constants import MODULE_DATA_LOADER +from dlio_benchmark.common.enumerations import DatasetType, DataLoaderType +from dlio_benchmark.data_loader.base_data_loader import BaseDataLoader +from dlio_benchmark.reader.reader_factory import ReaderFactory +from dlio_benchmark.utils.utility import utcnow, DLIOMPI, Profile, dft_ai +from dlio_benchmark.utils.config import ConfigArguments +from pydyad import Dyad, dyad_open +from pydyad.bindings import DTLMode, DTLCommMode +import numpy as np +#import flux +import os + +dlp = Profile(MODULE_DATA_LOADER) + + +class DYADTorchDataset(Dataset): + """ + Currently, we only support loading one sample per file + TODO: support multiple samples per file + """ + + @dlp.log_init + def __init__(self, format_type, dataset_type, epoch, num_samples, num_workers, batch_size): + self.format_type = format_type + self.dataset_type = dataset_type + self.epoch_number = epoch + self.num_samples = num_samples + self.reader = None + self.num_images_read = 0 + self.batch_size = batch_size + args = ConfigArguments.get_instance() + self.serial_args = pickle.dumps(args) + self.logger = args.logger + self.dlp_logger = None + if num_workers == 0: + self.worker_init(-1) + + @dlp.log + def worker_init(self, worker_id): + pickle.loads(self.serial_args) + _args = ConfigArguments.get_instance() + _args.configure_dlio_logging(is_child=True) + self.dlp_logger = _args.configure_dftracer(is_child=True, use_pid=True) + self.logger.debug(f"{utcnow()} worker initialized {worker_id} with format {self.format_type}") + self.reader = ReaderFactory.get_reader(type=self.format_type, + dataset_type=self.dataset_type, + thread_index=worker_id, + epoch_number=self.epoch_number) + self.dyad_io = Dyad() + self.namespace = os.getenv("DYAD_KVS_NAMESPACE") + #f = flux.Flux() + self.my_node_index = 0 #f.get_rank() 
+ self.dyad_managed_directory = os.getenv("DYAD_PATH_PRODUCER") + #self.dyad_managed_directory = os.getenv("DYAD_PATH_CONSUMER") + mode = DTLMode.DYAD_DTL_MARGO + self.dyad_io.init(debug=False, check=False, shared_storage=False, reinit=False, + async_publish=True, fsync_write=False, key_depth=3, + service_mux=1, + key_bins=1024, kvs_namespace=self.namespace, + prod_managed_path=self.dyad_managed_directory, + cons_managed_path=self.dyad_managed_directory, + dtl_mode=mode, dtl_comm_mode=DTLCommMode.DYAD_COMM_RECV) + if self.dataset_type is DatasetType.TRAIN: + self.global_index_map = _args.train_global_index_map + self.file_map = _args.train_file_map + else: + self.file_map = _args.val_file_map + self.global_index_map = _args.val_global_index_map + + def __del__(self): + if self.dlp_logger: + self.dlp_logger.finalize() + + @dlp.log + def __len__(self): + return self.num_samples + + @dlp.log + def __getitem__(self, image_idx): + self.num_images_read += 1 + step = int(math.ceil(self.num_images_read / self.batch_size)) + self.logger.debug(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} reading {image_idx} sample") + filename, sample_index = self.global_index_map[image_idx] + is_present = False + file_obj = None + base_fname = filename + dlp.update(args={"fname":filename}) + dlp.update(args={"image_idx":image_idx}) + if self.dyad_managed_directory != "": + self.logger.debug(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} reading metadata") + base_fname = os.path.join(self.dyad_managed_directory, os.path.basename(filename)) + file_obj = self.dyad_io.get_metadata(fname=base_fname, should_wait=False, raw=True) + self.logger.debug(f"Using managed directory {self.dyad_managed_directory} {base_fname} {file_obj}") + is_present = True + if file_obj: + access_mode = "remote" + if self.my_node_index == file_obj.contents.owner_rank: + access_mode = "local" + dlp.update(args={"owner_rank":str(file_obj.contents.owner_rank)}) + dlp.update(args={"mode":"dyad"}) + 
dlp.update(args={"access":access_mode}) + self.logger.debug(f"Reading from managed directory {base_fname}") + with dyad_open(base_fname, "rb", dyad_ctx=self.dyad_io, metadata_wrapper=file_obj) as f: + try: + data = np.load(f, allow_pickle=True)["x"] + except: + data = self._args.resized_image + self.dyad_io.free_metadata(file_obj) + else: + dlp.update(args={"mode":"pfs"}) + dlp.update(args={"access":"remote"}) + self.logger.debug(f"Reading from pfs {base_fname}") + data = self.reader.read_index(image_idx, step) + if is_present: + self.logger.debug(f"Writing to managed_directory {base_fname}") + with dyad_open(base_fname, "wb", dyad_ctx=self.dyad_io) as f: + np.savez(f, x=data) + self.logger.debug(f"Read from pfs {base_fname}") + + dlp.update(step=step) + dft_ai.update(step=step) + return self.reader.read_index(image_idx, step) + + +class dlio_sampler(Sampler): + def __init__(self, rank, size, num_samples, epochs): + self.size = size + self.rank = rank + self.num_samples = num_samples + self.epochs = epochs + samples_per_proc = int(math.ceil(num_samples/size)) + start_sample = self.rank * samples_per_proc + end_sample = (self.rank + 1) * samples_per_proc - 1 + if end_sample > num_samples - 1: + end_sample = num_samples - 1 + self.indices = list(range(start_sample, end_sample + 1)) + + + def __len__(self): + return self.num_samples + + def __iter__(self): + for sample in self.indices: + yield sample + + +class DyadTorchDataLoader(BaseDataLoader): + @dlp.log_init + def __init__(self, format_type, dataset_type, epoch_number): + super().__init__(format_type, dataset_type, epoch_number, DataLoaderType.PYTORCH) + + @dlp.log + def read(self): + dataset = DYADTorchDataset(self.format_type, self.dataset_type, self.epoch_number, self.num_samples, + self._args.read_threads, self.batch_size) + sampler = dlio_sampler(self._args.my_rank, self._args.comm_size, self.num_samples, self._args.epochs) + if self._args.read_threads >= 1: + prefetch_factor = 
math.ceil(self._args.prefetch_size / self._args.read_threads) + else: + prefetch_factor = self._args.prefetch_size + if prefetch_factor > 0: + if self._args.my_rank == 0: + self.logger.debug( + f"{utcnow()} Prefetch size is {self._args.prefetch_size}; prefetch factor of {prefetch_factor} will be set to Torch DataLoader.") + else: + prefetch_factor = 2 + if self._args.my_rank == 0: + self.logger.debug( + f"{utcnow()} Prefetch size is 0; a default prefetch factor of 2 will be set to Torch DataLoader.") + self.logger.debug(f"{utcnow()} Setup dataloader with {self._args.read_threads} workers {torch.__version__}") + if self._args.read_threads==0: + kwargs={} + else: + kwargs={'multiprocessing_context':self._args.multiprocessing_context, + 'prefetch_factor': prefetch_factor} + if torch.__version__ != '1.3.1': + kwargs['persistent_workers'] = True + if torch.__version__ == '1.3.1': + if 'prefetch_factor' in kwargs: + del kwargs['prefetch_factor'] + self._dataset = DataLoader(dataset, + batch_size=self.batch_size, + sampler=sampler, + num_workers=self._args.read_threads, + pin_memory=self._args.pin_memory, + drop_last=True, + worker_init_fn=dataset.worker_init, + **kwargs) + else: + self._dataset = DataLoader(dataset, + batch_size=self.batch_size, + sampler=sampler, + num_workers=self._args.read_threads, + pin_memory=self._args.pin_memory, + drop_last=True, + worker_init_fn=dataset.worker_init, + **kwargs) # 2 is the default value + self.logger.debug(f"{utcnow()} Rank {self._args.my_rank} will read {len(self._dataset) * self.batch_size} files") + + # self._dataset.sampler.set_epoch(epoch_number) + + @dlp.log + def next(self): + super().next() + total = self._args.training_steps if self.dataset_type is DatasetType.TRAIN else self._args.eval_steps + self.logger.debug(f"{utcnow()} Rank {self._args.my_rank} should read {total} batches") + step = 1 + for batch in dft_ai.dataloader.fetch.iter(self._dataset): + dlp.update(step=step) + dft_ai.update(step=step) + step += 1 + yield 
batch + self.epoch_number += 1 + dlp.update(epoch=self.epoch_number) + dft_ai.update(epoch=self.epoch_number) + + @dlp.log + def finalize(self): + pass diff --git a/docs/demos/SCA26/DYAD_KEY.png b/docs/demos/SCA26/DYAD_KEY.png new file mode 100644 index 00000000..44bed597 Binary files /dev/null and b/docs/demos/SCA26/DYAD_KEY.png differ diff --git a/docs/demos/SCA26/Makefile b/docs/demos/SCA26/Makefile new file mode 100644 index 00000000..779c6b84 --- /dev/null +++ b/docs/demos/SCA26/Makefile @@ -0,0 +1,32 @@ +# Note: +# Please set environment variable +# DYAD_INSTALL_PREFIX to your DYAD +# install directory first! + +DYAD_INSTALL_LIBDIR = $(shell if [ -d $${DYAD_INSTALL_PREFIX}/lib64 ] ; then echo $${DYAD_INSTALL_PREFIX}/lib64; elif [ -d $${DYAD_INSTALL_PREFIX}/lib ] ; then echo $${DYAD_INSTALL_PREFIX}/lib; fi) +DYAD_INCLUDE_PATH = $(DYAD_INSTALL_PREFIX)/include + + +CFLAGS_LOC = -g -std=c11 -DDYAD_HAS_CONFIG=1 $(CFLAGS) +CPPFLAGS_LOC = -g -O3 -I. $(CPPFLAGS) +CXXFLAGS_LOC = -g -std=c++11 -DDYAD_HAS_CONFIG=1 -I$(DYAD_INCLUDE_PATH) $(CXXFLAGS) +CXXLIBS_LOC = -L$(DYAD_INSTALL_LIBDIR) -Wl,-rpath=$(DYAD_INSTALL_LIBDIR) -ldyad_fstream $(CXXLIBS) $(UCXLIBS) + +all: c_prod c_cons cpp_prod cpp_cons + +c_prod: ../ecp_feb_2023/prod.c + $(CC) $(CPPFLAGS_LOC) $(CFLAGS_LOC) $? -o $@ + +c_cons: ../ecp_feb_2023/cons.c + $(CC) $(CPPFLAGS_LOC) $(CFLAGS_LOC) $? -o $@ + +cpp_prod: ../ecp_feb_2023/prod.cpp + $(CXX) $(CPPFLAGS_LOC) $(CXXFLAGS_LOC) $? -o $@ $(CXXLIBS_LOC) + +cpp_cons: ../ecp_feb_2023/cons.cpp + $(CXX) $(CPPFLAGS_LOC) $(CXXFLAGS_LOC) $? 
-o $@ $(CXXLIBS_LOC) + +.PHONY: clean + +clean: + rm -f c_prod c_cons cpp_prod cpp_cons diff --git a/docs/demos/SCA26/batch/select_language.sh b/docs/demos/SCA26/batch/select_language.sh new file mode 100755 index 00000000..21801b7f --- /dev/null +++ b/docs/demos/SCA26/batch/select_language.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +script_name="$0" + +if test "$#" -gt 1; then + echo "Invalid number of arguments to $script_name" + exit 1 +elif test "$#" -eq 0; then + mode="c" +else + mode="$1" +fi + +valid_modes=("c" "cpp" "python") +mode_is_valid=0 +for vm in "${valid_modes[@]}"; do + if [[ $mode_is_valid -eq 1 ]] || [[ "$mode" == "$vm" ]]; then + mode_is_valid=1 + else + mode_is_valid=0 + fi +done + +if [[ $mode_is_valid -eq 0 ]]; then + echo "Invalid arg for language mode: $mode" + echo 'Choose either "c", "cpp" or "python"' + exit 2 +fi +echo "Language: ${mode}" diff --git a/docs/demos/SCA26/batch/task_cons.sh b/docs/demos/SCA26/batch/task_cons.sh new file mode 100755 index 00000000..7ab15155 --- /dev/null +++ b/docs/demos/SCA26/batch/task_cons.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +script_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) + +if [ $# -lt 1 ] ; then + echo "Need a task id" + exit 1 +fi + +task=$1 +mkdir -p ${DYAD_PATH_CONSUMER}/${task} +shift + +source ${script_dir}/select_language.sh + + +# Choose among C, C++, Python +if [[ "$mode" == "${valid_modes[0]}" ]]; then + echo `hostname` \$ LD_PRELOAD=${DYAD_INSTALL_LIBDIR}/libdyad_wrapper.so ${script_dir}/../c_cons 10 ${DYAD_PATH_CONSUMER}/${task} + LD_PRELOAD=${DYAD_INSTALL_LIBDIR}/libdyad_wrapper.so ${script_dir}/../c_cons 10 ${DYAD_PATH_CONSUMER}/${task} +elif [[ "$mode" == "${valid_modes[1]}" ]]; then + echo ${script_dir}/../cpp_cons 10 ${DYAD_PATH_CONSUMER}/${task} + ${script_dir}/../cpp_cons 10 ${DYAD_PATH_CONSUMER}/${task} +elif [[ "$mode" == "${valid_modes[2]}" ]]; then + echo python3 ../../../../tests/pydyad_spsc/consumer.py ${DYAD_PATH_CONSUMER}/${task} 10 50 + python3 
../../../../tests/pydyad_spsc/consumer.py ${DYAD_PATH_CONSUMER}/${task} 10 50 +else + echo "Invalid language mode for consumer ${task}: $mode" + exit 1 +fi + +ls -l ${DYAD_PATH_CONSUMER}/${task} diff --git a/docs/demos/SCA26/batch/task_prod.sh b/docs/demos/SCA26/batch/task_prod.sh new file mode 100755 index 00000000..2572f2c1 --- /dev/null +++ b/docs/demos/SCA26/batch/task_prod.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +script_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) + +if [ $# -lt 1 ] ; then + echo "Need a task id" + exit 1 +fi + +task=$1 +mkdir -p ${DYAD_PATH_PRODUCER}/${task} +shift + +source ${script_dir}/select_language.sh + + +# Choose among C, C++, Python +if [[ "$mode" == "${valid_modes[0]}" ]]; then # C + echo `hostname` \$ LD_PRELOAD=${DYAD_INSTALL_LIBDIR}/libdyad_wrapper.so ${script_dir}/../c_prod 10 ${DYAD_PATH_PRODUCER}/${task} + LD_PRELOAD=${DYAD_INSTALL_LIBDIR}/libdyad_wrapper.so ${script_dir}/../c_prod 10 ${DYAD_PATH_PRODUCER}/${task} +elif [[ "$mode" == "${valid_modes[1]}" ]]; then # C++ + echo ${script_dir}/../cpp_prod 10 ${DYAD_PATH_PRODUCER}/${task} + ${script_dir}/../cpp_prod 10 ${DYAD_PATH_PRODUCER}/${task} +elif [[ "$mode" == "${valid_modes[2]}" ]]; then # Python + echo python3 ../../../../tests/pydyad_spsc/producer.py ${DYAD_PATH_PRODUCER}/${task} 10 50 + python3 ../../../../tests/pydyad_spsc/producer.py ${DYAD_PATH_PRODUCER}/${task} 10 50 +else + echo "Invalid language mode for producer ${task}: $mode" + exit 1 +fi diff --git a/docs/demos/SCA26/batch/workflow.sh b/docs/demos/SCA26/batch/workflow.sh new file mode 100755 index 00000000..b13b543c --- /dev/null +++ b/docs/demos/SCA26/batch/workflow.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# Flux commands manual +# https://flux-framework.readthedocs.io/projects/flux-core/en/latest/man1 +# This script submits jobs for ${n_tasks} pairs of producers and consumers. 
+n_tasks=1 + +script_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) + +# Check if lib64 is under the installation. If not, try lib. +if [ -d ${DYAD_INSTALL_PREFIX}/lib64 ] ; then + export DYAD_INSTALL_LIBDIR=${DYAD_INSTALL_PREFIX}/lib64 +elif [ -d ${DYAD_INSTALL_PREFIX}/lib ] ; then + export DYAD_INSTALL_LIBDIR=${DYAD_INSTALL_PREFIX}/lib +else + echo "Cannot find DYAD LIB DIR" + exit 1 +fi + + +# Parse the argument that specifies the application language mode (c|cpp|python) +source ${script_dir}/select_language.sh + + +if [[ "${DYAD_PATH_CONSUMER}" == "" || "${DYAD_PATH_PRODUCER}" == "" ]] ; then + echo Undefined environment variables: DYAD_PATH_PRODUCER and DYAD_PATH_CONSUMER + exit 1 +fi + +# Clean up any potentially existing data from previous runs +flux exec -r all rm -rf ${DYAD_PATH_CONSUMER} ${DYAD_PATH_PRODUCER} +# Prepare directories on local storages +flux exec -r all mkdir -p ${DYAD_PATH_CONSUMER} ${DYAD_PATH_PRODUCER} + + +# Start DYAD service +echo "Creating a KVS namespace for DYAD: ${DYAD_KVS_NAMESPACE}" +flux kvs namespace create ${DYAD_KVS_NAMESPACE} + +echo "Loading DYAD service module ${DYAD_INSTALL_LIBDIR}/dyad.so" +flux exec -r all flux module load ${DYAD_INSTALL_LIBDIR}/dyad.so #--mode="${DYAD_DTL_MODE}" ${DYAD_PATH_PRODUCER} + + +# Here, we submit the consumer jobs first for demonstration purposes. +# In practice, producer jobs are submitted first. 
+ +for i_task in `seq 1 $n_tasks` +do + echo "Submitting Consumer job" + flux submit --nodes 1 --exclusive -t 10 \ + --env=DYAD_KVS_NAMESPACE=${DYAD_KVS_NAMESPACE} \ + --env=DYAD_DTL_MODE=${DYAD_DTL_MODE} \ + --env=DYAD_INSTALL_LIBDIR=${DYAD_INSTALL_LIBDIR} \ + ${script_dir}/task_cons.sh ${i_task} ${mode} + CONS_IDs="${CONS_IDs} $(flux job last)" + + echo "Submitting Producer job" + flux submit --nodes 1 --exclusive -t 10 \ + --env=DYAD_KVS_NAMESPACE=${DYAD_KVS_NAMESPACE} \ + --env=DYAD_DTL_MODE=${DYAD_DTL_MODE} \ + --env=DYAD_INSTALL_LIBDIR=${DYAD_INSTALL_LIBDIR} \ + ${script_dir}/task_prod.sh ${i_task} ${mode} + PROD_IDs="${PROD_IDs} $(flux job last)" +done + + +# Inspect job's stdout/stderr by interactively attaching to the job while it is running or after it has completed +flux jobs -a +for id_prod in ${PROD_IDs} +do + flux job attach ${id_prod} +done + +for id_cons in ${CONS_IDs} +do + flux job attach ${id_cons} +done + +# Block until all queues become empty +flux queue drain + + +# Clean up +# Make sure the consumers are complete. In our case, `flux queue drain` facilitates that. 
+echo "Cleaning up" +flux exec -r all flux module remove dyad 2> /dev/null +flux kvs namespace remove ${DYAD_KVS_NAMESPACE} +flux exec -r all rm -rf ${DYAD_PATH_CONSUMER} ${DYAD_PATH_PRODUCER} diff --git a/docs/demos/SCA26/c_cons.sh b/docs/demos/SCA26/c_cons.sh new file mode 100755 index 00000000..5eba0724 --- /dev/null +++ b/docs/demos/SCA26/c_cons.sh @@ -0,0 +1,3 @@ +mkdir -m 775 -p ${DYAD_PATH_CONSUMER}/prod-cons +echo LD_PRELOAD=${DYAD_INSTALL_PREFIX}/lib/libdyad_wrapper.so ./c_cons 10 ${DYAD_PATH_CONSUMER}/prod-cons +LD_PRELOAD=${DYAD_INSTALL_PREFIX}/lib/libdyad_wrapper.so ./c_cons 10 ${DYAD_PATH_CONSUMER}/prod-cons diff --git a/docs/demos/SCA26/c_prod.sh b/docs/demos/SCA26/c_prod.sh new file mode 100755 index 00000000..09f6bbb5 --- /dev/null +++ b/docs/demos/SCA26/c_prod.sh @@ -0,0 +1,3 @@ +mkdir -m 775 -p ${DYAD_PATH_PRODUCER}/prod-cons +echo LD_PRELOAD=${DYAD_INSTALL_PREFIX}/lib/libdyad_wrapper.so ./c_prod 10 ${DYAD_PATH_PRODUCER}/prod-cons +LD_PRELOAD=${DYAD_INSTALL_PREFIX}/lib/libdyad_wrapper.so ./c_prod 10 ${DYAD_PATH_PRODUCER}/prod-cons diff --git a/docs/demos/SCA26/check_env.sh b/docs/demos/SCA26/check_env.sh new file mode 100755 index 00000000..78915ec8 --- /dev/null +++ b/docs/demos/SCA26/check_env.sh @@ -0,0 +1,5 @@ +echo DYAD_INSTALL_PREFIX=${DYAD_INSTALL_PREFIX} +echo DYAD_KVS_NAMESPACE=${DYAD_KVS_NAMESPACE} +echo DYAD_DTL_MODE=${DYAD_DTL_MODE} +echo DYAD_PATH_PRODUCER=${DYAD_PATH_PRODUCER} +echo DYAD_PATH_CONSUMER=${DYAD_PATH_CONSUMER} diff --git a/docs/demos/SCA26/instruction.md b/docs/demos/SCA26/instruction.md new file mode 100644 index 00000000..f0df64a8 --- /dev/null +++ b/docs/demos/SCA26/instruction.md @@ -0,0 +1,443 @@ +# DYAD Tutorial at SCA/HPCAsia 2026 in Osaka Japan +This tutorial is offered as a part of the "Accelerating HPC Application I/O with Fast Node-Local Storage" session. 
+ +## DYAD dependencies +- requires: [flux-core](https://github.com/flux-framework/flux-core.git), [jansson](https://github.com/akheron/jansson.git) +- optional: [mochi-margo](https://github.com/mochi-hpc/mochi-margo.git) for using libfabric + [ucx](https://github.com/openucx/ucx.git) for using ucx + [dftracer](https://github.com/llnl/dftracer.git) (with numpy and h5py) for performance tracing + +Also, we recommend to install [flux-sched](https://github.com/flux-framework/flux-sched.git) +For this tutorial, we already have these dependencies pre-installed. + + +## In case of installing dependencies using [spack](https://github.com/spack/spack.git) + +``` +git clone https://github.com/spack/spack.git +spack env create dyad +spack env activate dyad +spack external find +spack compiler find +spack install --add ucx +spack install --add mochi-margo +spack load mochi-margo +# spack env deactivate +``` + +pip install flux-python==0.80.0 + +## Setup the environment +``` +export DYAD_INSTALL_PREFIX=/home/${USER}/venv +module load flux-core mochi-margo +# Or +# spack env activate dyad +# spack load mochi-margo +``` + +## Build DYAD + +``` +git clone https://github.com/flux-framework/dyad.git +cd dyad; mkdir build; cd build +cmake -DDYAD_ENABLE_MARGO_DATA=ON \ + -DDYAD_LIBDIR_AS_LIB=ON \ + -DCMAKE_INSTALL_PREFIX=${DYAD_INSTALL_PREFIX} \ + .. +make -j install +cd ../docs/demos/SCA26 +make +cd ../../.. +``` + +To enable DYAD with Python, set up PyDYAD in a virtual environment. + +``` +python3 -m venv ${DYAD_INSTALL_PREFIX} +source ${DYAD_INSTALL_PREFIX}/bin/activate +pip install flux-python==0.80.0 +cd pydyad +pip install . +``` + + +## Producer-Consumer example with DYAD +In this example, we use two simple implementations of a producer and a consumer +written in C: [prod.c](../ecp_feb_2023/prod.c) and [cons.c](../ecp_feb_2023/cons.c). Each program accepts two command-line arguments. 
+ +For the producer, the arguments specify the number of files to write and the +directory into which the files are written. For the consumer, the arguments +specify the number of files to read and the directory from which the files are +read. + +To improve I/O performance, we create the directory on the local SSD of each +node. If the directory is configured as a DYAD-managed directory (DMD), DYAD +intercepts file read and write operations and transparently transfers files +between nodes as needed. + +The consumer opens the files it needs to read as if they already exist on its +local storage. If a requested file does not exist locally, DYAD pauses the file +open operation until the producer writes the file to its local storage (which +is remote to the consumer). DYAD then creates a local copy for the consumer, +after which the read operation resumes normally. + +If a file access is not within a DMD, DYAD does not intervene and allows the +operation to proceed without interference. + +``` ++------------------------+ +------------------------+ +| | | | +| Producer Process | | Consumer Process | +| | | | ++------------------------+ +------------------------+ +| Local Storage | -- File -> | Local Storage | +| /mnt/ssd/${USER}/dyad | | /mnt/ssd/${USER}/dyad | ++------------------------+ +------------------------+ + Compute Node 1 Compute Node 2 +``` + +### Walk through the example step by step using two terminals, starting by setting the required environment variables. + +Open two terminals connected to the login node, and ensure that the `flux-core` +module is loaded in both sessions. One terminal will be used for the producer +and the other for the consumer. + +Set the environment variables on both terminals that need to be replicated +across all compute nodes in the allocation. Unless a Flux proxy is used, this +step can be deferred until starting the Flux instance. 
This is because Flux +captures and propagates the current environment to the resources it begins to +manage when the instance is started. + +Especially, DYAD requires four environment variables, **DYAD_KVS_NAMESPACE**, **DYAD_DTL_MODE**, **DYAD_PATH_PRODUCER** and **DYAD_PATH_CONSUMER**. + +``` +export DYAD_KVS_NAMESPACE=dyad +export DYAD_DTL_MODE=MARGO +``` +Other data transfer layer (DTL) choices include `UCX` and `FLUX_RPC`. + +**The DYAD Managed Directory (DMD)** + +Although it is possible to set different DMDs for the producer and the consumer, +we typically use the same directory for both to keep file paths consistent. + +``` +export DYAD_PATH_PRODUCER=/mnt/ssd/${USER}/dyad +export DYAD_PATH_CONSUMER=/mnt/ssd/${USER}/dyad +``` + +For the full list of DYAD environment variables, refer to [dyad_envs.h](../../../include/dyad/common/dyad_envs.h). +- DYAD_PATH_RELATIVE: The presence of this variable in the environment indicates that DYAD treats relative paths as relative to the managed directory. +- DYAD_SHARED_STORAGE: The presence of this variable in the environment indicates that the storage containing the managed path is shared. In this case, DYAD does not transfer files but only synchronizes access. +- DYAD_ASYNC_PUBLISH: Enable asynchronous metadata publishing by producers. +- DYAD_SERVICE_MUX: Number of Flux brokers sharing node-local storage. The default is 1. +- DYAD_KEY_BINS: DYAD utilizes a hierarchical key structure with multiple levels of hash tables. This variable specifies the number of hash bins at each level. The default is 1024. +- DYAD_KEY_DEPTH: This variable specifies the number of hash table levels. The default is 3. + +![Hierarchical Key Structure for Scalability](DYAD_KEY.png) + + +### Allocate compute nodes and start a Flux instance + +In one terminal, allocate compute nodes—two nodes in this example, with one core +per node. Ensure that the `mochi-margo` module is loaded to take advantage of +the underlying high-speed interconnect. 
On systems where the native scheduler +is SLRUM, use [salloc](https://slurm.schedmd.com/salloc.html) command. On systems where Flux is the native scheduler, +use the [flux alloc](https://flux-framework.readthedocs.io/projects/flux-core/en/latest/man1/flux-alloc.html) command. + +The `salloc` command below returns an interactive shell on a compute node, from +which you can start a Flux instance. When using `flux alloc`, a sub-instance is +started automatically, so no additional step is required. + +``` +$ salloc -N 2 -n 2 --tasks-per-node=1 -t 30 +salloc: Granted job allocation 70 +``` + + +[Start a Flux instance](https://flux-framework.readthedocs.io/projects/flux-core/en/latest/guide/start.html) using the allocated resources (two nodes in this example). +The [srun](https://slurm.schedmd.com/srun.html) option `--pty` returns an interactive shell on the first compute node. +It is recommended to use `--mpi=pmi2` when bootstrapping Flux. +For running OpenMPI v5+ under Flux, refer to [flux-pmix](https://github.com/flux-framework/flux-pmix). + +``` +$ srun --mpi=pmi2 -N 2 -n 2 --pty flux start # -v -o,-S,log-filename=out.txt +``` + +Verify the nodes managed by the Flux instance using [`flux exec`](https://flux-framework.readthedocs.io/projects/flux-core/en/latest/man1/flux-exec.html). + +``` +$ flux exec -r all hostname +dsaicn01 +dsaicn02 +``` + +### Set up a Flux proxy in the second terminal for interactive demonstration + +Switch to the second terminal to connect to the second compute node through the +*Flux proxy*. In this terminal, first run [`squeue`](https://slurm.schedmd.com/squeue.html) to obtain the ID of the SLURM +allocation (job) in which the Flux instance is running. 
+ +``` +$ squeue + JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 70 compute interact yeom2 R 1:38 2 dsaicn[01-02] +``` + +Then, set up a Flux proxy to connect to the existing instance under that +allocation using the commands, [flux uri](https://flux-framework.readthedocs.io/projects/flux-core/en/latest/man1/flux-uri.html) and [flux proxy](https://flux-framework.readthedocs.io/projects/flux-core/en/latest/man1/flux-proxy.html). + +``` +$ flux proxy `flux uri slurm:70` +$ flux exec -r all hostname +dsaicn01 +dsaicn02 +``` + +### Start DYAD service + +Switch back to the first terminal, which has a shell on the first compute node. +**Create a DYAD namespace** in the Flux key-value-store (KVS) using the [flux kvs](https://flux-framework.readthedocs.io/projects/flux-core/en/latest/man1/flux-kvs.html) command. + +``` +flux kvs namespace create ${DYAD_KVS_NAMESPACE} +``` + +The namespace will be visible to all nodes managed by the Flux instance, +allowing you to verify its existence from another terminal. + +``` +flux kvs namespace list +``` + +Flux maintains job-related information using a hierarchical key-value store (KVS) in a scalable manner +DYAD leverages this capability while isolating data using namespaces. + + +Start the DYAD service by loading the DYAD Flux plugin module into the Flux instance. + +``` +flux exec -r all flux module load ${DYAD_INSTALL_PREFIX}/lib/dyad.so \ + --mode="${DYAD_DTL_MODE}" ${DYAD_PATH_PRODUCER} +``` +The DYAD module can be configured using environment variables, and users can +override these settings through command-line arguments. Therefore, specifying +both module arguments may not have been necessary in this example. + +For this particular example, we only need to load the DYAD module on the node +where the producer is running. However, it is generally a good practice to make +it available on all nodes if they are expected to produce files to be shared. 
+That is why we execute [flux module load](https://flux-framework.readthedocs.io/projects/flux-core/en/latest/man1/flux-module.html) on all the Flux broker ranks. +The DYAD module `dyad.so` requires an argument to specify the DYAD Managed Directory (DMD). +In this case, it is `${DYAD_PATH_PRODUCER}`. + + +### Run the consumer and producer on separate nodes using local storage + +For demonstration purposes, start the consumer first on one of the nodes (e.g., dsaicn01). +When it attempts to open a file that does not yet exist, the operation blocks +until the file becomes locally available. If the producer were started first, +the consumer would complete without blocking. + +``` +cd interactive +flux exec -r 0 ./c_cons.sh +``` +Where `c_cons.sh` contains the following: +``` +mkdir -m 775 -p ${DYAD_PATH_CONSUMER}/prod-cons +LD_PRELOAD=${DYAD_INSTALL_PREFIX}/lib/libdyad_wrapper.so ../c_cons 10 ${DYAD_PATH_CONSUMER}/prod-cons +``` +./c_cons is the executable built out of [cons.c](../ecp_feb_2023/cons.c). +The DYAD wrapper library `libdyad_wrapper.so` leverages the LD_PRELOAD mechanism +to intercept *open()/close()* and *fopen()/fclose()* calls on files under the DMD. +It then performs DYAD-specific operations on those file accesses. +In particular, when the consumer reads a file, metadata discovery and file +transfer are triggered during the *open()/fopen()* operation. + + +On the other terminal, run the producer: +``` +cd interactive +flux exec -r 1 ./c_prod.sh +``` +where `c_prod.sh` contains the following: +``` +LD_PRELOAD=${DYAD_INSTALL_PREFIX}/lib/libdyad_wrapper.so ../c_prod 10 ${DYAD_PATH_PRODUCER}/prod-cons +``` +./c_prod is the executable built out of [prod.c](../ecp_feb_2023/prod.c). +When the producer writes a file, DYAD intercepts the *close()/fclose()* calls to +publish metadata in the form of a key-value pair consisting of the file path +and the owner broker rank. + +You will see that the consumer completes. 
+ +For further inspection, you can check the files on the local storage of each node. + +``` +flux exec -r 0 ls -l ${DYAD_PATH_CONSUMER}/prod-cons +flux exec -r 1 ls -l ${DYAD_PATH_PRODUCER}/prod-cons +``` + +### Clean up + +``` +flux exec -r all rm -rf ${DYAD_PATH_PRODUCER}/prod-cons ${DYAD_PATH_CONSUMER}/prod-cons +flux kvs namespace remove dyad +flux exec -r all flux module remove dyad +``` + +Finally, `exit` both the proxy terminal and the compute-node shell, or remain in +them to run the next example. + + +## Running a workflow in a batch processing mode + +As the number of workflow tasks scales, manually coordinating tasks becomes +increasingly difficult. Consequently, task jobs are typically submitted to a +batch scheduler such as SLURM or Flux. Without DYAD, it is necessary to +explicitly specify dependencies between consumer and producer task jobs. In +addition, data are most often shared through a global shared file system rather +than through node-local storage. DYAD relaxes these scheduling constraints and +exposes new opportunities for scheduling optimization. +The initial steps of this example are identical to those of the interactive +example, up to the proxy setup. Unlike the interactive case, this example does +not require a Flux proxy, since jobs are submitted directly to the batch +scheduler using the existing Flux instance. If you are reusing the same +allocation from the interactive example, remove and recreate the DYAD namespace. +In addition, delete any files generated by previous examples before continuing. + +``` +cd batch +./workflow.sh +``` + + +## Deep learning training with DYAD + +In this demonstration, we use the UNet3D example from the Deep Learning I/O ([DLIO](https://dlio-benchmark.readthedocs.io/en/latest/)) Benchmark. +In deep learning (DL) training, the stochastic gradient descent (SGD) algorithm +relies on sample shuffling. In distributed training, the entire dataset may +reside on shared storage accessible to all trainers. 
However, this approach can +introduce bottlenecks due to concentrated metadata lookups and saturated +bandwidth. Alternatively, the dataset can be sharded across compute nodes, +provided there is a mechanism to make locally stored samples accessible to +remote nodes. DYAD transparently fulfills this requirement and significantly +improves I/O performance. In this setup, each trainer acts as both a consumer +and a producer. DYAD first checks whether a sample exists in the cache; if not, +the sample is loaded from shared storage. + +In this example, `${HOME}/demo_DLIO` is defined as the location where training +data and benchmark results are stored. + + +### Install the DLIO benchmark in a Python virtual environment + +If possible, use the same environment as PyDYAD. + +``` +module load flux-core mochi-margo mpich +export DYAD_INSTALL_PREFIX=/home/${USER}/venv +source ${DYAD_INSTALL_PREFIX}/bin/activate + +git clone https://github.com/argonne-lcf/dlio_benchmark +cd dlio_benchmark +# comment out nvidia-dali-cuda in setup.py +pip install .[dftracer] +pip install flux-python==0.80.0 +``` + +### Set up the environment to use DLIO, DFTracer and DYAD + +``` +module load flux-core mochi-margo mpich +export DYAD_INSTALL_PREFIX=/home/${USER}/venv +source ${DYAD_INSTALL_PREFIX}/bin/activate +export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HOME}/venv/lib +export DFTRACER_ENABLE=1 +export DYAD_KVS_NAMESPACE=dyad +export DYAD_DTL_MODE=MARGO +export DYAD_PATH_PRODUCER=/mnt/ssd/${USER}/dyad +export DYAD_PATH_CONSUMER=/mnt/ssd/${USER}/dyad +mkdir -p ${HOME}/demo_DLIO +export PYTHONPATH=${HOME}/demo_DLIO:$PYTHONPATH +export TEST_FILE_SIZE=33554432 +export TEST_NUM_FILES=6400 +``` +Copy `dyad/docs/demos/SCA26/DL/dyad_torch_data_loader.py` into `${HOME}/demo_DLIO` + +### Allocate compute nodes and start a Flux instance + +``` +salloc -N 4 -n 128 --tasks-per-node=32 -t 60 +source ${HOME}/new_venv/bin/activate +srun --mpi=pmi2 -N 4 -n 4 --pty flux start # -v -o,-S,log-filename=out.txt +flux kvs 
namespace create ${DYAD_KVS_NAMESPACE} +flux exec -r all flux module load ${DYAD_INSTALL_PREFIX}/lib/dyad.so +flux exec -r all rm -rf ${DYAD_PATH_PRODUCER} +flux exec -r all mkdir -p ${DYAD_PATH_PRODUCER} +``` + +These dlio_benchmark commands customize the existing unet3d template in the +benchmark suite by overriding parameters such as the size and number of data +files. + + +### Generate Training Data + + +``` +export DLIO_LOG_LEVEL=info; +flux run -N 4 -n 128 dlio_benchmark \ + workload=unet3d_a100 \ + ++workload.workflow.generate_data=True \ + ++workload.workflow.train=False \ + hydra.run.dir=${HOME}/demo_DLIO/output \ + ++workload.output.folder=${HOME}/demo_DLIO/output \ + ++workload.dataset.num_files_train=${TEST_NUM_FILES} \ + ++workload.dataset.record_length_bytes=${TEST_FILE_SIZE} \ + ++workload.dataset.record_length_bytes_stdev=0 \ + ++workload.dataset.data_folder=${HOME}/demo_DLIO/dataset \ + ++workload.checkpoint.checkpoint_folder=${HOME}/demo_DLIO/checkpoint +``` + +### Train without DYAD + +``` +flux run -N 4 -n 128 dlio_benchmark \ + workload=unet3d_a100 \ + ++workload.workflow.generate_data=False \ + ++workload.workflow.train=True \ + hydra.run.dir=${HOME}/demo_DLIO/output \ + ++workload.output.folder=${HOME}/demo_DLIO/output \ + ++workload.dataset.num_files_train=${TEST_NUM_FILES} \ + ++workload.dataset.record_length_bytes=${TEST_FILE_SIZE} \ + ++workload.dataset.record_length_bytes_stdev=0 \ + ++workload.dataset.data_folder=${HOME}/demo_DLIO/dataset \ + ++workload.checkpoint.checkpoint_folder=${HOME}/demo_DLIO/checkpoint \ + ++workload.reader.batch_size=1 \ + ++workload.train.epochs=20 \ + ++workload.train.computation_time=0 +``` + +### Train with DYAD + +``` +flux run -N 4 -n 128 dlio_benchmark \ + workload=unet3d_a100 \ + ++workload.workflow.generate_data=False \ + ++workload.workflow.train=True \ + hydra.run.dir=${HOME}/demo_DLIO/output \ + ++workload.output.folder=${HOME}/demo_DLIO/output \ + 
++workload.dataset.num_files_train=${TEST_NUM_FILES} \ + ++workload.dataset.record_length_bytes=${TEST_FILE_SIZE} \ + ++workload.dataset.record_length_bytes_stdev=0 \ + ++workload.dataset.data_folder=${HOME}/demo_DLIO/dataset \ + ++workload.checkpoint.checkpoint_folder=${HOME}/demo_DLIO/checkpoint \ + ++workload.reader.batch_size=1 \ + ++workload.train.epochs=20 \ + ++workload.train.computation_time=0 \ + ++workload.reader.multiprocessing_context=fork \ + ++workload.reader.data_loader_classname=dyad_torch_data_loader.DyadTorchDataLoader \ + ++workload.reader.data_loader_sampler=index +``` diff --git a/docs/demos/SCA26/interactive/c_cons.sh b/docs/demos/SCA26/interactive/c_cons.sh new file mode 100755 index 00000000..3457f80f --- /dev/null +++ b/docs/demos/SCA26/interactive/c_cons.sh @@ -0,0 +1,3 @@ +mkdir -m 775 -p ${DYAD_PATH_CONSUMER}/prod-cons +echo LD_PRELOAD=${DYAD_INSTALL_PREFIX}/lib/libdyad_wrapper.so ../c_cons 10 ${DYAD_PATH_CONSUMER}/prod-cons +LD_PRELOAD=${DYAD_INSTALL_PREFIX}/lib/libdyad_wrapper.so ../c_cons 10 ${DYAD_PATH_CONSUMER}/prod-cons diff --git a/docs/demos/SCA26/interactive/c_prod.sh b/docs/demos/SCA26/interactive/c_prod.sh new file mode 100755 index 00000000..6008be62 --- /dev/null +++ b/docs/demos/SCA26/interactive/c_prod.sh @@ -0,0 +1,3 @@ +mkdir -m 775 -p ${DYAD_PATH_PRODUCER}/prod-cons +echo LD_PRELOAD=${DYAD_INSTALL_PREFIX}/lib/libdyad_wrapper.so ../c_prod 10 ${DYAD_PATH_PRODUCER}/prod-cons +LD_PRELOAD=${DYAD_INSTALL_PREFIX}/lib/libdyad_wrapper.so ../c_prod 10 ${DYAD_PATH_PRODUCER}/prod-cons diff --git a/docs/ecp_feb_2023_tutorial.rst b/docs/ecp_feb_2023_tutorial.rst index 72efdba8..46e742e2 100644 --- a/docs/ecp_feb_2023_tutorial.rst +++ b/docs/ecp_feb_2023_tutorial.rst @@ -2,5 +2,4 @@ ECP Tutorial: February 10, 2023 ******************************* -Material for the DYAD demo at Flux's ECP tutorial can be found `here `_. -Additional information about the demo will be added soon. 
+Material for the DYAD demo at Flux's ECP tutorial can be found `https://github.com/flux-framework/Tutorials/tree/master/2023/ECP `_. diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 2af9fa8e..58460ba7 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -7,15 +7,22 @@ Prerequisites DYAD has the following minimum requirements to build and install: -* A C99-compliant C compiler -* A C++11-compliant C++ compiler -* Autoconf 2.63 -* Automake -* Libtool -* Make +* A C11-compliant C compiler +* A C++17-compliant C++ compiler +* CMake 3.12 * pkg-config -* Jansson 2.10 -* flux-core +* `flux-core `_ +* `jansson 2.10 or newer `_ +* flux-python + +Optionally, DYAD leverages: + +* `mochi-margo `_ to enable libfabric-based data transport layer (DTL). +* `ucx `_ to enable ucx-based DTL. +* `dftracer `_ for performance tracing of Python-based applications. +* cpp-logger for logging operational details to aid debugging. +* `perfflow-aspect `_ for visualizing function-level events to aid workflow performance diagnosis. +* `caliper `_ for collecting performance profiling. Installation ############ @@ -23,15 +30,6 @@ Installation Manual Installation ******************* -.. attention:: - - Currently, DYAD can only be installed manually. This page will be updated as additional - methods of installation are added. - -.. note:: - - Recommended for developers and contributors - You can get DYAD from its `GitHub repository `_ using these commands: @@ -40,56 +38,139 @@ these commands: $ git clone https://github.com/flux-framework/dyad.git $ cd dyad -DYAD uses the Autotools for building and installation. To start the build process, run -the following command to generate the necessary configuration scripts using Autoconf: +DYAD relies on cmake for both building and installation. .. 
code-block:: shell - $ ./auotgen.sh + $ mkdir build; cd build + $ cmake -DDYAD_ENABLE_MARGO_DATA=ON \ + -DDYAD_LIBDIR_AS_LIB=ON \ + -DCMAKE_INSTALL_PREFIX=${DYAD_INSTALL_PREFIX} \ + .. + $ make -j install -Next, configure DYAD using the following command: +.. note:: -.. code-block:: shell + The cmake command above is provided as an example. Refer to the options below to customize configuration as needed. + Set the env variable `DYAD_INSTALL_PREFIX` to the desired installation directory. - $ ./configure --prefix= -Besides the normal configure script flags, DYAD's configure script also has the following -flags: +To enable the DYAD Python binding, -+---------------------+-------------------------+--------------------------------------------+ -| Flag | Type (default) | Description | -+=====================+=========================+============================================+ -| --enable-dyad-debug | Bool (true if provided) | if enabled, include debugging prints and | -| | | logs for DYAD at runtime | -+---------------------+-------------------------+--------------------------------------------+ -| --enable-perfflow | Bool (true if provided) | if enabled, build PerfFlow Aspect-based | -| | | performance measurement annotations for | -| | | DYAD | -+---------------------+-------------------------+--------------------------------------------+ +.. code-block:: shell + + $ python3 -m venv .venv + $ source .venv/bin/activate + $ pip install flux-python==0.80.0 + $ cd pydyad + $ pip install . .. note:: - The installation prefix (i.e., :code:`--prefix`) is also used to try to locate flux-core. - First, :code:`configure` will look for flux-core in the installation prefix. If it is not - found there, :code:`configure` will then use :code:`pkg-config` to locate flux-core. + When installing *flux-python*, ensure that the version matches *flux-core*. + + + +There are several custom CMake options available to configure a DYAD build: + +.. 
This is how the list-table below would render ideally + +--------------------------+----------------------------+---------------------------------------------+ + | Flag | Values (**default**) | Description | + +==========================+============================+=============================================+ + | DYAD_ENABLE_MARGO_DATA | ON, **OFF** | Allow dynamic selection of Margo-based DTL | + | DYAD_ENABLE_UCX_DATA | ON, **OFF** | Allow dynamic selection of UCX-based DTL | + | DYAD_ENABLE_UCX_DATA_RMA | ON, **OFF** | Allow dynamic selection of UCX-based RMA DTL| + +--------------------------+----------------------------+---------------------------------------------+ + | DYAD_LOGGER | FLUX, CPP_LOGGER, **NONE** | Choose the method to log stdout/stderr | + | DYAD_LOGGER_LEVEL | DEBUG, INFO, WARN, | Choose the level of logging | + | | ERROR, **NONE** | | + +--------------------------+----------------------------+---------------------------------------------+ + | DYAD_PROFILER | PERFFLOW_ASPECT, CALIPER, | Choose the performance profiler | + | | DFTRACER, **NONE** | | + +--------------------------+----------------------------+---------------------------------------------+ + | DYAD_ENABLE_TESTS | ON, **OFF** | Build unit tests | + | DYAD_LIBDIR_AS_LIB | ON, **OFF** | Force lib as library install dir (no lib64) | + | DYAD_USE_CLANG_LIBCXX | ON, **OFF** | Use clang's native runtime instead of gnu | + | DYAD_WARNINGS_AS_ERRORS | ON, **OFF** | Turn compiler warning into error | + +--------------------------+----------------------------+---------------------------------------------+ + + +.. 
list-table:: + :header-rows: 1 + :widths: 25 25 50 + + * - Flag + - Values (**default**) + - Description + * - **Data Transfer Options** + - + - + * - DYAD_ENABLE_MARGO_DATA + - ON, **OFF** + - Allow dynamic selection of Margo-based DTL + * - DYAD_ENABLE_UCX_DATA + - ON, **OFF** + - Allow dynamic selection of UCX-based DTL + * - DYAD_ENABLE_UCX_DATA_RMA + - ON, **OFF** + - Allow dynamic selection of UCX-based RMA DTL + * - **Logging and Profiling** + - + - + * - DYAD_LOGGER + - FLUX, CPP_LOGGER, **NONE** + - Choose the method to log stdout/stderr + * - DYAD_LOGGER_LEVEL + - DEBUG, INFO, WARN, ERROR, **NONE** + - Choose the level of logging + * - DYAD_PROFILER + - PERFFLOW_ASPECT, CALIPER, DFTRACER, **NONE** + - Choose the performance profiler + * - **Compiling/Linking Customization** + - + - + * - DYAD_ENABLE_TESTS + - ON, **OFF** + - Build unit tests + * - DYAD_LIBDIR_AS_LIB + - ON, **OFF** + - Force lib as library install dir (no lib64) + * - DYAD_USE_CLANG_LIBCXX + - ON, **OFF** + - Use clang's native runtime instead of GNU + * - DYAD_WARNINGS_AS_ERRORS + - ON, **OFF** + - Turn compiler warnings into errors -Finally, build and install DYAD using the following commands: -.. code-block:: shell +.. note:: + + Mochi-Margo enables seamless adoption of various DTL types. Currently, DYAD + relies on it only for **libfabric**, but we plan to fully leverage the diverse + options it provides in the near future. + + To enable a specific DTL type, DYAD requires the environment variable + ``DYAD_DTL_MODE`` to be set accordingly. At present, three values are + supported: ``MARGO``, ``UCX``, and ``FLUX_RPC``. - $ make [-j] - $ make install + - When ``DYAD_DTL_MODE`` is set to ``UCX`` and DYAD has been built with the + CMake option ``DYAD_ENABLE_UCX_DATA_RMA=ON``, data transfer is performed + asynchronously via **remote memory access (RMA)** to reduce communication + costs. + - When built with ``DYAD_ENABLE_UCX_DATA=ON``, data transfer is synchronous + using UCX. 
In other words, the choice between synchronous and RMA-based UCX + is mutually exclusive at compile time. -Building with PerfFlow Aspect Support (Optional) -************************************************ + However, the selection between ``MARGO``, ``UCX``, and ``FLUX_RPC`` can be + made dynamically at launch time. Ensure that ``DYAD_DTL_MODE`` is set + consistently in both the service and client environments. -DYAD has optional support for collecting cross-cutting performance data using -`PerfFlow Aspect `_. To enable this support, -first build PerfFlow Aspect for C/C++ using -`their instructions `_. -Then, modify your method of choice for building DYAD as follows: + If none of the three DTL-related CMake options are set, DYAD defaults to + using **FLUX RPC** for data transfer. While DYAD currently supports four + different data transfer methods, the client relies only on FLUX RPC to send + transfer requests to the service. In the future, we plan to offer alternative, + portable RPC methods. -* **Manual Installation**: add :code:`--enable-perfflow` to your invocation of `./configure` Using DYAD's APIs ################# @@ -98,6 +179,7 @@ Currently, DYAD provides APIs for the following programming languages: * C * C++ +* Python This section describes the basics of integrating them into an application. @@ -120,11 +202,13 @@ C++ API ******* DYAD's C++ API is implemented as a small library that wraps C++'s Standard Library file streams. -To use DYAD's C++ API, first, add the following to your code: +LD_PRELOAD-based function substitution does not apply to C++ streams because the +symbols to be intercepted are not exposed. Instead, we provide a set of lightweight +wrapper classes. To use DYAD's C++ API, first, add the following to your code: .. code-block:: cpp - #include + #include This header defines thin wrappers around the file streams provided by the C++ Standard Library. 
More specifically, it provides the following classes: @@ -168,6 +252,20 @@ you must link the associated library (i.e., :code:`libdyad_stream.so` or :code:`libdyad_stream.a`). This library can be found in the :code:`lib` subdirectory of the install prefix. + +Python +******* + +We offer PyDYAD, a Python binding to the DYAD client library implemented in C. +A producer-consumer example can be found at `tests/pydyad_spsc `_. + +.. toctree:: + :maxdepth: 1 + :caption: An exmaple of integrating DYAD with PyTorch DataLoader + + integration_with_PyTorch + + Running DYAD ############ @@ -251,39 +349,27 @@ do this by running: Configure and Run the DYAD-Enabled Applications *********************************************** -Once the KVS namespace and DYAD module are set up, the DYAD-enabled applications can be run. To run a DYAD-enabled -application, simply run your application as normal with certain environment variables set. A table containing the current -environment variables recognized by DYAD is shown below. - -+--------------------------------+-----------------+--------------+---------+-----------------------------------------------------------------+ -| Name | Type | Required? 
| Default | Description | -+================================+=================+==============+=========+=================================================================+ -| :code:`DYAD_KVS_NAMESPACE` | String | Yes | N/A | The Flux KVS namespace that DYAD will use to record or look | -| | | | | | -| | | | | for file information | -+--------------------------------+-----------------+--------------+---------+-----------------------------------------------------------------+ -| :code:`DYAD_PATH_PRODUCER` | Directory Path | Yes [#one]_ | N/A | The producer-managed path of the application | -+--------------------------------+ + + +-----------------------------------------------------------------+ -| :code:`DYAD_PATH_CONSUMER` | | | | The consumer-managed path of the application | -+--------------------------------+-----------------+--------------+---------+-----------------------------------------------------------------+ -| :code:`DYAD_SHARED_STORAGE` | 0 or 1 | No | 0 | If 1 (i.e., true), only provide per-file synchronization of | -| | | | | | -| | | | | the consumer (i.e., no transfer) | -+--------------------------------+-----------------+--------------+---------+-----------------------------------------------------------------+ -| :code:`DYAD_KEY_DEPTH` [#two]_ | Integer | No | 3 | The number of levels in Flux's hierarchical KVS to use | -| | | | | | -| | | | | within DYAD's namespace | -+--------------------------------+-----------------+--------------+---------+-----------------------------------------------------------------+ -| :code:`DYAD_KEY_BINS` [#two]_ | Integer | No | 1024 | The maximum number of unique values for the keys associated | -| | | | | | -| | | | | with any given level of Flux's hierarchical KVS within | -| | | | | | -| | | | | DYAD's namespace | -+--------------------------------+-----------------+--------------+---------+-----------------------------------------------------------------+ +In addition to the two essential variables discussed 
above—``DYAD_KVS_NAMESPACE`` +and ``DYAD_DTL_MODE``—two more variables, ``DYAD_PATH_PRODUCER`` and ``DYAD_PATH_CONSUMER``, +are required to run DYAD-enabled applications. +The table below lists the essential ones. For the full list of variables, refer to +:doc:`runtime_configuration`. + + ++--------------------------------+-----------------+--------------+----------+-----------------------------------------------------------------+ +| Name | Type | Required? | Default | Description | ++================================+=================+==============+==========+=================================================================+ +| :code:`DYAD_KVS_NAMESPACE` | String | Yes | N/A | The Flux KVS namespace that DYAD will use to record or look | +| | | | | | +| | | | | for file information | ++--------------------------------+-----------------+--------------+----------+-----------------------------------------------------------------+ +| :code:`DYAD_PATH_PRODUCER` | Directory Path | Yes [#one]_ | N/A | The producer-managed path of the application | ++--------------------------------+ + + +-----------------------------------------------------------------+ +| :code:`DYAD_PATH_CONSUMER` | | | | The consumer-managed path of the application | ++--------------------------------+-----------------+--------------+----------+-----------------------------------------------------------------+ +| :code:`DYAD_DTL_MODE` | String | No | FLUX_RPC | Choose data transfer method among MARGO, UCX, FLUX_RPC | ++--------------------------------+-----------------+--------------+----------+-----------------------------------------------------------------+ .. [#one] For DYAD to do anything, at least one of :code:`DYAD_PATH_PRODUCER` or :code:`DYAD_PATH_CONSUMER` must be provided. Applications will still work if neither are provided, but DYAD will not do anything. -.. 
[#two] Since the Flux KVS is hierarchical, the number of KVS levels (controlled by :code:`DYAD_KEY_DEPTH`) and - the size of each KVS level (controlled by :code:`DYAD_KEY_BINS`) will affect the performance of DYAD. To obtain - optimal performance, tune these values for your use case. diff --git a/docs/index.rst b/docs/index.rst index 7161c34a..4ec45bad 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -31,11 +31,24 @@ In resolving these challenges, DYAD aims to provide the following to users: getting_started +.. toctree:: + :maxdepth: 1 + :caption: Runtime Configuration + + runtime_configuration + +.. toctree:: + :maxdepth: 2 + :caption: Debugging + + debugging + .. toctree:: :maxdepth: 2 :caption: Tutorials ecp_feb_2023_tutorial + SCA-HPCAsia26_tutorial .. toctree:: :maxdepth: 2 diff --git a/docs/integration_with_PyTorch.rst b/docs/integration_with_PyTorch.rst new file mode 100644 index 00000000..aeecdb24 --- /dev/null +++ b/docs/integration_with_PyTorch.rst @@ -0,0 +1,14 @@ +An Example of Integrating DYAD into PyTorch DataLoader +****************************************************** + +.. literalinclude:: demos/SCA26/DL/dyad_torch_data_loader.py + :language: python + :linenos: + :caption: Unet3D DataLoader in DLIO + +The lines relevant to DYAD are: + +- The line 29 for importing PyDYAD +- The lines 71-84 for initialization +- The lines in ``__getitem__()`` +- `dlp.` lines are for profiler and not directly relevant to DYAD diff --git a/docs/publications.rst b/docs/publications.rst index 662f40a5..06cd9a9c 100644 --- a/docs/publications.rst +++ b/docs/publications.rst @@ -5,9 +5,14 @@ Publications and Presentations Papers ###### +* H. Devarajan, I. Lumsden, C. Wang, K. Georgouli, T. Scogland, J. Yeom, and M. Taufer, + "DYAD: Locality-aware Data Management for accelerating Deep Learning Training," + *IEEE International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)*, Nov. 2024. 
+ :download:`PDF <_static/Paper_2024_SBACPAD_DYAD.pdf>` + * I. Lumsden, H. Devarajan, J. Marquez, S. Brink, D. Boehme, O. Pearce, J. Yeom, and M. Taufer, "Empirical Study of Molecular Dynamics Workflow Data Movement: DYAD vs. Traditional I/O Systems," - in *2024 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)*, 2024. + in *2024 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)*, May 2024. :download:`PDF <_static/Paper_2024_IPDPS_HiCOMB_DYAD.pdf>` Posters @@ -23,4 +28,4 @@ Posters * I. Lumsden, J. Yeom, H. Devarajan, K. Mohror, and M. Taufer, "Enabling Transparent, High-Throughput Data Movement for Scientific Workflows on HPC Systems," presented in ACM Student Research Competition at SC' 23. - :download:`Extended Abstract PDF <_static/ExtendedAbstract_2023_SC_ACM_SRC_DYAD.pdf>` | :download:`Poster PDF <_static/Poster_2023_SC_ACM_SRC_DYAD.pdf>` \ No newline at end of file + :download:`Extended Abstract PDF <_static/ExtendedAbstract_2023_SC_ACM_SRC_DYAD.pdf>` | :download:`Poster PDF <_static/Poster_2023_SC_ACM_SRC_DYAD.pdf>` diff --git a/docs/requirements.txt b/docs/requirements.txt index 46c59599..0e35777c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,4 @@ Sphinx<7.0.0 sphinx-rtd-theme +myst-parser +rst2pdf diff --git a/docs/runtime_configuration.rst b/docs/runtime_configuration.rst new file mode 100644 index 00000000..f9317546 --- /dev/null +++ b/docs/runtime_configuration.rst @@ -0,0 +1,46 @@ +****************************** +Runtime Configuration +****************************** + +At launch time, DYAD allows users to customize its behavior and make better use +of the environment through various environment variables. The list of these +variables is shown below. + ++--------------------------------+-----------------+--------------+----------+-----------------------------------------------------------------+ +| Name | Type | Required? 
| Default | Description | ++================================+=================+==============+==========+=================================================================+ +| :code:`DYAD_KVS_NAMESPACE` | String | Yes | N/A | The Flux KVS namespace that DYAD will use to record or look | +| | | | | | +| | | | | for file information | ++--------------------------------+-----------------+--------------+----------+-----------------------------------------------------------------+ +| :code:`DYAD_PATH_PRODUCER` | Directory Path | Yes [#two]_ | N/A | The producer-managed path of the application | ++--------------------------------+ + + +-----------------------------------------------------------------+ +| :code:`DYAD_PATH_CONSUMER` | | | | The consumer-managed path of the application | ++--------------------------------+-----------------+--------------+----------+-----------------------------------------------------------------+ +| :code:`DYAD_DTL_MODE` | String | No | FLUX_RPC | Choose data transfer method among MARGO, UCX, FLUX_RPC | ++--------------------------------+-----------------+--------------+----------+-----------------------------------------------------------------+ +| :code:`DYAD_PATH_RELATIVE` | 0 or 1 | No | 0 | The presence of this variable in the environment indicates that | +| | | | | | +| | | | | DYAD treats relative paths as relative to the managed directory | ++--------------------------------+-----------------+--------------+----------+-----------------------------------------------------------------+ +| :code:`DYAD_SHARED_STORAGE` | 0 or 1 | No | 0 | 1: only per-file access synchronization for consumer | +| | | | | | +| | | | | but no transfer or the overhead associated with it | ++--------------------------------+-----------------+--------------+----------+-----------------------------------------------------------------+ +| :code:`DYAD_ASYNC_PUBLISH` | 0 or 1 | No | 0 | Enable asynchronous metadata publishing by producers. 
| ++--------------------------------+-----------------+--------------+----------+-----------------------------------------------------------------+ +| :code:`DYAD_SERVICE_MUX` | integer >= 1 | No | 1 | Number of Flux brokers sharing node-local storage. | ++--------------------------------+-----------------+--------------+----------+-----------------------------------------------------------------+ +| :code:`DYAD_KEY_DEPTH` [#thr]_ | Integer | No | 3 | The number of levels in Flux's hierarchical KVS to use | +| | | | | | +| | | | | within DYAD's namespace | ++--------------------------------+-----------------+--------------+----------+-----------------------------------------------------------------+ +| :code:`DYAD_KEY_BINS` [#thr]_ | Integer | No | 1024 | The maximum number of unique hash values per level in Flux’s | +| | | | | | +| | | | | hierarchical KVS within DYAD’s namespace. | ++--------------------------------+-----------------+--------------+----------+-----------------------------------------------------------------+ + +.. [#two] For DYAD to do anything, at least one of :code:`DYAD_PATH_PRODUCER` or :code:`DYAD_PATH_CONSUMER` must be provided. + Applications will still work if neither are provided, but DYAD will not do anything. + +.. [#thr] The Flux KVS supports organized categorization of data through hierarchical key structuring. DYAD leverages this capability to optimize hash lookup performance by increasing the likelihood of early search termination when no matching entry exists in the store. The search key is hashed across multiple levels using different seeds. A match is confirmed only if corresponding entries are found at all levels, with an exact key-string match at the final level. 
diff --git a/src/dyad/client/dyad_client.c b/src/dyad/client/dyad_client.c index 42dad921..98065985 100644 --- a/src/dyad/client/dyad_client.c +++ b/src/dyad/client/dyad_client.c @@ -580,9 +580,7 @@ dyad_rc_t dyad_produce (dyad_ctx_t *restrict ctx, const char *restrict fname) // If the producer-managed path is NULL or empty, then the context is not // valid for a producer operation. So, return DYAD_BADMANAGEDPATH if (ctx->prod_managed_path == NULL) { - DYAD_LOG_ERROR (ctx, - "DYAD CLIENT: No or empty producer managed path was found %s", - ctx->prod_managed_path); + DYAD_LOG_ERROR (ctx, "DYAD CLIENT: No or empty producer managed path was found"); rc = DYAD_RC_BADMANAGEDPATH; goto produce_done; } diff --git a/src/dyad/common/dyad_logging.h b/src/dyad/common/dyad_logging.h index 77b974f9..9dc1c9bb 100644 --- a/src/dyad/common/dyad_logging.h +++ b/src/dyad/common/dyad_logging.h @@ -38,8 +38,8 @@ extern "C" { #ifdef DYAD_LOGGER_FLUX // FLUX ----------------------------------------------- #define DYAD_LOGGER_INIT() ; -#define DYAD_LOG_STDOUT_REDIRECT(fpath) freopen ((fpath), "a+", stdout); -#define DYAD_LOG_STDERR_REDIRECT(fpath) freopen ((fpath), "a+", stderr); +#define DYAD_LOG_STDOUT_REDIRECT(fpath) (void)!freopen ((fpath), "a+", stdout); +#define DYAD_LOG_STDERR_REDIRECT(fpath) (void)!freopen ((fpath), "a+", stderr); #ifdef DYAD_UTIL_LOGGER #ifdef DYAD_LOGGER_LEVEL_DEBUG diff --git a/src/dyad/dtl/margo_dtl.c b/src/dyad/dtl/margo_dtl.c index b757f84d..cc9b9d6e 100644 --- a/src/dyad/dtl/margo_dtl.c +++ b/src/dyad/dtl/margo_dtl.c @@ -370,7 +370,7 @@ dyad_rc_t dyad_dtl_margo_send (const dyad_ctx_t* ctx, void* buf, size_t buflen) margo_free_output (h, &resp); margo_destroy (h); - DYAD_LOG_DEBUG (ctx, "[MARGO DTL] margo_send completed.", buflen); + DYAD_LOG_DEBUG (ctx, "[MARGO DTL] margo_send completed, buflen: %lu", buflen); DYAD_C_FUNCTION_END (); return rc; diff --git a/tests/urpc/test_urpc.cpp b/tests/urpc/test_urpc.cpp deleted file mode 100644 index 4e2dc13c..00000000 
--- a/tests/urpc/test_urpc.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/************************************************************\ - * Copyright 2021 Lawrence Livermore National Security, LLC - * (c.f. AUTHORS, NOTICE.LLNS, COPYING) - * - * This file is part of the Flux resource manager framework. - * For details, see https://github.com/flux-framework. - * - * SPDX-License-Identifier: LGPL-3.0 -\************************************************************/ - -#include -#include -#include - -#include "read_all.h" -#include "urpc_client.h" - -int main (int argc, char **argv) -{ - if ((argc != 4) && (argc != 5)) { - std::cout << "Usage: " << argv[0] - << " command_str is_json(0|1) server_rank " - "[stdout_result_file]" - << std::endl; - return EXIT_FAILURE; - } - - std::string cmd = argv[1]; - int is_json = atoi (argv[2]); - uint32_t server_rank = static_cast (atoi (argv[3])); - std::string result; - - if (argc == 5) { - result = argv[4]; - } - - setenv ("URPC_CLIENT_DEBUG", "1", 0); - - std::cout << "Running '" << cmd << "' on the flux broker of rank " - << server_rank << std::endl; - std::cout << "Result will be written into " << result << " at rank " - << urpc_get_my_rank () << std::endl; - - if (is_json) { - void *inbuf = NULL; - FILE *fp = NULL; - fp = fopen (argv[1], "r"); - if (fp == NULL) { - fprintf (stderr, "Unable to open a file: %s\n", argv[1]); - exit (-1); - } - read_all (fileno (fp), &inbuf); - fclose (fp); - cmd = reinterpret_cast (inbuf); - std::cout << cmd << std::endl; - } - urpc_client (server_rank, cmd.c_str (), result.c_str (), is_json); - - return EXIT_SUCCESS; -}