From 8dc34dfb56078f5992ebda178b93ada7ae17cde8 Mon Sep 17 00:00:00 2001
From: chasem <chasem@wustl.edu>
Date: Fri, 17 Apr 2026 15:41:09 -0500
Subject: [PATCH 01/16] adding performance logging

---
 configure_logger.py                           | 88 ++++++++++++-------
 docs/development.md                           | 23 ++++-
 production.yml                                |  4 +-
 tfbpshiny/__main__.py                         | 14 +++
 tfbpshiny/app.py                              | 18 +++-
 tfbpshiny/modules/binding/server/workspace.py | 64 +++++++++-----
 .../modules/comparison/server/workspace.py    | 52 +++++++----
 .../modules/perturbation/server/workspace.py  | 59 +++++++++----
 .../select_datasets/server/workspace.py       | 38 +++++++-
 tfbpshiny/utils/profiler.py                   | 48 ++++++++++
 tfbpshiny/utils/vdb_init.py                   | 52 ++++++++---
 11 files changed, 354 insertions(+), 106 deletions(-)
 create mode 100644 tfbpshiny/utils/profiler.py

diff --git a/configure_logger.py b/configure_logger.py
index 0fb62a0..c8acb22 100644
--- a/configure_logger.py
+++ b/configure_logger.py
@@ -2,6 +2,8 @@
 from enum import Enum
 from typing import Literal
 
+_PROFILE_FORMAT = "%(message)s"
+
 
 class LogLevel(Enum):
     DEBUG = logging.DEBUG
@@ -31,34 +33,22 @@ def from_string(cls, level_str: str) -> Enum:
 
 def configure_logger(
     name: str,
-    level: int = logging.DEBUG,  # Use int type hint here
+    level: int = logging.DEBUG,
     format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
     handler_type: Literal["console", "file"] = "console",
     log_file: str = "tfbpmodeling.log",
 ) -> logging.Logger:
     """
-    Configures a logger.
-
-    :param name: Name of the logger
-    :type name: str
-    :param level: Logging level, must be one of logging.DEBUG,
-        logging.INFO, logging.WARNING, logging.ERROR
-    :type level: int
-    :param format: Logging format
-    :type format: str
-    :param handler_type: Type of handler, either 'console' or 'file'
-    :type handler_type: Literal["console", "file"]
-    :param log_file: Path to log file, required if handler_type is 'file'.
-        Default is 'tfbpmodeling.log'
-    :type log_file: str
-
-    :return: Configured logger
+    Configure a logger with a single handler.
+
+    :param name: Logger name.
+    :param level: Logging level (``logging.DEBUG``, ``logging.INFO``, etc.).
+    :param format: Log record format string.
+    :param handler_type: Destination — ``"console"`` or ``"file"``.
+    :param log_file: Path used when ``handler_type="file"``.
+    :returns: Configured logger.
     :rtype: logging.Logger
-
-    :raises ValueError: If any of the parameters have invalid datatypes
-
-    example usage:
-    >>> logger = configure_logger("my_logger", level=logging.INFO)
+    :raises ValueError: If any parameter is invalid.
 
     """
     if not isinstance(name, str):
@@ -69,7 +59,7 @@ def configure_logger(
         raise ValueError("Invalid logging level")
     if not isinstance(format, str):
         raise ValueError("format must be a string")
-    if handler_type not in ["console", "file"]:
+    if handler_type not in ("console", "file"):
         raise ValueError("handler_type must be 'console' or 'file'")
     if handler_type == "file" and not log_file:
         raise ValueError("log_file must be specified for file handler")
@@ -77,22 +67,52 @@ def configure_logger(
     logger = logging.getLogger(name)
     logger.setLevel(level)
 
-    # Remove all handlers associated with the logger object to avoid duplicate logs
-    for handler in logger.handlers[:]:
-        logger.removeHandler(handler)
+    # Remove existing handlers to avoid duplicates on re-configuration.
+    for h in logger.handlers[:]:
+        logger.removeHandler(h)
 
     if handler_type == "console":
-        handler = logging.StreamHandler()
-    elif handler_type == "file":
-        if not log_file:
-            raise ValueError("log_file must be specified for file handler")
-        handler = logging.FileHandler(log_file)
+        handler: logging.Handler = logging.StreamHandler()
     else:
-        raise ValueError("Invalid handler_type. Must be 'console' or 'file'.")
+        handler = logging.FileHandler(log_file)
 
     handler.setLevel(level)
-    formatter = logging.Formatter(format)
-    handler.setFormatter(formatter)
+    handler.setFormatter(logging.Formatter(format))
     logger.addHandler(handler)
 
     return logger
+
+
+def configure_profile_logger(
+    handler_type: Literal["console", "file"] = "console",
+    level: int = logging.DEBUG,
+    log_file: str = "tfbpshiny_profile.log",
+    enabled: bool = True,
+) -> logging.Logger:
+    """
+    Configure and return the ``"profiler"`` logger for timing instrumentation.
+
+    The profiler logger uses a bare ``%(message)s`` format because all
+    structure is embedded in the message by
+    :func:`tfbpshiny.utils.profiler.profile_span`.
+    It never propagates to the root or ``"shiny"`` logger.
+
+    :param handler_type: Destination for profile records — ``"console"`` or ``"file"``.
+    :param level: Log level; ignored (set to ``CRITICAL``) when ``enabled=False``.
+    :param log_file: Path used when ``handler_type="file"``.
+    :param enabled: When ``False``, silences the logger by setting its level
+        to ``CRITICAL``.
+    :returns: Configured ``"profiler"`` logger.
+    :rtype: logging.Logger
+
+    """
+    effective_level = level if enabled else logging.CRITICAL
+    logger = configure_logger(
+        "profiler",
+        level=effective_level,
+        format=_PROFILE_FORMAT,
+        handler_type=handler_type,
+        log_file=log_file,
+    )
+    logger.propagate = False
+    return logger
diff --git a/docs/development.md b/docs/development.md
index a0a02f3..ca330d6 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -45,4 +45,25 @@ callbacks are necessary, then either a different plotting library or a custom
 implementation of some sort will likely be necessary. Would be nice to explore
 other options, including custom implementations of plots using d3 and then creating
 the shiny widget manually. With AI, that might be more achievable (for me at least)
-than it would be otherwise b/c we can give the AI the docs for all three as context.
\ No newline at end of file
+than it would be otherwise b/c we can give the AI the docs for all three as context.
+
+There is both a general log and profiling log
+
+Both loggers write to stdout/stderr, Docker's awslogs driver captures everything into the shinyapp stream under /tfbpshiny/production. To later separate PROFILE lines from main log lines when parsing:
+
+```{python}
+import pandas as pd
+
+df = pd.read_csv("exported.log", sep="|", header=None, skipinitialspace=True,
+                 names=["marker","timestamp","elapsed_s","op","module","dataset","context"])
+profile = df[df["marker"].str.strip() == "PROFILE"]
+```
+
+
+Or in CloudWatch Logs Insights:
+
+```{raw}
+fields @timestamp, @message
+| filter @message like /^PROFILE/
+| parse @message "PROFILE | * | * | * | * | * | *" as timestamp, elapsed_s, op, module, dataset, context
+```
\ No newline at end of file
diff --git a/production.yml b/production.yml
index ede0f0d..44f2335 100644
--- a/production.yml
+++ b/production.yml
@@ -12,6 +12,8 @@ services:
       - HF_HOME=/hf-cache
       - HF_TOKEN
       - DOCKER_ENV
+      - TFBPSHINY_LOG_HANDLER=console
+      - TFBPSHINY_PROFILE_HANDLER=console
     volumes:
       - hf_cache:/hf-cache
     expose:
@@ -62,7 +64,7 @@ services:
       driver: awslogs
       options:
         awslogs-group: /tfbpshiny/production
-        awslogs-region: us-east-1
+        awslogs-region: us-east-2
         awslogs-stream: traefik
         awslogs-create-group: "true"
     restart: unless-stopped
diff --git a/tfbpshiny/__main__.py b/tfbpshiny/__main__.py
index 3b4e78c..cb16872 100644
--- a/tfbpshiny/__main__.py
+++ b/tfbpshiny/__main__.py
@@ -38,6 +38,18 @@ def make_parser() -> argparse.ArgumentParser:
         choices=["console", "file"],
         help="Set log handler type",
     )
+    parser.add_argument(
+        "--profile-handler",
+        type=str,
+        default="console",
+        choices=["console", "file"],
+        help="Handler for the profiler logger",
+    )
+    parser.add_argument(
+        "--no-profile",
+        action="store_true",
+        help="Disable the profiler logger entirely",
+    )
 
     subparsers = parser.add_subparsers(dest="command", required=True)
 
@@ -78,6 +90,8 @@ def main() -> None:
     # convert to int when configuring the logger
     os.environ["TFBPSHINY_LOG_LEVEL"] = str(log_level.value)
     os.environ["TFBPSHINY_LOG_HANDLER"] = args.log_handler
+    os.environ["TFBPSHINY_PROFILE_HANDLER"] = args.profile_handler
+    os.environ["TFBPSHINY_PROFILE_ENABLED"] = "0" if args.no_profile else "1"
     args.func(args)
 
 
diff --git a/tfbpshiny/app.py b/tfbpshiny/app.py
index 60e4eed..7068c21 100644
--- a/tfbpshiny/app.py
+++ b/tfbpshiny/app.py
@@ -9,7 +9,7 @@
 from dotenv import load_dotenv
 from shiny import App, reactive, render, ui
 
-from configure_logger import configure_logger
+from configure_logger import configure_logger, configure_profile_logger
 from tfbpshiny.components import github_badge, nav_button
 from tfbpshiny.modules.binding.server import (
     binding_sidebar_server,
@@ -60,6 +60,14 @@
     log_file=log_file,
 )
 
+profile_logger = configure_profile_logger(
+    handler_type=cast(
+        Literal["console", "file"],
+        os.getenv("TFBPSHINY_PROFILE_HANDLER", "console"),
+    ),
+    enabled=os.getenv("TFBPSHINY_PROFILE_ENABLED", "1") == "1",
+)
+
 # instantiate the virtualDB and compute app-level dataset metadata
 
 virtualdb_config = os.getenv(
@@ -67,7 +75,9 @@
 )
 hf_token: str | None = os.getenv("HF_TOKEN")
 logger.info(f"Loading VirtualDB with config: {virtualdb_config}")
-vdb, app_datasets = initialize_data(virtualdb_config, hf_token)
+vdb, app_datasets = initialize_data(
+    virtualdb_config, hf_token, profile_logger=profile_logger
+)
 
 app_ui = ui.page_fillable(
     ui.include_css((Path(__file__).parent / "app.css").resolve()),
@@ -121,6 +131,7 @@ def app_server(input: Any, output: Any, session: Any) -> None:
         dataset_filters=dataset_filters,
         vdb=vdb,
         logger=logger,
+        profile_logger=profile_logger,
     )
 
     corr_type, col_preference = binding_sidebar_server(
@@ -138,6 +149,7 @@ def app_server(input: Any, output: Any, session: Any) -> None:
         dataset_filters=dataset_filters,
         vdb=vdb,
         logger=logger,
+        profile_logger=profile_logger,
     )
 
     corr_type_p, col_preference_p = perturbation_sidebar_server(
@@ -155,6 +167,7 @@ def app_server(input: Any, output: Any, session: Any) -> None:
         dataset_filters=dataset_filters,
         vdb=vdb,
         logger=logger,
+        profile_logger=profile_logger,
     )
 
     top_n, effect_threshold, pvalue_threshold, facet_by = comparison_sidebar_server(
@@ -175,6 +188,7 @@ def app_server(input: Any, output: Any, session: Any) -> None:
         facet_by=facet_by,
         vdb=vdb,
         logger=logger,
+        profile_logger=profile_logger,
     )
 
     # set the active module when a nav button is clicked
diff --git a/tfbpshiny/modules/binding/server/workspace.py b/tfbpshiny/modules/binding/server/workspace.py
index fcb731b..652b9b8 100644
--- a/tfbpshiny/modules/binding/server/workspace.py
+++ b/tfbpshiny/modules/binding/server/workspace.py
@@ -16,6 +16,7 @@
     get_measurement_column,
     regulator_scatter_sql,
 )
+from tfbpshiny.utils.profiler import profile_span
 from tfbpshiny.utils.vdb_init import get_regulator_display_name
 
 
@@ -30,6 +31,7 @@ def binding_workspace_server(
     dataset_filters: reactive.Value[dict[str, Any]],
     vdb: VirtualDB,
     logger: Logger,
+    profile_logger: Logger,
 ) -> None:
     """
     Render the binding correlation rows: pairwise distributions
@@ -107,17 +109,24 @@ def _all_corr_data() -> dict[tuple[str, str], pd.DataFrame]:
                 logger.debug(
                     f"Correlating {db_a}({col_a}) vs {db_b}({col_b}) ({method})"
                 )
-                result[(db_a, db_b)] = corr_pair_sql(
-                    vdb,
-                    db_a,
-                    col_a,
-                    filters.get(db_a),
-                    db_b,
-                    col_b,
-                    filters.get(db_b),
-                    method,
-                    prefix=f"p{i}_",
-                )
+                with profile_span(
+                    profile_logger,
+                    "vdb.query",
+                    module="binding",
+                    dataset=f"{db_a}x{db_b}",
+                    context="_all_corr_data",
+                ):
+                    result[(db_a, db_b)] = corr_pair_sql(
+                        vdb,
+                        db_a,
+                        col_a,
+                        filters.get(db_a),
+                        db_b,
+                        col_b,
+                        filters.get(db_b),
+                        method,
+                        prefix=f"p{i}_",
+                    )
             except Exception as exc:
                 logger.error(
                     f"Failed to correlate {db_a} vs {db_b}: {exc}", exc_info=True
@@ -249,14 +258,16 @@ def distributions_plot() -> ui.Tag:
             "  });"
             "})();"
         )
-        return ui.HTML(
-            to_html(
-                fig,
-                include_plotlyjs="cdn",
-                full_html=False,
-                post_script=post_script,
+        with profile_span(
+            profile_logger,
+            "plot.build",
+            module="binding",
+            context="distributions_plot",
+        ):
+            html = to_html(
+                fig, include_plotlyjs="cdn", full_html=False, post_script=post_script
             )
-        )
+        return ui.HTML(html)
 
     @render.ui
     def regulator_selector() -> ui.Tag:
@@ -385,7 +396,13 @@ def _strip_reg(f: dict | None) -> dict | None:
                     reg,
                     idx,
                 )
-                merged = vdb.query(scatter_sql, **scatter_params)
+                with profile_span(
+                    profile_logger,
+                    "vdb.query",
+                    module="binding",
+                    context="scatter",
+                ):
+                    merged = vdb.query(scatter_sql, **scatter_params)
                 logger.debug(
                     f"scatter {db_a}/{db_b} reg={reg!r} "
                     f"rows={len(merged)} fa={fa!r} fb={fb!r}"
@@ -447,9 +464,16 @@ def _strip_reg(f: dict | None) -> dict | None:
                 width=400,
                 height=400,
             )
+            with profile_span(
+                profile_logger,
+                "plot.build",
+                module="binding",
+                context="regulator_plot",
+            ):
+                plot_html = to_html(fig, include_plotlyjs=False, full_html=False)
             plot_divs.append(
                 ui.div(
-                    ui.HTML(to_html(fig, include_plotlyjs=False, full_html=False)),
+                    ui.HTML(plot_html),
                     style="flex: 0 0 auto;",
                 )
             )
diff --git a/tfbpshiny/modules/comparison/server/workspace.py b/tfbpshiny/modules/comparison/server/workspace.py
index f71eb0a..a872cb6 100644
--- a/tfbpshiny/modules/comparison/server/workspace.py
+++ b/tfbpshiny/modules/comparison/server/workspace.py
@@ -20,6 +20,7 @@
     PERTURBATION_LABEL_MAP,
     topn_responsive_ratio,
 )
+from tfbpshiny.utils.profiler import profile_span
 from tfbpshiny.utils.vdb_init import get_regulator_display_name
 
 # color palettes
@@ -70,6 +71,7 @@ def comparison_workspace_server(
     facet_by: Callable[[], str],
     vdb: VirtualDB,
     logger: Logger,
+    profile_logger: Logger,
 ) -> None:
     """Render the Top-N by Binding workspace plot."""
 
@@ -133,19 +135,26 @@ def _topn_data() -> pd.DataFrame:
                     p_filters = (
                         None if p_cfg.get("hackett_time_filter") else filters.get(p_db)
                     )
-                    result = topn_responsive_ratio(
-                        vdb=vdb,
-                        binding_view=b_db,
-                        perturbation_view=p_db,
-                        top_n=n,
-                        effect_threshold=eff,
-                        pvalue_threshold=pval,
-                        binding_filters=filters.get(b_db),
-                        perturbation_filters=p_filters,
-                        param_prefix=f"{b_db}_{p_db}",
-                        **b_cfg,
-                        **p_cfg,
-                    )
+                    with profile_span(
+                        profile_logger,
+                        "vdb.query",
+                        module="comparison",
+                        dataset=f"{b_db}x{p_db}",
+                        context="_topn_data",
+                    ):
+                        result = topn_responsive_ratio(
+                            vdb=vdb,
+                            binding_view=b_db,
+                            perturbation_view=p_db,
+                            top_n=n,
+                            effect_threshold=eff,
+                            pvalue_threshold=pval,
+                            binding_filters=filters.get(b_db),
+                            perturbation_filters=p_filters,
+                            param_prefix=f"{b_db}_{p_db}",
+                            **b_cfg,
+                            **p_cfg,
+                        )
                     assert isinstance(result, pd.DataFrame)
                     result["binding_source"] = b_label
                     result["perturbation_source"] = p_label
@@ -162,7 +171,13 @@ def _topn_data() -> pd.DataFrame:
 
         if not results:
             return pd.DataFrame()
-        out = pd.concat(results, ignore_index=True)
+        with profile_span(
+            profile_logger,
+            "df.concat",
+            module="comparison",
+            context="_topn_data",
+        ):
+            out = pd.concat(results, ignore_index=True)
         out["percent_responsive"] = out["responsive_ratio"] * 100
         return out
 
@@ -268,7 +283,14 @@ def topn_plot() -> ui.Tag:
             legend_title=legend_title,
             margin=dict(l=50, r=20, t=80, b=30),
         )
-        return ui.HTML(to_html(fig, include_plotlyjs="cdn", full_html=False))
+        with profile_span(
+            profile_logger,
+            "plot.build",
+            module="comparison",
+            context="topn_plot",
+        ):
+            html = to_html(fig, include_plotlyjs="cdn", full_html=False)
+        return ui.HTML(html)
 
 
 __all__ = ["comparison_workspace_server"]
diff --git a/tfbpshiny/modules/perturbation/server/workspace.py b/tfbpshiny/modules/perturbation/server/workspace.py
index d799979..5218b98 100644
--- a/tfbpshiny/modules/perturbation/server/workspace.py
+++ b/tfbpshiny/modules/perturbation/server/workspace.py
@@ -16,6 +16,7 @@
     get_measurement_column,
     regulator_scatter_sql,
 )
+from tfbpshiny.utils.profiler import profile_span
 from tfbpshiny.utils.vdb_init import get_regulator_display_name
 
 
@@ -30,6 +31,7 @@ def perturbation_workspace_server(
     dataset_filters: reactive.Value[dict[str, Any]],
     vdb: VirtualDB,
     logger: Logger,
+    profile_logger: Logger,
 ) -> None:
     """
     Render the perturbation correlation rows: pairwise distributions
@@ -105,17 +107,24 @@ def _all_corr_data() -> dict[tuple[str, str], pd.DataFrame]:
                 logger.debug(
                     f"Correlating {db_a}({col_a}) vs {db_b}({col_b}) ({method})"
                 )
-                result[(db_a, db_b)] = corr_pair_sql(
-                    vdb,
-                    db_a,
-                    col_a,
-                    filters.get(db_a),
-                    db_b,
-                    col_b,
-                    filters.get(db_b),
-                    method,
-                    prefix=f"p{i}_",
-                )
+                with profile_span(
+                    profile_logger,
+                    "vdb.query",
+                    module="perturbation",
+                    dataset=f"{db_a}x{db_b}",
+                    context="_all_corr_data",
+                ):
+                    result[(db_a, db_b)] = corr_pair_sql(
+                        vdb,
+                        db_a,
+                        col_a,
+                        filters.get(db_a),
+                        db_b,
+                        col_b,
+                        filters.get(db_b),
+                        method,
+                        prefix=f"p{i}_",
+                    )
             except Exception as exc:
                 logger.error(
                     f"Failed to correlate {db_a} vs {db_b}: {exc}", exc_info=True
@@ -247,14 +256,19 @@ def distributions_plot() -> ui.Tag:
             "  });"
             "})();"
         )
-        return ui.HTML(
-            to_html(
+        with profile_span(
+            profile_logger,
+            "plot.build",
+            module="perturbation",
+            context="distributions_plot",
+        ):
+            html = to_html(
                 fig,
                 include_plotlyjs="cdn",
                 full_html=False,
                 post_script=post_script,
             )
-        )
+        return ui.HTML(html)
 
     @render.ui
     def regulator_selector() -> ui.Tag:
@@ -383,7 +397,13 @@ def _strip_reg(f: dict | None) -> dict | None:
                     reg,
                     idx,
                 )
-                merged = vdb.query(scatter_sql, **scatter_params)
+                with profile_span(
+                    profile_logger,
+                    "vdb.query",
+                    module="perturbation",
+                    context="scatter",
+                ):
+                    merged = vdb.query(scatter_sql, **scatter_params)
                 logger.debug(
                     f"scatter {db_a}/{db_b} reg={reg!r} "
                     f"rows={len(merged)} fa={fa!r} fb={fb!r}"
@@ -445,9 +465,16 @@ def _strip_reg(f: dict | None) -> dict | None:
                 width=400,
                 height=400,
             )
+            with profile_span(
+                profile_logger,
+                "plot.build",
+                module="perturbation",
+                context="regulator_plot",
+            ):
+                plot_html = to_html(fig, include_plotlyjs=False, full_html=False)
             plot_divs.append(
                 ui.div(
-                    ui.HTML(to_html(fig, include_plotlyjs=False, full_html=False)),
+                    ui.HTML(plot_html),
                     style="flex: 0 0 auto;",
                 )
             )
diff --git a/tfbpshiny/modules/select_datasets/server/workspace.py b/tfbpshiny/modules/select_datasets/server/workspace.py
index 90042e8..43e7b8d 100644
--- a/tfbpshiny/modules/select_datasets/server/workspace.py
+++ b/tfbpshiny/modules/select_datasets/server/workspace.py
@@ -24,6 +24,7 @@
     diagonal_cell_modal_ui,
     off_diagonal_cell_modal_ui,
 )
+from tfbpshiny.utils.profiler import profile_span
 from tfbpshiny.utils.ratelimit import debounce
 from tfbpshiny.utils.vdb_init import HIDDEN_FILTER_FIELDS
 
@@ -38,6 +39,7 @@ def select_datasets_workspace_server(
     dataset_filters: reactive.Value[dict[str, Any]],
     vdb: VirtualDB,
     logger: Logger,
+    profile_logger: Logger,
 ) -> None:
     """Render the sample-count matrix for all active datasets."""
 
@@ -83,12 +85,26 @@ def _matrix_data() -> dict[str, Any]:
             db_filters = filters.get(db_name)
 
             sql, params = regulator_locus_tags_query(db_name, db_filters)
-            reg_df = vdb.query(sql, **params)
+            with profile_span(
+                profile_logger,
+                "vdb.query",
+                module="select_datasets",
+                dataset=db_name,
+                context="_matrix_data",
+            ):
+                reg_df = vdb.query(sql, **params)
             regulators = set(reg_df["regulator_locus_tag"].dropna().astype(str))
             regulator_sets[db_name] = regulators
 
             sql, params = sample_count_query(db_name, db_filters)
-            n_samples = int(vdb.query(sql, **params).iloc[0, 0])
+            with profile_span(
+                profile_logger,
+                "vdb.query",
+                module="select_datasets",
+                dataset=db_name,
+                context="_matrix_data",
+            ):
+                n_samples = int(vdb.query(sql, **params).iloc[0, 0])
 
             diagonal[db_name] = {"regulators": len(regulators), "samples": n_samples}
 
@@ -105,8 +121,22 @@ def _matrix_data() -> dict[str, Any]:
                 sql_b, params_b = sample_count_query(
                     db_b, filters.get(db_b), restrict_to_regulators=common_list
                 )
-                n_a = int(vdb.query(sql_a, **params_a).iloc[0, 0])
-                n_b = int(vdb.query(sql_b, **params_b).iloc[0, 0])
+                with profile_span(
+                    profile_logger,
+                    "vdb.query",
+                    module="select_datasets",
+                    dataset=f"{db_a}x{db_b}",
+                    context="_matrix_data",
+                ):
+                    n_a = int(vdb.query(sql_a, **params_a).iloc[0, 0])
+                with profile_span(
+                    profile_logger,
+                    "vdb.query",
+                    module="select_datasets",
+                    dataset=f"{db_a}x{db_b}",
+                    context="_matrix_data",
+                ):
+                    n_b = int(vdb.query(sql_b, **params_b).iloc[0, 0])
 
                 cross_dataset[(db_a, db_b)] = {
                     "common_regulators": len(common),
diff --git a/tfbpshiny/utils/profiler.py b/tfbpshiny/utils/profiler.py
new file mode 100644
index 0000000..91b0a30
--- /dev/null
+++ b/tfbpshiny/utils/profiler.py
@@ -0,0 +1,48 @@
+"""Timing context manager for the profiler logger."""
+
+from __future__ import annotations
+
+import time
+from collections.abc import Generator
+from contextlib import contextmanager
+from datetime import datetime, timezone
+from logging import Logger
+
+
+@contextmanager
+def profile_span(
+    logger: Logger,
+    op: str,
+    module: str = "",
+    dataset: str = "",
+    context: str = "",
+) -> Generator[None]:
+    """
+    Time a block of code and emit one fixed-schema pipe-delimited record to *logger*.
+
+    The record always has exactly seven pipe-separated columns so it can be read
+    without regex using ``pandas.read_csv(sep="|")``:
+
+    ::
+
+        PROFILE | timestamp | elapsed_s | op | module | dataset | context
+
+    :param logger: The ``"profiler"`` logger.
+    :param op: Operation label, e.g. ``"vdb.query"`` or ``"plot.build"``.
+    :param module: Shiny module name, e.g. ``"binding"``. Empty for init-time spans.
+    :param dataset: Dataset(s) involved, e.g. ``"harbison"`` or ``"harbison x rossi"``.
+    :param context: Function or plot name providing location, e.g. ``"_all_corr_data"``.
+
+    """
+    t0 = time.perf_counter()
+    try:
+        yield
+    finally:
+        elapsed = time.perf_counter() - t0
+        ts = datetime.now(timezone.utc).isoformat(timespec="milliseconds")
+        logger.debug(
+            " | ".join(["PROFILE", ts, f"{elapsed:.4f}", op, module, dataset, context])
+        )
+
+
+__all__ = ["profile_span"]
diff --git a/tfbpshiny/utils/vdb_init.py b/tfbpshiny/utils/vdb_init.py
index de62974..e177be1 100644
--- a/tfbpshiny/utils/vdb_init.py
+++ b/tfbpshiny/utils/vdb_init.py
@@ -2,11 +2,15 @@
 
 from __future__ import annotations
 
+import logging
 from dataclasses import dataclass
+from logging import Logger
 
 import pandas as pd
 from labretriever import VirtualDB
 
+from tfbpshiny.utils.profiler import profile_span
+
 # Metadata fields to suppress from the filter UI, keyed by db_name.
 # Use "*" for fields hidden across all datasets; use the db_name key for
 # dataset-specific exclusions. The effective hidden set for a given dataset
@@ -140,7 +144,9 @@ def _filter_hackett_views(vdb: VirtualDB) -> None:
         )
 
 
-def ensure_hackett_analysis_set(vdb: VirtualDB) -> None:
+def ensure_hackett_analysis_set(
+    vdb: VirtualDB, profile_logger: Logger | None = None
+) -> None:
     """
     Build the ``hackett_analysis_set`` table and permanently filter the ``hackett_meta``
     and ``hackett`` views to include only those samples.
@@ -148,9 +154,12 @@ def ensure_hackett_analysis_set(vdb: VirtualDB) -> None:
     Safe to call multiple times; uses ``CREATE OR REPLACE``.
 
     :param vdb: The application VirtualDB instance.
+    :param profile_logger: Optional profiler logger; timing is skipped when ``None``.
 
     """
-    vdb._conn.execute(_HACKETT_ANALYSIS_SET_SQL)
+    _pl = profile_logger or logging.getLogger("profiler")
+    with profile_span(_pl, "init.hackett_set"):
+        vdb._conn.execute(_HACKETT_ANALYSIS_SET_SQL)
     _filter_hackett_views(vdb)
 
 
@@ -174,7 +183,9 @@ def ensure_hackett_analysis_set(vdb: VirtualDB) -> None:
 """
 
 
-def _build_regulator_display_names(vdb: VirtualDB) -> None:
+def _build_regulator_display_names(
+    vdb: VirtualDB, profile_logger: Logger | None = None
+) -> None:
     """
     Build the ``regulator_display_names`` DuckDB table from all dataset meta views.
 
@@ -184,6 +195,7 @@ def _build_regulator_display_names(vdb: VirtualDB) -> None:
     non-empty symbol different from the tag is present; otherwise it equals the tag.
 
     :param vdb: The application VirtualDB instance.
+    :param profile_logger: Optional profiler logger; timing is skipped when ``None``.
 
     """
     db_names = [
@@ -201,12 +213,15 @@ def _build_regulator_display_names(vdb: VirtualDB) -> None:
         table=_REGULATOR_DISPLAY_NAME_TABLE,
         union_sql=union_sql,
     )
-    vdb._conn.execute(sql)
+    _pl = profile_logger or logging.getLogger("profiler")
+    with profile_span(_pl, "init.regulator_table"):
+        vdb._conn.execute(sql)
 
 
 def get_regulator_display_name(
     vdb: VirtualDB,
     locus_tags: list[str] | None = None,
+    profile_logger: Logger | None = None,
 ) -> pd.DataFrame:
     """
     Return a DataFrame of regulator display names from the pre-built lookup table.
@@ -219,13 +234,22 @@ def get_regulator_display_name(
     :rtype: pandas.DataFrame
 
     """
-    if locus_tags is None:
-        return vdb._conn.execute(f"SELECT * FROM {_REGULATOR_DISPLAY_NAME_TABLE}").df()
-    return vdb._conn.execute(
-        f"SELECT * FROM {_REGULATOR_DISPLAY_NAME_TABLE} "
-        f"WHERE regulator_locus_tag = ANY(?)",
-        [locus_tags],
-    ).df()
+    _pl = profile_logger or logging.getLogger("profiler")
+    with profile_span(
+        _pl,
+        "vdb.execute",
+        dataset=_REGULATOR_DISPLAY_NAME_TABLE,
+        context="get_regulator_display_name",
+    ):
+        if locus_tags is None:
+            return vdb._conn.execute(
+                f"SELECT * FROM {_REGULATOR_DISPLAY_NAME_TABLE}"
+            ).df()
+        return vdb._conn.execute(
+            f"SELECT * FROM {_REGULATOR_DISPLAY_NAME_TABLE} "
+            f"WHERE regulator_locus_tag = ANY(?)",
+            [locus_tags],
+        ).df()
 
 
 @dataclass
@@ -252,19 +276,21 @@ class AppDatasets:
 def initialize_data(
     virtualdb_config: str,
     hf_token: str | None = None,
+    profile_logger: Logger | None = None,
 ) -> tuple[VirtualDB, AppDatasets]:
     """
     Construct the VirtualDB, run one-time setup, and compute app-level dataset metadata.
 
     :param virtualdb_config: Path to the VirtualDB YAML config file.
     :param hf_token: Optional HuggingFace token for private repo access.
+    :param profile_logger: Optional profiler logger for timing instrumentation.
     :returns: Tuple of ``(vdb, app_datasets)``.
     :rtype: tuple[VirtualDB, AppDatasets]
 
     """
     vdb = VirtualDB(virtualdb_config, token=hf_token)
-    ensure_hackett_analysis_set(vdb)
-    _build_regulator_display_names(vdb)
+    ensure_hackett_analysis_set(vdb, profile_logger=profile_logger)
+    _build_regulator_display_names(vdb, profile_logger=profile_logger)
 
     condition_cols: dict[str, list[str]] = {}
     upstream_cols: dict[str, list[str]] = {}

From 09f863b9b9d9d1274b462431a4f3f0a35ad34144 Mon Sep 17 00:00:00 2001
From: chasem <chasem@wustl.edu>
Date: Fri, 17 Apr 2026 16:09:48 -0500
Subject: [PATCH 02/16] adding session id to track unique visits

---
 tfbpshiny/app.py                              |  9 +++++
 tfbpshiny/modules/binding/server/workspace.py |  5 +++
 .../modules/comparison/server/workspace.py    |  4 ++
 .../modules/perturbation/server/workspace.py  |  5 +++
 .../select_datasets/server/workspace.py       |  5 +++
 tfbpshiny/utils/profiler.py                   | 39 +++++++++++++++++--
 6 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/tfbpshiny/app.py b/tfbpshiny/app.py
index 7068c21..c73634f 100644
--- a/tfbpshiny/app.py
+++ b/tfbpshiny/app.py
@@ -41,6 +41,7 @@
     selection_matrix_ui,
     selection_sidebar_ui,
 )
+from tfbpshiny.utils.profiler import log_session_event
 from tfbpshiny.utils.vdb_init import initialize_data
 
 if not os.getenv("DOCKER_ENV"):
@@ -110,6 +111,10 @@
 def app_server(input: Any, output: Any, session: Any) -> None:
     """Create shared reactive state and call all module servers."""
 
+    sid: str = session.id
+    log_session_event(profile_logger, "START", sid)
+    session.on_ended(lambda: log_session_event(profile_logger, "END", sid))
+
     # this stores the name of the currently active module, ie
     # "home", "selection", "binding", "perturbation", or "comparison"
     active_module: reactive.Value[str] = reactive.value("home")
@@ -132,6 +137,7 @@ def app_server(input: Any, output: Any, session: Any) -> None:
         vdb=vdb,
         logger=logger,
         profile_logger=profile_logger,
+        session_id=sid,
     )
 
     corr_type, col_preference = binding_sidebar_server(
@@ -150,6 +156,7 @@ def app_server(input: Any, output: Any, session: Any) -> None:
         vdb=vdb,
         logger=logger,
         profile_logger=profile_logger,
+        session_id=sid,
     )
 
     corr_type_p, col_preference_p = perturbation_sidebar_server(
@@ -168,6 +175,7 @@ def app_server(input: Any, output: Any, session: Any) -> None:
         vdb=vdb,
         logger=logger,
         profile_logger=profile_logger,
+        session_id=sid,
     )
 
     top_n, effect_threshold, pvalue_threshold, facet_by = comparison_sidebar_server(
@@ -189,6 +197,7 @@ def app_server(input: Any, output: Any, session: Any) -> None:
         vdb=vdb,
         logger=logger,
         profile_logger=profile_logger,
+        session_id=sid,
     )
 
     # set the active module when a nav button is clicked
diff --git a/tfbpshiny/modules/binding/server/workspace.py b/tfbpshiny/modules/binding/server/workspace.py
index 652b9b8..9a8a5b3 100644
--- a/tfbpshiny/modules/binding/server/workspace.py
+++ b/tfbpshiny/modules/binding/server/workspace.py
@@ -32,6 +32,7 @@ def binding_workspace_server(
     vdb: VirtualDB,
     logger: Logger,
     profile_logger: Logger,
+    session_id: str = "",
 ) -> None:
     """
     Render the binding correlation rows: pairwise distributions
@@ -115,6 +116,7 @@ def _all_corr_data() -> dict[tuple[str, str], pd.DataFrame]:
                     module="binding",
                     dataset=f"{db_a}x{db_b}",
                     context="_all_corr_data",
+                    session_id=session_id,
                 ):
                     result[(db_a, db_b)] = corr_pair_sql(
                         vdb,
@@ -263,6 +265,7 @@ def distributions_plot() -> ui.Tag:
             "plot.build",
             module="binding",
             context="distributions_plot",
+            session_id=session_id,
         ):
             html = to_html(
                 fig, include_plotlyjs="cdn", full_html=False, post_script=post_script
@@ -401,6 +404,7 @@ def _strip_reg(f: dict | None) -> dict | None:
                     "vdb.query",
                     module="binding",
                     context="scatter",
+                    session_id=session_id,
                 ):
                     merged = vdb.query(scatter_sql, **scatter_params)
                 logger.debug(
@@ -469,6 +473,7 @@ def _strip_reg(f: dict | None) -> dict | None:
                 "plot.build",
                 module="binding",
                 context="regulator_plot",
+                session_id=session_id,
             ):
                 plot_html = to_html(fig, include_plotlyjs=False, full_html=False)
             plot_divs.append(
diff --git a/tfbpshiny/modules/comparison/server/workspace.py b/tfbpshiny/modules/comparison/server/workspace.py
index a872cb6..28e0c8d 100644
--- a/tfbpshiny/modules/comparison/server/workspace.py
+++ b/tfbpshiny/modules/comparison/server/workspace.py
@@ -72,6 +72,7 @@ def comparison_workspace_server(
     vdb: VirtualDB,
     logger: Logger,
     profile_logger: Logger,
+    session_id: str = "",
 ) -> None:
     """Render the Top-N by Binding workspace plot."""
 
@@ -141,6 +142,7 @@ def _topn_data() -> pd.DataFrame:
                         module="comparison",
                         dataset=f"{b_db}x{p_db}",
                         context="_topn_data",
+                        session_id=session_id,
                     ):
                         result = topn_responsive_ratio(
                             vdb=vdb,
@@ -176,6 +178,7 @@ def _topn_data() -> pd.DataFrame:
             "df.concat",
             module="comparison",
             context="_topn_data",
+            session_id=session_id,
         ):
             out = pd.concat(results, ignore_index=True)
         out["percent_responsive"] = out["responsive_ratio"] * 100
@@ -288,6 +291,7 @@ def topn_plot() -> ui.Tag:
             "plot.build",
             module="comparison",
             context="topn_plot",
+            session_id=session_id,
         ):
             html = to_html(fig, include_plotlyjs="cdn", full_html=False)
         return ui.HTML(html)
diff --git a/tfbpshiny/modules/perturbation/server/workspace.py b/tfbpshiny/modules/perturbation/server/workspace.py
index 5218b98..ebfc249 100644
--- a/tfbpshiny/modules/perturbation/server/workspace.py
+++ b/tfbpshiny/modules/perturbation/server/workspace.py
@@ -32,6 +32,7 @@ def perturbation_workspace_server(
     vdb: VirtualDB,
     logger: Logger,
     profile_logger: Logger,
+    session_id: str = "",
 ) -> None:
     """
     Render the perturbation correlation rows: pairwise distributions
@@ -113,6 +114,7 @@ def _all_corr_data() -> dict[tuple[str, str], pd.DataFrame]:
                     module="perturbation",
                     dataset=f"{db_a}x{db_b}",
                     context="_all_corr_data",
+                    session_id=session_id,
                 ):
                     result[(db_a, db_b)] = corr_pair_sql(
                         vdb,
@@ -261,6 +263,7 @@ def distributions_plot() -> ui.Tag:
             "plot.build",
             module="perturbation",
             context="distributions_plot",
+            session_id=session_id,
         ):
             html = to_html(
                 fig,
@@ -402,6 +405,7 @@ def _strip_reg(f: dict | None) -> dict | None:
                     "vdb.query",
                     module="perturbation",
                     context="scatter",
+                    session_id=session_id,
                 ):
                     merged = vdb.query(scatter_sql, **scatter_params)
                 logger.debug(
@@ -470,6 +474,7 @@ def _strip_reg(f: dict | None) -> dict | None:
                 "plot.build",
                 module="perturbation",
                 context="regulator_plot",
+                session_id=session_id,
             ):
                 plot_html = to_html(fig, include_plotlyjs=False, full_html=False)
             plot_divs.append(
diff --git a/tfbpshiny/modules/select_datasets/server/workspace.py b/tfbpshiny/modules/select_datasets/server/workspace.py
index 43e7b8d..14e05b4 100644
--- a/tfbpshiny/modules/select_datasets/server/workspace.py
+++ b/tfbpshiny/modules/select_datasets/server/workspace.py
@@ -40,6 +40,7 @@ def select_datasets_workspace_server(
     vdb: VirtualDB,
     logger: Logger,
     profile_logger: Logger,
+    session_id: str = "",
 ) -> None:
     """Render the sample-count matrix for all active datasets."""
 
@@ -91,6 +92,7 @@ def _matrix_data() -> dict[str, Any]:
                 module="select_datasets",
                 dataset=db_name,
                 context="_matrix_data",
+                session_id=session_id,
             ):
                 reg_df = vdb.query(sql, **params)
             regulators = set(reg_df["regulator_locus_tag"].dropna().astype(str))
@@ -103,6 +105,7 @@ def _matrix_data() -> dict[str, Any]:
                 module="select_datasets",
                 dataset=db_name,
                 context="_matrix_data",
+                session_id=session_id,
             ):
                 n_samples = int(vdb.query(sql, **params).iloc[0, 0])
 
@@ -127,6 +130,7 @@ def _matrix_data() -> dict[str, Any]:
                     module="select_datasets",
                     dataset=f"{db_a}x{db_b}",
                     context="_matrix_data",
+                    session_id=session_id,
                 ):
                     n_a = int(vdb.query(sql_a, **params_a).iloc[0, 0])
                 with profile_span(
@@ -135,6 +139,7 @@ def _matrix_data() -> dict[str, Any]:
                     module="select_datasets",
                     dataset=f"{db_a}x{db_b}",
                     context="_matrix_data",
+                    session_id=session_id,
                 ):
                     n_b = int(vdb.query(sql_b, **params_b).iloc[0, 0])
 
diff --git a/tfbpshiny/utils/profiler.py b/tfbpshiny/utils/profiler.py
index 91b0a30..ece6fc5 100644
--- a/tfbpshiny/utils/profiler.py
+++ b/tfbpshiny/utils/profiler.py
@@ -16,22 +16,25 @@ def profile_span(
     module: str = "",
     dataset: str = "",
     context: str = "",
+    session_id: str = "",
 ) -> Generator[None]:
     """
     Time a block of code and emit one fixed-schema pipe-delimited record to *logger*.
 
-    The record always has exactly seven pipe-separated columns so it can be read
+    The record always has exactly eight pipe-separated columns so it can be read
     without regex using ``pandas.read_csv(sep="|")``:
 
     ::
 
-        PROFILE | timestamp | elapsed_s | op | module | dataset | context
+        PROFILE | timestamp | elapsed_s | op | module | dataset | context | session_id
 
     :param logger: The ``"profiler"`` logger.
     :param op: Operation label, e.g. ``"vdb.query"`` or ``"plot.build"``.
     :param module: Shiny module name, e.g. ``"binding"``. Empty for init-time spans.
     :param dataset: Dataset(s) involved, e.g. ``"harbison"`` or ``"harbison x rossi"``.
     :param context: Function or plot name providing location, e.g. ``"_all_corr_data"``.
+    :param session_id: Shiny session ID, used to correlate spans and count concurrent
+        users. Empty for init-time spans that run outside a session.
 
     """
     t0 = time.perf_counter()
@@ -41,8 +44,36 @@ def profile_span(
         elapsed = time.perf_counter() - t0
         ts = datetime.now(timezone.utc).isoformat(timespec="milliseconds")
         logger.debug(
-            " | ".join(["PROFILE", ts, f"{elapsed:.4f}", op, module, dataset, context])
+            " | ".join(
+                [
+                    "PROFILE",
+                    ts,
+                    f"{elapsed:.4f}",
+                    op,
+                    module,
+                    dataset,
+                    context,
+                    session_id,
+                ]
+            )
         )
 
 
-__all__ = ["profile_span"]
+def log_session_event(logger: Logger, event: str, session_id: str) -> None:
+    """
+    Emit a SESSION_START or SESSION_END record to *logger*.
+
+    Record format (five pipe-separated columns)::
+
+        SESSION | timestamp | event | session_id
+
+    :param logger: The ``"profiler"`` logger.
+    :param event: ``"START"`` or ``"END"``.
+    :param session_id: Shiny session ID.
+
+    """
+    ts = datetime.now(timezone.utc).isoformat(timespec="milliseconds")
+    logger.debug(" | ".join(["SESSION", ts, event, session_id]))
+
+
+__all__ = ["profile_span", "log_session_event"]

From 2f3b1bb5ada8b5696482f7395375641c78a7cdbb Mon Sep 17 00:00:00 2001
From: chasem <chasem@wustl.edu>
Date: Fri, 17 Apr 2026 16:28:27 -0500
Subject: [PATCH 03/16] updating terraform to add cloudwatch dashboard

---
 terraform/cloudwatch.tf | 166 ++++++++++++++++++++++++++++++++++++++++
 terraform/main.tf       |  10 +++
 2 files changed, 176 insertions(+)
 create mode 100644 terraform/cloudwatch.tf

diff --git a/terraform/cloudwatch.tf b/terraform/cloudwatch.tf
new file mode 100644
index 0000000..1a234d0
--- /dev/null
+++ b/terraform/cloudwatch.tf
@@ -0,0 +1,166 @@
+locals {
+  log_group  = "/tfbpshiny/production"
+  log_stream = "shinyapp"
+  namespace  = "TFBPShiny"
+}
+
+# ---------------------------------------------------------------------------
+# Metric filters — extract numeric values from pipe-delimited log records
+# ---------------------------------------------------------------------------
+
+# Extracts elapsed_s from every PROFILE record.
+# Pattern captures the second numeric field after "PROFILE | <ts> | ".
+resource "aws_cloudwatch_log_metric_filter" "profile_latency" {
+  name           = "tfbpshiny-profile-latency"
+  log_group_name = local.log_group
+  pattern        = "[marker=PROFILE, ts, elapsed_s, op, module, dataset, context, sid]"
+
+  metric_transformation {
+    name          = "ProfileLatencySeconds"
+    namespace     = local.namespace
+    value         = "$elapsed_s"
+    default_value = 0
+    unit          = "Seconds"
+  }
+}
+
+# Counts SESSION START records — one per new user connection.
+resource "aws_cloudwatch_log_metric_filter" "session_start" {
+  name           = "tfbpshiny-session-start"
+  log_group_name = local.log_group
+  pattern        = "[marker=SESSION, ts, event=START, sid]"
+
+  metric_transformation {
+    name          = "SessionStart"
+    namespace     = local.namespace
+    value         = "1"
+    default_value = 0
+    unit          = "Count"
+  }
+}
+
+# ---------------------------------------------------------------------------
+# Alarms
+# ---------------------------------------------------------------------------
+
+# Fires when avg vdb query latency exceeds 30 s over a 15-minute window.
+# Threshold is intentionally high while the app is known to be slow —
+# lower it toward 5–10 s once query performance improves.
+resource "aws_cloudwatch_metric_alarm" "high_latency" {
+  alarm_name          = "tfbpshiny-high-latency"
+  alarm_description   = "Avg PROFILE latency > 30 s over 15 min — investigate vdb query bottlenecks."
+  namespace           = local.namespace
+  metric_name         = "ProfileLatencySeconds"
+  statistic           = "Average"
+  period              = 900  # 15 minutes
+  evaluation_periods  = 1
+  threshold           = 30
+  comparison_operator = "GreaterThanThreshold"
+  treat_missing_data  = "notBreaching"
+}
+
+# Fires when more than 20 sessions start within a 5-minute window.
+# Adjust threshold based on observed normal traffic once the app is live.
+resource "aws_cloudwatch_metric_alarm" "high_session_count" {
+  alarm_name          = "tfbpshiny-high-session-count"
+  alarm_description   = "More than 20 new sessions in 5 min — unexpected traffic spike."
+  namespace           = local.namespace
+  metric_name         = "SessionStart"
+  statistic           = "Sum"
+  period              = 300  # 5 minutes
+  evaluation_periods  = 1
+  threshold           = 20
+  comparison_operator = "GreaterThanThreshold"
+  treat_missing_data  = "notBreaching"
+}
+
+# ---------------------------------------------------------------------------
+# Dashboard
+# ---------------------------------------------------------------------------
+
+resource "aws_cloudwatch_dashboard" "tfbpshiny" {
+  dashboard_name = "tfbpshiny-production"
+
+  dashboard_body = jsonencode({
+    widgets = [
+      {
+        type   = "log"
+        x      = 0
+        y      = 0
+        width  = 24
+        height = 6
+        properties = {
+          title         = "Avg and max latency by operation"
+          region        = var.aws_region
+          view          = "table"
+          logGroupNames = [local.log_group]
+          queryString   ="fields @timestamp, @message | filter @message like /^PROFILE/ | parse @message 'PROFILE | * | * | * | * | * | * | *' as ts, elapsed_s, op, module, dataset, context, sid | stats avg(elapsed_s) as avg_s, max(elapsed_s) as max_s, count() as n by op, module, context | sort avg_s desc"
+        }
+      },
+      {
+        type   = "log"
+        x      = 0
+        y      = 6
+        width  = 12
+        height = 6
+        properties = {
+          title         = "Concurrent sessions started (5-min bins)"
+          region        = var.aws_region
+          view          = "timeSeries"
+          logGroupNames = [local.log_group]
+          queryString   ="fields @timestamp, @message | filter @message like /^SESSION/ | filter @message like /START/ | parse @message 'SESSION | * | * | *' as ts, event, sid | stats count(sid) as sessions_started by bin(5m) | sort @timestamp asc"
+        }
+      },
+      {
+        type   = "log"
+        x      = 12
+        y      = 6
+        width  = 12
+        height = 6
+        properties = {
+          title         = "Unique visitors per day"
+          region        = var.aws_region
+          view          = "table"
+          logGroupNames = [local.log_group]
+          queryString   ="fields @timestamp, @message | filter @message like /^SESSION/ | filter @message like /START/ | parse @message 'SESSION | * | * | *' as ts, event, sid | stats count_distinct(sid) as unique_visitors by datefloor(@timestamp, 1d) | sort datefloor_timestamp asc"
+        }
+      },
+      {
+        type   = "log"
+        x      = 0
+        y      = 12
+        width  = 24
+        height = 6
+        properties = {
+          title         = "Latency by operation over time (5-min bins)"
+          region        = var.aws_region
+          view          = "timeSeries"
+          logGroupNames = [local.log_group]
+          queryString   ="fields @timestamp, @message | filter @message like /^PROFILE/ | parse @message 'PROFILE | * | * | * | * | * | * | *' as ts, elapsed_s, op, module, dataset, context, sid | stats avg(elapsed_s) as avg_s by bin(5m), op | sort @timestamp asc"
+        }
+      },
+      {
+        type   = "alarm"
+        x      = 0
+        y      = 18
+        width  = 12
+        height = 3
+        properties = {
+          title  = "High latency alarm"
+          alarms = [aws_cloudwatch_metric_alarm.high_latency.arn]
+        }
+      },
+      {
+        type   = "alarm"
+        x      = 12
+        y      = 18
+        width  = 12
+        height = 3
+        properties = {
+          title  = "High session count alarm"
+          alarms = [aws_cloudwatch_metric_alarm.high_session_count.arn]
+        }
+      },
+    ]
+  })
+}
diff --git a/terraform/main.tf b/terraform/main.tf
index 7c841aa..20cf7e6 100644
--- a/terraform/main.tf
+++ b/terraform/main.tf
@@ -102,6 +102,16 @@ resource "aws_instance" "tfbpshiny" {
 
   user_data = file("${path.module}/user_data.sh")
 
+  # NOTE: ami and user_data are pinned via ignore_changes. This means that
+  # if the Amazon Linux 2023 AMI is updated or user_data.sh is modified,
+  # terraform apply will NOT replace the instance. This is intentional —
+  # both changes force instance termination, which would take down production.
+  # To deploy a new AMI or updated user_data, terminate the instance manually,
+  # remove this lifecycle block, and run terraform apply to reprovision.
+  lifecycle {
+    ignore_changes = [ami, user_data]
+  }
+
   tags = {
     Name = "tfbpshiny-production"
   }

From 8d18802889b1b9da024e73fc802777c908a0209d Mon Sep 17 00:00:00 2001
From: chasem <chasem@wustl.edu>
Date: Fri, 17 Apr 2026 16:51:42 -0500
Subject: [PATCH 04/16] removing traefik from shiny log

---
 production.yml          |   2 +-
 terraform/cloudwatch.tf | 152 +++++++++++++++++++++++++++++-----------
 2 files changed, 114 insertions(+), 40 deletions(-)

diff --git a/production.yml b/production.yml
index 44f2335..2585d92 100644
--- a/production.yml
+++ b/production.yml
@@ -63,7 +63,7 @@ services:
     logging:
       driver: awslogs
       options:
-        awslogs-group: /tfbpshiny/production
+        awslogs-group: /tfbpshiny/production/traefik
         awslogs-region: us-east-2
         awslogs-stream: traefik
         awslogs-create-group: "true"
diff --git a/terraform/cloudwatch.tf b/terraform/cloudwatch.tf
index 1a234d0..e0b3c0a 100644
--- a/terraform/cloudwatch.tf
+++ b/terraform/cloudwatch.tf
@@ -1,34 +1,79 @@
 locals {
-  log_group  = "/tfbpshiny/production"
-  log_stream = "shinyapp"
-  namespace  = "TFBPShiny"
+  log_group_app     = "/tfbpshiny/production"
+  log_group_traefik = "/tfbpshiny/production/traefik"
+  namespace         = "TFBPShiny"
+}
+
+# ---------------------------------------------------------------------------
+# Log groups
+# ---------------------------------------------------------------------------
+# Declaring these explicitly so Terraform owns retention/lifecycle. If either
+# already exists in the account, import it with:
+#   terraform import aws_cloudwatch_log_group.app /tfbpshiny/production
+#   terraform import aws_cloudwatch_log_group.traefik /tfbpshiny/production/traefik
+#
+# NOTE: After applying, update the Docker awslogs driver for the Traefik
+# container to use awslogs-group=/tfbpshiny/production/traefik so its noisy
+# network-discovery warnings stop polluting the Shiny app's log group.
+
+resource "aws_cloudwatch_log_group" "app" {
+  name              = local.log_group_app
+  retention_in_days = 30
+}
+
+resource "aws_cloudwatch_log_group" "traefik" {
+  name              = local.log_group_traefik
+  retention_in_days = 14
 }
 
 # ---------------------------------------------------------------------------
 # Metric filters — extract numeric values from pipe-delimited log records
 # ---------------------------------------------------------------------------
+# Bracket-pattern filters tokenize on whitespace, so every pipe character is
+# its own token (p1..p7). Field positions below reflect that.
+#
+# Record format emitted by the app:
+#   PROFILE | <ts> | <elapsed_s> | <op> | <module> | <dataset> | <context> | <sid>
+#   SESSION | <ts> | START|END   | <sid>
+#
+# default_value is intentionally omitted. If it were set to 0, every minute
+# without a PROFILE event would emit a zero, dragging the latency average
+# toward zero and suppressing the alarm during low traffic.
 
-# Extracts elapsed_s from every PROFILE record.
-# Pattern captures the second numeric field after "PROFILE | <ts> | ".
+# All PROFILE records — used for dashboard widgets that show latency across
+# every operation. Not used for the vdb.query-specific alarm.
 resource "aws_cloudwatch_log_metric_filter" "profile_latency" {
   name           = "tfbpshiny-profile-latency"
-  log_group_name = local.log_group
-  pattern        = "[marker=PROFILE, ts, elapsed_s, op, module, dataset, context, sid]"
+  log_group_name = aws_cloudwatch_log_group.app.name
+  pattern        = "[marker=PROFILE, p1, ts, p2, elapsed_s, p3, op, p4, module, p5, dataset, p6, context, p7, sid]"
 
   metric_transformation {
-    name          = "ProfileLatencySeconds"
-    namespace     = local.namespace
-    value         = "$elapsed_s"
-    default_value = 0
-    unit          = "Seconds"
+    name      = "ProfileLatencySeconds"
+    namespace = local.namespace
+    value     = "$elapsed_s"
+    unit      = "Seconds"
+  }
+}
+
+# Narrow filter that only matches op=vdb.query. Drives the latency alarm.
+resource "aws_cloudwatch_log_metric_filter" "vdb_query_latency" {
+  name           = "tfbpshiny-vdb-query-latency"
+  log_group_name = aws_cloudwatch_log_group.app.name
+  pattern        = "[marker=PROFILE, p1, ts, p2, elapsed_s, p3, op=vdb.query, p4, module, p5, dataset, p6, context, p7, sid]"
+
+  metric_transformation {
+    name      = "VdbQueryLatencySeconds"
+    namespace = local.namespace
+    value     = "$elapsed_s"
+    unit      = "Seconds"
   }
 }
 
 # Counts SESSION START records — one per new user connection.
 resource "aws_cloudwatch_log_metric_filter" "session_start" {
   name           = "tfbpshiny-session-start"
-  log_group_name = local.log_group
-  pattern        = "[marker=SESSION, ts, event=START, sid]"
+  log_group_name = aws_cloudwatch_log_group.app.name
+  pattern        = "[marker=SESSION, p1, ts, p2, event=START, p3, sid]"
 
   metric_transformation {
     name          = "SessionStart"
@@ -42,19 +87,19 @@ resource "aws_cloudwatch_log_metric_filter" "session_start" {
 # ---------------------------------------------------------------------------
 # Alarms
 # ---------------------------------------------------------------------------
-
-# Fires when avg vdb query latency exceeds 30 s over a 15-minute window.
-# Threshold is intentionally high while the app is known to be slow —
-# lower it toward 5–10 s once query performance improves.
-resource "aws_cloudwatch_metric_alarm" "high_latency" {
-  alarm_name          = "tfbpshiny-high-latency"
-  alarm_description   = "Avg PROFILE latency > 30 s over 15 min — investigate vdb query bottlenecks."
+# Fires when avg vdb.query latency exceeds 5 s over 15 min (3 × 5-min periods).
+# Raise the threshold to 10–30 s while the app is known to be slow; drop it
+# toward 2–5 s once query performance improves.
+resource "aws_cloudwatch_metric_alarm" "high_vdb_query_latency" {
+  alarm_name          = "tfbpshiny-high-vdb-query-latency"
+  alarm_description   = "Avg vdb.query latency > 5 s over 15 min — investigate vector DB bottlenecks."
   namespace           = local.namespace
-  metric_name         = "ProfileLatencySeconds"
+  metric_name         = "VdbQueryLatencySeconds"
   statistic           = "Average"
-  period              = 900  # 15 minutes
-  evaluation_periods  = 1
-  threshold           = 30
+  period              = 300
+  evaluation_periods  = 3
+  datapoints_to_alarm = 3
+  threshold           = 5
   comparison_operator = "GreaterThanThreshold"
   treat_missing_data  = "notBreaching"
 }
@@ -67,7 +112,7 @@ resource "aws_cloudwatch_metric_alarm" "high_session_count" {
   namespace           = local.namespace
   metric_name         = "SessionStart"
   statistic           = "Sum"
-  period              = 300  # 5 minutes
+  period              = 300
   evaluation_periods  = 1
   threshold           = 20
   comparison_operator = "GreaterThanThreshold"
@@ -77,10 +122,13 @@ resource "aws_cloudwatch_metric_alarm" "high_session_count" {
 # ---------------------------------------------------------------------------
 # Dashboard
 # ---------------------------------------------------------------------------
+# All Logs Insights queries target only the app log group (Traefik lives
+# elsewhere now), so the PROFILE/SESSION filters aren't competing with
+# reverse-proxy noise. Parse patterns use the exact pipe-with-spaces format
+# the app emits; if you ever change that format, update these in lockstep.
 
 resource "aws_cloudwatch_dashboard" "tfbpshiny" {
   dashboard_name = "tfbpshiny-production"
-
   dashboard_body = jsonencode({
     widgets = [
       {
@@ -93,8 +141,14 @@ resource "aws_cloudwatch_dashboard" "tfbpshiny" {
           title         = "Avg and max latency by operation"
           region        = var.aws_region
           view          = "table"
-          logGroupNames = [local.log_group]
-          queryString   ="fields @timestamp, @message | filter @message like /^PROFILE/ | parse @message 'PROFILE | * | * | * | * | * | * | *' as ts, elapsed_s, op, module, dataset, context, sid | stats avg(elapsed_s) as avg_s, max(elapsed_s) as max_s, count() as n by op, module, context | sort avg_s desc"
+          logGroupNames = [local.log_group_app]
+          queryString   = <<-EOT
+            fields @timestamp, @message
+            | filter @message like /^PROFILE \|/
+            | parse @message "PROFILE | * | * | * | * | * | * | *" as ts, elapsed_s, op, module, dataset, context, sid
+            | stats avg(elapsed_s) as avg_s, max(elapsed_s) as max_s, count() as n by op, module, context
+            | sort avg_s desc
+          EOT
         }
       },
       {
@@ -104,11 +158,18 @@ resource "aws_cloudwatch_dashboard" "tfbpshiny" {
         width  = 12
         height = 6
         properties = {
-          title         = "Concurrent sessions started (5-min bins)"
+          title         = "Sessions started (5-min bins)"
           region        = var.aws_region
           view          = "timeSeries"
-          logGroupNames = [local.log_group]
-          queryString   ="fields @timestamp, @message | filter @message like /^SESSION/ | filter @message like /START/ | parse @message 'SESSION | * | * | *' as ts, event, sid | stats count(sid) as sessions_started by bin(5m) | sort @timestamp asc"
+          logGroupNames = [local.log_group_app]
+          queryString   = <<-EOT
+            fields @timestamp, @message
+            | filter @message like /^SESSION \|/
+            | parse @message "SESSION | * | * | *" as ts, event, sid
+            | filter event = "START"
+            | stats count(*) as sessions_started by bin(5m)
+            | sort @timestamp asc
+          EOT
         }
       },
       {
@@ -120,9 +181,16 @@ resource "aws_cloudwatch_dashboard" "tfbpshiny" {
         properties = {
           title         = "Unique visitors per day"
           region        = var.aws_region
-          view          = "table"
-          logGroupNames = [local.log_group]
-          queryString   ="fields @timestamp, @message | filter @message like /^SESSION/ | filter @message like /START/ | parse @message 'SESSION | * | * | *' as ts, event, sid | stats count_distinct(sid) as unique_visitors by datefloor(@timestamp, 1d) | sort datefloor_timestamp asc"
+          view          = "bar"
+          logGroupNames = [local.log_group_app]
+          queryString   = <<-EOT
+            fields @timestamp, @message
+            | filter @message like /^SESSION \|/
+            | parse @message "SESSION | * | * | *" as ts, event, sid
+            | filter event = "START"
+            | stats count_distinct(sid) as unique_visitors by bin(1d)
+            | sort @timestamp asc
+          EOT
         }
       },
       {
@@ -135,8 +203,14 @@ resource "aws_cloudwatch_dashboard" "tfbpshiny" {
           title         = "Latency by operation over time (5-min bins)"
           region        = var.aws_region
           view          = "timeSeries"
-          logGroupNames = [local.log_group]
-          queryString   ="fields @timestamp, @message | filter @message like /^PROFILE/ | parse @message 'PROFILE | * | * | * | * | * | * | *' as ts, elapsed_s, op, module, dataset, context, sid | stats avg(elapsed_s) as avg_s by bin(5m), op | sort @timestamp asc"
+          logGroupNames = [local.log_group_app]
+          queryString   = <<-EOT
+            fields @timestamp, @message
+            | filter @message like /^PROFILE \|/
+            | parse @message "PROFILE | * | * | * | * | * | * | *" as ts, elapsed_s, op, module, dataset, context, sid
+            | stats avg(elapsed_s) as avg_s by bin(5m), op
+            | sort @timestamp asc
+          EOT
         }
       },
       {
@@ -146,8 +220,8 @@ resource "aws_cloudwatch_dashboard" "tfbpshiny" {
         width  = 12
         height = 3
         properties = {
-          title  = "High latency alarm"
-          alarms = [aws_cloudwatch_metric_alarm.high_latency.arn]
+          title  = "vdb.query latency alarm"
+          alarms = [aws_cloudwatch_metric_alarm.high_vdb_query_latency.arn]
         }
       },
       {

From 39c27679bf72f2d04fd4e7b3651ed33ed49ce9d5 Mon Sep 17 00:00:00 2001
From: chasem <chasem@wustl.edu>
Date: Fri, 17 Apr 2026 17:00:09 -0500
Subject: [PATCH 05/16] trying to get cloudwatch dashboard working

---
 terraform/cloudwatch.tf     | 76 ++++++++++++++-----------------------
 tfbpshiny/utils/profiler.py | 54 +++++++++++++++-----------
 2 files changed, 61 insertions(+), 69 deletions(-)

diff --git a/terraform/cloudwatch.tf b/terraform/cloudwatch.tf
index e0b3c0a..78af86e 100644
--- a/terraform/cloudwatch.tf
+++ b/terraform/cloudwatch.tf
@@ -11,10 +11,6 @@ locals {
 # already exists in the account, import it with:
 #   terraform import aws_cloudwatch_log_group.app /tfbpshiny/production
 #   terraform import aws_cloudwatch_log_group.traefik /tfbpshiny/production/traefik
-#
-# NOTE: After applying, update the Docker awslogs driver for the Traefik
-# container to use awslogs-group=/tfbpshiny/production/traefik so its noisy
-# network-discovery warnings stop polluting the Shiny app's log group.
 
 resource "aws_cloudwatch_log_group" "app" {
   name              = local.log_group_app
@@ -27,44 +23,41 @@ resource "aws_cloudwatch_log_group" "traefik" {
 }
 
 # ---------------------------------------------------------------------------
-# Metric filters — extract numeric values from pipe-delimited log records
+# Metric filters — JSON format, no tokenization ambiguity
 # ---------------------------------------------------------------------------
-# Bracket-pattern filters tokenize on whitespace, so every pipe character is
-# its own token (p1..p7). Field positions below reflect that.
+# Records emitted by the app are single-line JSON objects:
+#   PROFILE: {"event":"PROFILE","ts":"...","elapsed_s":0.1234,"op":"...","module":"...","dataset":"...","context":"...","session_id":"..."}
+#   SESSION: {"event":"SESSION","ts":"...","lifecycle":"START"|"END","session_id":"..."}
 #
-# Record format emitted by the app:
-#   PROFILE | <ts> | <elapsed_s> | <op> | <module> | <dataset> | <context> | <sid>
-#   SESSION | <ts> | START|END   | <sid>
+# CloudWatch JSON metric filters match on field values and extract numeric
+# fields directly, with no whitespace-tokenization issues.
 #
-# default_value is intentionally omitted. If it were set to 0, every minute
-# without a PROFILE event would emit a zero, dragging the latency average
-# toward zero and suppressing the alarm during low traffic.
+# default_value is intentionally omitted on latency filters. Setting it to 0
+# would drag the average toward zero during quiet periods and suppress alarms.
 
-# All PROFILE records — used for dashboard widgets that show latency across
-# every operation. Not used for the vdb.query-specific alarm.
 resource "aws_cloudwatch_log_metric_filter" "profile_latency" {
   name           = "tfbpshiny-profile-latency"
   log_group_name = aws_cloudwatch_log_group.app.name
-  pattern        = "[marker=PROFILE, p1, ts, p2, elapsed_s, p3, op, p4, module, p5, dataset, p6, context, p7, sid]"
+  pattern        = "{ $.event = \"PROFILE\" }"
 
   metric_transformation {
     name      = "ProfileLatencySeconds"
     namespace = local.namespace
-    value     = "$elapsed_s"
+    value     = "$.elapsed_s"
     unit      = "Seconds"
   }
 }
 
-# Narrow filter that only matches op=vdb.query. Drives the latency alarm.
+# Narrow filter for vdb.query only — drives the latency alarm.
 resource "aws_cloudwatch_log_metric_filter" "vdb_query_latency" {
   name           = "tfbpshiny-vdb-query-latency"
   log_group_name = aws_cloudwatch_log_group.app.name
-  pattern        = "[marker=PROFILE, p1, ts, p2, elapsed_s, p3, op=vdb.query, p4, module, p5, dataset, p6, context, p7, sid]"
+  pattern        = "{ $.event = \"PROFILE\" && $.op = \"vdb.query\" }"
 
   metric_transformation {
     name      = "VdbQueryLatencySeconds"
     namespace = local.namespace
-    value     = "$elapsed_s"
+    value     = "$.elapsed_s"
     unit      = "Seconds"
   }
 }
@@ -73,7 +66,7 @@ resource "aws_cloudwatch_log_metric_filter" "vdb_query_latency" {
 resource "aws_cloudwatch_log_metric_filter" "session_start" {
   name           = "tfbpshiny-session-start"
   log_group_name = aws_cloudwatch_log_group.app.name
-  pattern        = "[marker=SESSION, p1, ts, p2, event=START, p3, sid]"
+  pattern        = "{ $.event = \"SESSION\" && $.lifecycle = \"START\" }"
 
   metric_transformation {
     name          = "SessionStart"
@@ -87,25 +80,22 @@ resource "aws_cloudwatch_log_metric_filter" "session_start" {
 # ---------------------------------------------------------------------------
 # Alarms
 # ---------------------------------------------------------------------------
-# Fires when avg vdb.query latency exceeds 5 s over 15 min (3 × 5-min periods).
-# Raise the threshold to 10–30 s while the app is known to be slow; drop it
-# toward 2–5 s once query performance improves.
+# Raise threshold while the app is known to be slow; lower toward 2-5 s
+# once query performance improves.
 resource "aws_cloudwatch_metric_alarm" "high_vdb_query_latency" {
   alarm_name          = "tfbpshiny-high-vdb-query-latency"
-  alarm_description   = "Avg vdb.query latency > 5 s over 15 min — investigate vector DB bottlenecks."
+  alarm_description   = "Avg vdb.query latency > 30 s over 15 min — investigate query bottlenecks."
   namespace           = local.namespace
   metric_name         = "VdbQueryLatencySeconds"
   statistic           = "Average"
   period              = 300
   evaluation_periods  = 3
   datapoints_to_alarm = 3
-  threshold           = 5
+  threshold           = 30
   comparison_operator = "GreaterThanThreshold"
   treat_missing_data  = "notBreaching"
 }
 
-# Fires when more than 20 sessions start within a 5-minute window.
-# Adjust threshold based on observed normal traffic once the app is live.
 resource "aws_cloudwatch_metric_alarm" "high_session_count" {
   alarm_name          = "tfbpshiny-high-session-count"
   alarm_description   = "More than 20 new sessions in 5 min — unexpected traffic spike."
@@ -122,10 +112,8 @@ resource "aws_cloudwatch_metric_alarm" "high_session_count" {
 # ---------------------------------------------------------------------------
 # Dashboard
 # ---------------------------------------------------------------------------
-# All Logs Insights queries target only the app log group (Traefik lives
-# elsewhere now), so the PROFILE/SESSION filters aren't competing with
-# reverse-proxy noise. Parse patterns use the exact pipe-with-spaces format
-# the app emits; if you ever change that format, update these in lockstep.
+# Logs Insights auto-discovers JSON fields, so no parse directive needed.
+# Queries filter on $.event to separate PROFILE from SESSION records.
 
 resource "aws_cloudwatch_dashboard" "tfbpshiny" {
   dashboard_name = "tfbpshiny-production"
@@ -143,9 +131,8 @@ resource "aws_cloudwatch_dashboard" "tfbpshiny" {
           view          = "table"
           logGroupNames = [local.log_group_app]
           queryString   = <<-EOT
-            fields @timestamp, @message
-            | filter @message like /^PROFILE \|/
-            | parse @message "PROFILE | * | * | * | * | * | * | *" as ts, elapsed_s, op, module, dataset, context, sid
+            fields @timestamp, op, module, dataset, context, elapsed_s
+            | filter event = "PROFILE"
             | stats avg(elapsed_s) as avg_s, max(elapsed_s) as max_s, count() as n by op, module, context
             | sort avg_s desc
           EOT
@@ -163,10 +150,8 @@ resource "aws_cloudwatch_dashboard" "tfbpshiny" {
           view          = "timeSeries"
           logGroupNames = [local.log_group_app]
           queryString   = <<-EOT
-            fields @timestamp, @message
-            | filter @message like /^SESSION \|/
-            | parse @message "SESSION | * | * | *" as ts, event, sid
-            | filter event = "START"
+            fields @timestamp, session_id
+            | filter event = "SESSION" and lifecycle = "START"
             | stats count(*) as sessions_started by bin(5m)
             | sort @timestamp asc
           EOT
@@ -184,11 +169,9 @@ resource "aws_cloudwatch_dashboard" "tfbpshiny" {
           view          = "bar"
           logGroupNames = [local.log_group_app]
           queryString   = <<-EOT
-            fields @timestamp, @message
-            | filter @message like /^SESSION \|/
-            | parse @message "SESSION | * | * | *" as ts, event, sid
-            | filter event = "START"
-            | stats count_distinct(sid) as unique_visitors by bin(1d)
+            fields @timestamp, session_id
+            | filter event = "SESSION" and lifecycle = "START"
+            | stats count_distinct(session_id) as unique_visitors by bin(1d)
             | sort @timestamp asc
           EOT
         }
@@ -205,9 +188,8 @@ resource "aws_cloudwatch_dashboard" "tfbpshiny" {
           view          = "timeSeries"
           logGroupNames = [local.log_group_app]
           queryString   = <<-EOT
-            fields @timestamp, @message
-            | filter @message like /^PROFILE \|/
-            | parse @message "PROFILE | * | * | * | * | * | * | *" as ts, elapsed_s, op, module, dataset, context, sid
+            fields @timestamp, op, elapsed_s
+            | filter event = "PROFILE"
             | stats avg(elapsed_s) as avg_s by bin(5m), op
             | sort @timestamp asc
           EOT
diff --git a/tfbpshiny/utils/profiler.py b/tfbpshiny/utils/profiler.py
index ece6fc5..fa971c3 100644
--- a/tfbpshiny/utils/profiler.py
+++ b/tfbpshiny/utils/profiler.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import json
 import time
 from collections.abc import Generator
 from contextlib import contextmanager
@@ -19,19 +20,18 @@ def profile_span(
     session_id: str = "",
 ) -> Generator[None]:
     """
-    Time a block of code and emit one fixed-schema pipe-delimited record to *logger*.
+    Time a block of code and emit one structured JSON record to *logger*.
 
-    The record always has exactly eight pipe-separated columns so it can be read
-    without regex using ``pandas.read_csv(sep="|")``:
+    Each record has the following keys::
 
-    ::
-
-        PROFILE | timestamp | elapsed_s | op | module | dataset | context | session_id
+        {"event": "PROFILE", "ts": "...", "elapsed_s": 0.1234,
+         "op": "...", "module": "...", "dataset": "...",
+         "context": "...", "session_id": "..."}
 
     :param logger: The ``"profiler"`` logger.
     :param op: Operation label, e.g. ``"vdb.query"`` or ``"plot.build"``.
     :param module: Shiny module name, e.g. ``"binding"``. Empty for init-time spans.
-    :param dataset: Dataset(s) involved, e.g. ``"harbison"`` or ``"harbison x rossi"``.
+    :param dataset: Dataset(s) involved, e.g. ``"harbison"`` or ``"harbisонxrossi"``.
     :param context: Function or plot name providing location, e.g. ``"_all_corr_data"``.
     :param session_id: Shiny session ID, used to correlate spans and count concurrent
         users. Empty for init-time spans that run outside a session.
@@ -44,28 +44,29 @@ def profile_span(
         elapsed = time.perf_counter() - t0
         ts = datetime.now(timezone.utc).isoformat(timespec="milliseconds")
         logger.debug(
-            " | ".join(
-                [
-                    "PROFILE",
-                    ts,
-                    f"{elapsed:.4f}",
-                    op,
-                    module,
-                    dataset,
-                    context,
-                    session_id,
-                ]
+            json.dumps(
+                {
+                    "event": "PROFILE",
+                    "ts": ts,
+                    "elapsed_s": round(elapsed, 4),
+                    "op": op,
+                    "module": module,
+                    "dataset": dataset,
+                    "context": context,
+                    "session_id": session_id,
+                }
             )
         )
 
 
 def log_session_event(logger: Logger, event: str, session_id: str) -> None:
     """
-    Emit a SESSION_START or SESSION_END record to *logger*.
+    Emit a session lifecycle record to *logger*.
 
-    Record format (five pipe-separated columns)::
+    Each record has the following keys::
 
-        SESSION | timestamp | event | session_id
+        {"event": "SESSION", "ts": "...",
+        "lifecycle": "START"|"END", "session_id": "..."}
 
     :param logger: The ``"profiler"`` logger.
     :param event: ``"START"`` or ``"END"``.
@@ -73,7 +74,16 @@ def log_session_event(logger: Logger, event: str, session_id: str) -> None:
 
     """
     ts = datetime.now(timezone.utc).isoformat(timespec="milliseconds")
-    logger.debug(" | ".join(["SESSION", ts, event, session_id]))
+    logger.debug(
+        json.dumps(
+            {
+                "event": "SESSION",
+                "ts": ts,
+                "lifecycle": event,
+                "session_id": session_id,
+            }
+        )
+    )
 
 
 __all__ = ["profile_span", "log_session_event"]

From 0e6af754bb7cf8e4d03290c74a41fe3ec29b24a5 Mon Sep 17 00:00:00 2001
From: chasem <chasem@wustl.edu>
Date: Fri, 17 Apr 2026 17:12:47 -0500
Subject: [PATCH 06/16] giving up and did cloudwatch dashboard by hand

---
 terraform/cloudwatch.tf | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/terraform/cloudwatch.tf b/terraform/cloudwatch.tf
index 78af86e..1b79dde 100644
--- a/terraform/cloudwatch.tf
+++ b/terraform/cloudwatch.tf
@@ -1,3 +1,7 @@
+# NOTE! THIS isn't working to create the dashboard. But I could paste in queries
+# by hand to each panel and that did work so the logging is producing correct
+# values
+
 locals {
   log_group_app     = "/tfbpshiny/production"
   log_group_traefik = "/tfbpshiny/production/traefik"

From 91eb249beab6488203b7e6a593507749f490467c Mon Sep 17 00:00:00 2001
From: chasem <chasem@wustl.edu>
Date: Tue, 21 Apr 2026 09:30:55 -0500
Subject: [PATCH 07/16] sorting imports

---
 tfbpshiny/modules/binding/server/workspace.py      | 1 -
 tfbpshiny/modules/perturbation/server/workspace.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/tfbpshiny/modules/binding/server/workspace.py b/tfbpshiny/modules/binding/server/workspace.py
index 5e7db1a..4d6f5d3 100644
--- a/tfbpshiny/modules/binding/server/workspace.py
+++ b/tfbpshiny/modules/binding/server/workspace.py
@@ -17,7 +17,6 @@
     get_measurement_column,
     regulator_scatter_sql,
 )
-
 from tfbpshiny.utils.profiler import profile_span
 from tfbpshiny.utils.sample_conditions import fetch_sample_condition_map
 from tfbpshiny.utils.vdb_init import AppDatasets, get_regulator_display_name
diff --git a/tfbpshiny/modules/perturbation/server/workspace.py b/tfbpshiny/modules/perturbation/server/workspace.py
index f24eafe..e7f9baa 100644
--- a/tfbpshiny/modules/perturbation/server/workspace.py
+++ b/tfbpshiny/modules/perturbation/server/workspace.py
@@ -17,7 +17,6 @@
     get_measurement_column,
     regulator_scatter_sql,
 )
-
 from tfbpshiny.utils.profiler import profile_span
 from tfbpshiny.utils.sample_conditions import fetch_sample_condition_map
 from tfbpshiny.utils.vdb_init import AppDatasets, get_regulator_display_name

From 67e9d46f6e96e3045ea2504400b0253157145896 Mon Sep 17 00:00:00 2001
From: MackLiao <g.liao@wustl.edu>
Date: Sun, 26 Apr 2026 17:47:59 -0500
Subject: [PATCH 08/16] fix: eliminate spurious _topn_data reactivity bursts in
 comparison sidebar

The comparison module's sidebar_controls @render.ui re-rendered all input
widgets (top_n, effect_threshold, pvalue_threshold, facet_by) whenever the
active dataset selection changed. Recreating the input widgets caused Shiny
to emit spurious change events, invalidating the threshold reactive.calc
wrappers and re-triggering _topn_data() 2-5x per user action. Per the
performance report, ~78% of all _topn_data queries were intra-burst
duplicates (264 fired vs ~50 needed).

Move the four input widgets out of the dynamic @render.ui and into the
static comparison_sidebar_ui module UI, seeded with DEFAULT_* constants.
The dynamic block is replaced with a small empty_state_message render that
emits only the empty-state banner. Inputs now have stable DOM identity
across dataset toggles, so no spurious change events fire.

Also adds DEFAULT_FACET_BY="binding" to queries.py so all four sidebar
defaults live in one place, and routes the empty-state banner through
components.empty_state(compact=True) per the CLAUDE.md convention.

Verified against tmp/time_log.txt: comparison/_topn_data re-run factor
drops from 2.4x avg / 4.2x worst to 1.0x exactly, and total DB time on
the operation falls from 73.68s to 6.34s (~12x speedup).
---
 tfbpshiny/modules/comparison/queries.py       |  3 +
 .../modules/comparison/server/sidebar.py      | 61 +++++--------------
 tfbpshiny/modules/comparison/ui.py            | 49 ++++++++++++++-
 3 files changed, 66 insertions(+), 47 deletions(-)

diff --git a/tfbpshiny/modules/comparison/queries.py b/tfbpshiny/modules/comparison/queries.py
index 6e998f2..0c9867f 100644
--- a/tfbpshiny/modules/comparison/queries.py
+++ b/tfbpshiny/modules/comparison/queries.py
@@ -29,6 +29,9 @@
 #: Default p-value threshold (pvalue must be below this to be "responsive")
 DEFAULT_PVALUE_THRESHOLD = 0.05
 
+#: Default facet-by orientation for the top-N plot
+DEFAULT_FACET_BY = "binding"
+
 # ---------------------------------------------------------------------------
 # DTO query
 # ---------------------------------------------------------------------------
diff --git a/tfbpshiny/modules/comparison/server/sidebar.py b/tfbpshiny/modules/comparison/server/sidebar.py
index 4648290..cef4741 100644
--- a/tfbpshiny/modules/comparison/server/sidebar.py
+++ b/tfbpshiny/modules/comparison/server/sidebar.py
@@ -9,9 +9,10 @@
 from labretriever import VirtualDB
 from shiny import module, reactive, render, ui
 
-from tfbpshiny.components import sidebar_label
+from tfbpshiny.components import empty_state
 from tfbpshiny.modules.comparison.queries import (
     DEFAULT_EFFECT_THRESHOLD,
+    DEFAULT_FACET_BY,
     DEFAULT_PVALUE_THRESHOLD,
     DEFAULT_TOP_N,
 )
@@ -95,59 +96,27 @@ def facet_by() -> str:
         try:
             return str(input.facet_by())
         except Exception:
-            return "binding"
+            return DEFAULT_FACET_BY
 
     @render.ui
-    def sidebar_controls() -> ui.Tag:
-        binding = active_binding_datasets()
-        perturbation = active_perturbation_datasets()
+    def empty_state_message() -> ui.Tag | None:
+        """
+        Empty-state banner shown when either dataset axis is empty.
+
+        :trigger active_binding_datasets: re-runs on dataset selection change.
+        :trigger active_perturbation_datasets: re-runs on dataset selection change.
+        :returns: The banner div, or ``None`` when both axes have selections.
 
-        if not binding or not perturbation:
-            return ui.div(
-                {"class": "empty-state compact"},
+        """
+        if not active_binding_datasets() or not active_perturbation_datasets():
+            return empty_state(
                 ui.p(
                     "Select at least one binding and one perturbation dataset "
                     "from the Select Datasets page."
                 ),
+                compact=True,
             )
-
-        return ui.div(
-            ui.input_numeric(
-                "top_n",
-                "Top N",
-                value=top_n(),
-                min=1,
-                max=500,
-                step=5,
-            ),
-            sidebar_label("Responsive threshold"),
-            ui.input_slider(
-                "effect_threshold",
-                "Min |effect|",
-                min=0.0,
-                max=5.0,
-                value=effect_threshold(),
-                step=0.1,
-            ),
-            ui.input_slider(
-                "pvalue_threshold",
-                "Max p-value",
-                min=0.001,
-                max=1.0,
-                value=pvalue_threshold(),
-                step=0.001,
-            ),
-            sidebar_label("Facet by"),
-            ui.input_radio_buttons(
-                "facet_by",
-                label=None,
-                choices={
-                    "binding": "Binding source",
-                    "perturbation": "Perturbation source",
-                },
-                selected=facet_by(),
-            ),
-        )
+        return None
 
     return top_n, effect_threshold, pvalue_threshold, facet_by
 
diff --git a/tfbpshiny/modules/comparison/ui.py b/tfbpshiny/modules/comparison/ui.py
index 4f0391d..96c5ee8 100644
--- a/tfbpshiny/modules/comparison/ui.py
+++ b/tfbpshiny/modules/comparison/ui.py
@@ -6,18 +6,65 @@
 
 from tfbpshiny.components import (
     sidebar_heading,
+    sidebar_label,
     sidebar_shell,
     workspace_heading,
     workspace_shell,
 )
+from tfbpshiny.modules.comparison.queries import (
+    DEFAULT_EFFECT_THRESHOLD,
+    DEFAULT_FACET_BY,
+    DEFAULT_PVALUE_THRESHOLD,
+    DEFAULT_TOP_N,
+)
 
 
 @module.ui
 def comparison_sidebar_ui() -> ui.Tag:
+    # Inputs are declared statically so they keep stable DOM identity across
+    # dataset-toggle re-renders. The empty-state banner is the only reactive
+    # piece — see comparison/server/sidebar.py for why this matters.
     return sidebar_shell(
         "comparison-sidebar",
         header=sidebar_heading("Comparison"),
-        body=ui.output_ui("sidebar_controls"),
+        body=ui.div(
+            ui.output_ui("empty_state_message"),
+            ui.input_numeric(
+                "top_n",
+                "Top N",
+                value=DEFAULT_TOP_N,
+                min=1,
+                max=500,
+                step=5,
+            ),
+            sidebar_label("Responsive threshold"),
+            ui.input_slider(
+                "effect_threshold",
+                "Min |effect|",
+                min=0.0,
+                max=5.0,
+                value=DEFAULT_EFFECT_THRESHOLD,
+                step=0.1,
+            ),
+            ui.input_slider(
+                "pvalue_threshold",
+                "Max p-value",
+                min=0.001,
+                max=1.0,
+                value=DEFAULT_PVALUE_THRESHOLD,
+                step=0.001,
+            ),
+            sidebar_label("Facet by"),
+            ui.input_radio_buttons(
+                "facet_by",
+                label=None,
+                choices={
+                    "binding": "Binding source",
+                    "perturbation": "Perturbation source",
+                },
+                selected=DEFAULT_FACET_BY,
+            ),
+        ),
     )
 
 

From 64d6165b93237227dd578ca87c0d31b446953db2 Mon Sep 17 00:00:00 2001
From: MackLiao <g.liao@wustl.edu>
Date: Sun, 26 Apr 2026 17:48:55 -0500
Subject: [PATCH 09/16] feat: TFBPSHINY_PROFILE_LOG_FILE env var and startup
 destination log
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The profile logger's log file was hardcoded to tfbpshiny_profile.log in the
project root, which forced an extra cp step before every analysis run that
expected tmp/time_log.txt. Add a TFBPSHINY_PROFILE_LOG_FILE env var that
overrides the path so captures can land directly where the analyzer reads.

Also emit one INFO line at startup naming both logger destinations
("Logger destinations — shiny: handler=... file=... | profiler: ..."), so
env-var configuration is verifiable from the first lines of stdout instead
of having to grep for output files. Defaults are unchanged: profile log
still goes to tfbpshiny_profile.log when no env var is set.
---
 tfbpshiny/app.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/tfbpshiny/app.py b/tfbpshiny/app.py
index ad690cf..baa1f36 100644
--- a/tfbpshiny/app.py
+++ b/tfbpshiny/app.py
@@ -61,13 +61,24 @@
     log_file=log_file,
 )
 
+profile_handler_type = cast(
+    Literal["console", "file"],
+    os.getenv("TFBPSHINY_PROFILE_HANDLER", "console"),
+)
+profile_log_file = os.getenv("TFBPSHINY_PROFILE_LOG_FILE", "tfbpshiny_profile.log")
 profile_logger = configure_profile_logger(
-    handler_type=cast(
-        Literal["console", "file"],
-        os.getenv("TFBPSHINY_PROFILE_HANDLER", "console"),
-    ),
+    handler_type=profile_handler_type,
+    log_file=profile_log_file,
     enabled=os.getenv("TFBPSHINY_PROFILE_ENABLED", "1") == "1",
 )
+# Surface where each logger is writing so the env-var configuration is verifiable
+# from the first lines of stdout instead of having to grep for log files.
+logger.info(
+    f"Logger destinations — shiny: handler={handler_type} "
+    f"file={log_file if handler_type == 'file' else 'n/a'} | "
+    f"profiler: handler={profile_handler_type} "
+    f"file={profile_log_file if profile_handler_type == 'file' else 'n/a'}"
+)
 
 # instantiate the virtualDB and compute app-level dataset metadata
 

From 9e2da44b6e50357ea9f4422980c4f1a7c05dde3a Mon Sep 17 00:00:00 2001
From: MackLiao <g.liao@wustl.edu>
Date: Mon, 27 Apr 2026 15:26:50 -0500
Subject: [PATCH 10/16] fix: eliminate spurious reactivity bursts in
 binding/perturbation sidebars
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apply the same static-input restructure to the binding and perturbation
modules that was used for the comparison sidebar in 67e9d46. Both modules
had identical bug patterns: a sidebar_controls @render.ui that recreated
input_radio_buttons("col_preference") and input_radio_buttons("corr_type")
whenever active_*_datasets() changed. The recreated widgets emitted
spurious change events, invalidating the corr_type / col_preference
reactive.calc wrappers and re-triggering downstream calcs unnecessarily.

Per the latest profile capture, binding/scatter ran at 6x re-run factor
and perturbation/scatter at 15x re-run factor — same root cause as
comparison/_topn_data, but the bursts are smaller in absolute time so
they were not the headline of the original report.

The fix:
- Move col_preference and corr_type radio buttons from the dynamic
  @render.ui in server/sidebar.py into the static module UI in ui.py,
  seeded with new DEFAULT_CORR_TYPE / DEFAULT_COL_PREFERENCE constants
  in each module's queries.py (mirroring the comparison/queries.py
  pattern).
- Replace sidebar_controls with a small empty_state_message that emits
  only the empty-state banner when no datasets are active, routed
  through components.empty_state(compact=True).

Verified: 64 unit tests still pass, all four module imports clean.
Expect re-run factor on binding/scatter and perturbation/scatter to drop
to 1.0x in the next profile capture, plus likely downstream improvement
on binding/_all_corr_data and perturbation/_all_corr_data which depend
on the same reactive wrappers.
---
 tfbpshiny/modules/binding/queries.py          |  6 +++
 tfbpshiny/modules/binding/server/sidebar.py   | 45 ++++++++-----------
 tfbpshiny/modules/binding/ui.py               | 28 +++++++++++-
 tfbpshiny/modules/perturbation/queries.py     |  6 +++
 .../modules/perturbation/server/sidebar.py    | 45 ++++++++-----------
 tfbpshiny/modules/perturbation/ui.py          | 28 +++++++++++-
 6 files changed, 102 insertions(+), 56 deletions(-)

diff --git a/tfbpshiny/modules/binding/queries.py b/tfbpshiny/modules/binding/queries.py
index 8a1564f..15704b3 100644
--- a/tfbpshiny/modules/binding/queries.py
+++ b/tfbpshiny/modules/binding/queries.py
@@ -7,6 +7,12 @@
 import pandas as pd
 from labretriever import VirtualDB
 
+#: Default correlation method for the binding analysis sidebar
+DEFAULT_CORR_TYPE = "pearson"
+
+#: Default measurement column preference for the binding analysis sidebar
+DEFAULT_COL_PREFERENCE = "effect"
+
 # Map of db_name -> (effect_col, pvalue_col).
 # pvalue_col is empty string for datasets that have no pvalue column.
 # TODO: this information should be moved to virtualdb config.
diff --git a/tfbpshiny/modules/binding/server/sidebar.py b/tfbpshiny/modules/binding/server/sidebar.py
index 98b38f4..137c8fa 100644
--- a/tfbpshiny/modules/binding/server/sidebar.py
+++ b/tfbpshiny/modules/binding/server/sidebar.py
@@ -9,7 +9,11 @@
 from labretriever import VirtualDB
 from shiny import module, reactive, render, ui
 
-from tfbpshiny.components import sidebar_label
+from tfbpshiny.components import empty_state
+from tfbpshiny.modules.binding.queries import (
+    DEFAULT_COL_PREFERENCE,
+    DEFAULT_CORR_TYPE,
+)
 
 
 @module.server
@@ -46,7 +50,7 @@ def corr_type() -> str:
         try:
             return str(input.corr_type())
         except Exception:
-            return "pearson"
+            return DEFAULT_CORR_TYPE
 
     @reactive.calc
     def col_preference() -> str:
@@ -62,36 +66,23 @@ def col_preference() -> str:
         try:
             return str(input.col_preference())
         except Exception:
-            return "effect"
+            return DEFAULT_COL_PREFERENCE
 
     @render.ui
-    def sidebar_controls() -> ui.Tag:
-        active = active_binding_datasets()
+    def empty_state_message() -> ui.Tag | None:
+        """
+        Empty-state banner shown when no binding datasets are selected.
+
+        :trigger active_binding_datasets: re-runs on dataset selection change.
+        :returns: The banner div, or ``None`` when at least one dataset is active.
 
-        if not active:
-            return ui.div(
-                {"class": "empty-state compact"},
+        """
+        if not active_binding_datasets():
+            return empty_state(
                 ui.p("Select binding datasets from the Select Datasets page."),
+                compact=True,
             )
-
-        return ui.div(
-            sidebar_label("Column"),
-            ui.input_radio_buttons(
-                "col_preference",
-                label=None,
-                choices={"effect": "Effect", "pvalue": "P-value"},
-                selected=col_preference(),
-                inline=True,
-            ),
-            sidebar_label("Correlation"),
-            ui.input_radio_buttons(
-                "corr_type",
-                label=None,
-                choices={"pearson": "Pearson", "spearman": "Spearman"},
-                selected=corr_type(),
-                inline=True,
-            ),
-        )
+        return None
 
     return corr_type, col_preference
 
diff --git a/tfbpshiny/modules/binding/ui.py b/tfbpshiny/modules/binding/ui.py
index aecaa54..81c13ca 100644
--- a/tfbpshiny/modules/binding/ui.py
+++ b/tfbpshiny/modules/binding/ui.py
@@ -6,18 +6,44 @@
 
 from tfbpshiny.components import (
     sidebar_heading,
+    sidebar_label,
     sidebar_shell,
     workspace_heading,
     workspace_shell,
 )
+from tfbpshiny.modules.binding.queries import (
+    DEFAULT_COL_PREFERENCE,
+    DEFAULT_CORR_TYPE,
+)
 
 
 @module.ui
 def binding_sidebar_ui() -> ui.Tag:
+    # Inputs are declared statically so they keep stable DOM identity across
+    # dataset-toggle re-renders. The empty-state banner is the only reactive
+    # piece — see binding/server/sidebar.py for why this matters.
     return sidebar_shell(
         "binding-sidebar",
         header=sidebar_heading("Binding"),
-        body=ui.output_ui("sidebar_controls"),
+        body=ui.div(
+            ui.output_ui("empty_state_message"),
+            sidebar_label("Column"),
+            ui.input_radio_buttons(
+                "col_preference",
+                label=None,
+                choices={"effect": "Effect", "pvalue": "P-value"},
+                selected=DEFAULT_COL_PREFERENCE,
+                inline=True,
+            ),
+            sidebar_label("Correlation"),
+            ui.input_radio_buttons(
+                "corr_type",
+                label=None,
+                choices={"pearson": "Pearson", "spearman": "Spearman"},
+                selected=DEFAULT_CORR_TYPE,
+                inline=True,
+            ),
+        ),
     )
 
 
diff --git a/tfbpshiny/modules/perturbation/queries.py b/tfbpshiny/modules/perturbation/queries.py
index 296b11e..9084620 100644
--- a/tfbpshiny/modules/perturbation/queries.py
+++ b/tfbpshiny/modules/perturbation/queries.py
@@ -9,6 +9,12 @@
 
 from tfbpshiny.modules.binding.queries import _corr_pair_sql_impl
 
+#: Default correlation method for the perturbation analysis sidebar
+DEFAULT_CORR_TYPE = "pearson"
+
+#: Default measurement column preference for the perturbation analysis sidebar
+DEFAULT_COL_PREFERENCE = "effect"
+
 # Map of db_name -> (effect_col, pvalue_col).
 # pvalue_col is empty string for datasets that have no pvalue column.
 # TODO: this information should be moved to virtualdb config.
diff --git a/tfbpshiny/modules/perturbation/server/sidebar.py b/tfbpshiny/modules/perturbation/server/sidebar.py
index ff7352a..a9bf333 100644
--- a/tfbpshiny/modules/perturbation/server/sidebar.py
+++ b/tfbpshiny/modules/perturbation/server/sidebar.py
@@ -8,7 +8,11 @@
 
 from shiny import module, reactive, render, ui
 
-from tfbpshiny.components import sidebar_label
+from tfbpshiny.components import empty_state
+from tfbpshiny.modules.perturbation.queries import (
+    DEFAULT_COL_PREFERENCE,
+    DEFAULT_CORR_TYPE,
+)
 
 
 @module.server
@@ -45,7 +49,7 @@ def corr_type() -> str:
         try:
             return str(input.corr_type())
         except Exception:
-            return "pearson"
+            return DEFAULT_CORR_TYPE
 
     @reactive.calc
     def col_preference() -> str:
@@ -61,36 +65,23 @@ def col_preference() -> str:
         try:
             return str(input.col_preference())
         except Exception:
-            return "effect"
+            return DEFAULT_COL_PREFERENCE
 
     @render.ui
-    def sidebar_controls() -> ui.Tag:
-        active = active_perturbation_datasets()
+    def empty_state_message() -> ui.Tag | None:
+        """
+        Empty-state banner shown when no perturbation datasets are selected.
+
+        :trigger active_perturbation_datasets: re-runs on dataset selection change.
+        :returns: The banner div, or ``None`` when at least one dataset is active.
 
-        if not active:
-            return ui.div(
-                {"class": "empty-state compact"},
+        """
+        if not active_perturbation_datasets():
+            return empty_state(
                 ui.p("Select perturbation datasets from the Select Datasets page."),
+                compact=True,
             )
-
-        return ui.div(
-            sidebar_label("Column"),
-            ui.input_radio_buttons(
-                "col_preference",
-                label=None,
-                choices={"effect": "Effect", "pvalue": "P-value"},
-                selected=col_preference(),
-                inline=True,
-            ),
-            sidebar_label("Correlation"),
-            ui.input_radio_buttons(
-                "corr_type",
-                label=None,
-                choices={"pearson": "Pearson", "spearman": "Spearman"},
-                selected=corr_type(),
-                inline=True,
-            ),
-        )
+        return None
 
     return corr_type, col_preference
 
diff --git a/tfbpshiny/modules/perturbation/ui.py b/tfbpshiny/modules/perturbation/ui.py
index b2574b8..9850ad7 100644
--- a/tfbpshiny/modules/perturbation/ui.py
+++ b/tfbpshiny/modules/perturbation/ui.py
@@ -6,18 +6,44 @@
 
 from tfbpshiny.components import (
     sidebar_heading,
+    sidebar_label,
     sidebar_shell,
     workspace_heading,
     workspace_shell,
 )
+from tfbpshiny.modules.perturbation.queries import (
+    DEFAULT_COL_PREFERENCE,
+    DEFAULT_CORR_TYPE,
+)
 
 
 @module.ui
 def perturbation_sidebar_ui() -> ui.Tag:
+    # Inputs are declared statically so they keep stable DOM identity across
+    # dataset-toggle re-renders. The empty-state banner is the only reactive
+    # piece — see perturbation/server/sidebar.py for why this matters.
     return sidebar_shell(
         "perturbation-sidebar",
         header=sidebar_heading("Perturbation"),
-        body=ui.output_ui("sidebar_controls"),
+        body=ui.div(
+            ui.output_ui("empty_state_message"),
+            sidebar_label("Column"),
+            ui.input_radio_buttons(
+                "col_preference",
+                label=None,
+                choices={"effect": "Effect", "pvalue": "P-value"},
+                selected=DEFAULT_COL_PREFERENCE,
+                inline=True,
+            ),
+            sidebar_label("Correlation"),
+            ui.input_radio_buttons(
+                "corr_type",
+                label=None,
+                choices={"pearson": "Pearson", "spearman": "Spearman"},
+                selected=DEFAULT_CORR_TYPE,
+                inline=True,
+            ),
+        ),
     )
 
 

From 20c74d5ef5c034a57bc50e1ee24834ca9a6990cf Mon Sep 17 00:00:00 2001
From: MackLiao <g.liao@wustl.edu>
Date: Mon, 27 Apr 2026 15:32:17 -0500
Subject: [PATCH 11/16] =?UTF-8?q?perf:=20bump=20=5Fsettled=5Fdatasets=20de?=
 =?UTF-8?q?bounce=200.3s=20=E2=86=92=201.0s?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Profile capture showed select_datasets/_matrix_data recomputing up to 5x
per user action (10x re-run factor on the worst burst — 280 queries fired
where ~28 would have sufficed). Root cause: the prior 0.3s debounce on
_settled_datasets only coalesced keyboard-mash speed clicks. Normal mouse
toggling has a click cadence of ~400-700ms, so each click fell outside
the debounce window and fired a full N-dataset query matrix recompute.

Bumping to 1.0s catches the typical multi-toggle session at the cost of
~0.7s extra latency before the matrix updates after the user stops
clicking. Expected impact: collapses the 3-10x re-run bursts down to ~1x
and saves ~12s of wasted DB time per session given the previous capture.

Note: there is still a natural 2.0x floor inside _matrix_data because
each pair fires two profile_span events (one per restricted-sample
query). Eliminating that requires per-dataset / per-pair @reactive.calc
memoization (PDF report rec #2) — left as a separate larger change.
---
 tfbpshiny/modules/select_datasets/server/workspace.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tfbpshiny/modules/select_datasets/server/workspace.py b/tfbpshiny/modules/select_datasets/server/workspace.py
index 14e05b4..64ce777 100644
--- a/tfbpshiny/modules/select_datasets/server/workspace.py
+++ b/tfbpshiny/modules/select_datasets/server/workspace.py
@@ -49,12 +49,16 @@ def select_datasets_workspace_server(
         for db_name in vdb.get_datasets()
     }
 
-    @debounce(0.3)
+    @debounce(1.0)
     @reactive.calc
     def _settled_datasets() -> list[str]:
         """
         Combined list of all active datasets, debounced to coalesce rapid toggle clicks.
 
+        Window is 1.0s so normal-speed multi-toggle sessions (typical click cadence
+        ~400-700ms) coalesce into a single ``_matrix_data`` recompute instead of
+        firing one full N-dataset query matrix per click.
+
         :trigger: ``active_binding_datasets``, ``active_perturbation_datasets`` —
             re-runs whenever either list changes, but downstream is only notified
             after a specified quiet period.

From 79fe93142447d32c3845bed0f05569f3d7d52c5d Mon Sep 17 00:00:00 2001
From: MackLiao <g.liao@wustl.edu>
Date: Mon, 27 Apr 2026 15:42:39 -0500
Subject: [PATCH 12/16] fix: debounce dataset_filters read in _matrix_data to
 fix toggle-off race

Toggle-OFF fires two reactive writes in succession from
dataset_row._on_toggle:
  1. toggle_state.set(...)         - debounced 1s by _settled_datasets
  2. dataset_filters.set(...)      - clears the dataset's filters,
                                     immediately invalidates _matrix_data

Because _matrix_data was reading dataset_filters() directly (no debounce),
the second write triggered a recompute with the pre-toggle dataset list
(still containing the just-toggled-off dataset, since _settled_datasets
hadn't yet expired) but the post-toggle filters. 1s later, when the
debounce expired, the matrix recomputed AGAIN with the correct list. UX
manifestation: the matrix flickering back and forth before settling, plus
asyncio CancelledError tracebacks from Shiny canceling the first
in-flight recompute when the second invalidation arrived.

Toggle-ON does not exhibit the race because it does not fire the second
filter write.

Fix: introduce a parallel _settled_filters debounced calc and have
_matrix_data read from it. Both invalidation sources now share the same
1s settle window, so toggle-off filter-clear and dataset-list updates
coalesce into a single matrix recompute. Other dataset_filters()
consumers (modal handlers, filter button styling) keep instant response.
---
 .../select_datasets/server/workspace.py       | 33 +++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/tfbpshiny/modules/select_datasets/server/workspace.py b/tfbpshiny/modules/select_datasets/server/workspace.py
index 64ce777..f51cda0 100644
--- a/tfbpshiny/modules/select_datasets/server/workspace.py
+++ b/tfbpshiny/modules/select_datasets/server/workspace.py
@@ -67,6 +67,33 @@ def _settled_datasets() -> list[str]:
         """
         return active_binding_datasets() + active_perturbation_datasets()
 
+    @debounce(1.0)
+    @reactive.calc
+    def _settled_filters() -> dict[str, Any]:
+        """
+        Debounced view of ``dataset_filters`` for ``_matrix_data`` consumption.
+
+        Toggling a dataset OFF fires two reactive writes in quick succession from
+        ``dataset_row._on_toggle``: a ``toggle_state.set`` (caught by
+        ``_settled_datasets``'s debounce) and a ``dataset_filters.set`` to clear
+        the dataset's filters. Without this second debounce, the filter write
+        invalidates ``_matrix_data`` immediately while ``_settled_datasets`` is
+        still showing the pre-toggle list — causing the matrix to recompute with
+        an inconsistent (old list, new filters) view, then again 1s later when
+        the list settles. That manifests as the matrix flashing back and forth.
+
+        Both reads are now coalesced through the same 1s window. Other
+        ``dataset_filters()`` consumers (modal handlers, filter buttons) are
+        unaffected and keep instant response.
+
+        :trigger: ``dataset_filters`` — re-runs when any filter changes,
+            but downstream is only notified after the same quiet period as
+            ``_settled_datasets``.
+        :returns: Current filter dict snapshot.
+
+        """
+        return dataset_filters()
+
     @reactive.calc
     def _matrix_data() -> dict[str, Any]:
         """
@@ -74,14 +101,16 @@ def _matrix_data() -> dict[str, Any]:
         counts.
 
         :trigger: ``_settled_datasets`` — re-runs after rapid toggle changes settle.
-            ``dataset_filters`` — re-runs when any filter changes.
+            ``_settled_filters`` — re-runs after filter changes settle (same
+            debounce window, so toggle-off filter-clear and dataset-list updates
+            are coalesced into one recompute).
         :returns: Dict with keys ``"diagonal"`` — ``{db_name: {"regulators": int,
             "samples": int}}``; ``"cross_dataset"`` — ``{(db_i, db_j):
             {"common_regulators": int, "samples_a": int, "samples_b": int}}``.
 
         """
         active = _settled_datasets()
-        filters = dataset_filters()
+        filters = _settled_filters()
 
         regulator_sets: dict[str, set[str]] = {}
         diagonal: dict[str, dict[str, int]] = {}

From 82c765464bd9a549ade6a358e6db7c7de3a78a53 Mon Sep 17 00:00:00 2001
From: MackLiao <g.liao@wustl.edu>
Date: Mon, 27 Apr 2026 15:50:36 -0500
Subject: [PATCH 13/16] fix: stabilize regulator_selector DOM via
 update_selectize in binding/perturbation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous fix moved the binding/perturbation SIDEBAR inputs to a static
declaration, but the regulator dropdown lives in the WORKSPACE and was
still being recreated by a @render.ui every time _all_corr_data
invalidated. Each recreation fired a spurious input.selected_regulator
change event, cascading into 6× and 15× re-runs of the downstream
scatter/regulator_plot queries (per the new analyzer capture).

Same root cause as the comparison sidebar fix and the prior binding/
perturbation sidebar fix: a dynamic @render.ui returning a freshly
constructed input widget. Same fix template, applied at the workspace
layer this time:

- Declare ui.input_selectize("selected_regulator", ...) statically in
  binding/ui.py and perturbation/ui.py.
- Replace @render.ui regulator_selector with @reactive.effect
  _update_regulator_choices that calls ui.update_selectize(...) to push
  new choices and preserve the user's selection across data refreshes.

The pattern follows select_datasets/server/sidebar.py's existing use of
ui.update_selectize. The widget DOM persists across invalidations, so
spurious change events disappear and downstream calcs only re-run on
genuine user actions or genuine data changes.

Expected impact in the next profile capture:
- binding/scatter:           6× → 1× re-run factor
- binding/regulator_plot:    3× → 1× re-run factor
- perturbation/scatter:     15× → 1× re-run factor
- perturbation/regulator_plot: 15× → 1× re-run factor

UX side effect: when no datasets are selected the regulator dropdown is
visible-but-empty rather than absent. The empty-state messaging in the
sidebar already explains the user needs to select datasets, so this
should not be confusing.
---
 tfbpshiny/modules/binding/server/workspace.py | 44 +++++++++----------
 tfbpshiny/modules/binding/ui.py               | 10 ++++-
 .../modules/perturbation/server/workspace.py  | 44 +++++++++----------
 tfbpshiny/modules/perturbation/ui.py          | 10 ++++-
 4 files changed, 62 insertions(+), 46 deletions(-)

diff --git a/tfbpshiny/modules/binding/server/workspace.py b/tfbpshiny/modules/binding/server/workspace.py
index 4d6f5d3..ba7c557 100644
--- a/tfbpshiny/modules/binding/server/workspace.py
+++ b/tfbpshiny/modules/binding/server/workspace.py
@@ -328,24 +328,27 @@ def distributions_plot() -> ui.Tag:
             )
         return ui.HTML(html)
 
-    @render.ui
-    def regulator_selector() -> ui.Tag:
+    @reactive.effect
+    def _update_regulator_choices() -> None:
         """
-        Dropdown of regulators present in at least one pair's correlation data.
+        Push regulator choices to the static ``selected_regulator`` selectize.
 
-        Choices are keyed by locus tag and labelled by gene symbol where available. The
-        previously selected regulator is preserved across re-renders if it is still
-        present in the new choice set.
+        ``selected_regulator`` is declared statically in ``binding/ui.py``;
+        this effect mutates its choices/selected value via ``ui.update_selectize``
+        whenever the correlation data changes. The prior ``@render.ui`` pattern
+        recreated the widget on every ``_all_corr_data`` invalidation, which
+        fired spurious change events on ``input.selected_regulator`` and
+        cascaded into 6-15× re-runs of the downstream scatter / regulator_plot
+        queries. Updating in place preserves DOM identity.
 
-        :trigger _all_corr_data: re-renders when correlation data changes (new
-        datasets selected, filters applied, or column/method changed). :trigger
-        active_binding_datasets: re-renders to refresh the symbol map     when the
-        active dataset set changes.
+        :trigger _all_corr_data: choices refresh when correlation data changes
+            (new datasets, filters applied, or column/method changed).
 
         """
         corr_data = _all_corr_data()
         if not corr_data:
-            return ui.span()
+            ui.update_selectize("selected_regulator", choices={}, selected=None)
+            return
 
         # sym_map is built at server init from the pre-computed lookup table
         all_regs: set[str] = set()
@@ -354,22 +357,19 @@ def regulator_selector() -> ui.Tag:
                 all_regs |= set(df["regulator_locus_tag"].dropna().unique())
 
         if not all_regs:
-            return ui.span()
+            ui.update_selectize("selected_regulator", choices={}, selected=None)
+            return
         choices = {r: sym_map.get(r, r) for r in all_regs}
         choices = dict(sorted(choices.items(), key=lambda kv: kv[1].lower()))
 
-        try:
-            current = str(input.selected_regulator())
-        except Exception:
-            current = ""
+        with reactive.isolate():
+            try:
+                current = str(input.selected_regulator())
+            except Exception:
+                current = ""
         default = current if current in choices else next(iter(choices))
 
-        return ui.input_selectize(
-            "selected_regulator",
-            "Regulator",
-            choices=choices,
-            selected=default,
-        )
+        ui.update_selectize("selected_regulator", choices=choices, selected=default)
 
     @render.ui
     def regulator_plots() -> ui.Tag:
diff --git a/tfbpshiny/modules/binding/ui.py b/tfbpshiny/modules/binding/ui.py
index 81c13ca..9445832 100644
--- a/tfbpshiny/modules/binding/ui.py
+++ b/tfbpshiny/modules/binding/ui.py
@@ -49,13 +49,21 @@ def binding_sidebar_ui() -> ui.Tag:
 
 @module.ui
 def binding_workspace_ui() -> ui.Tag:
+    # selected_regulator is declared statically so its DOM identity persists
+    # across data refreshes. Choices are pushed via ui.update_selectize from
+    # binding/server/workspace.py — see that file for why this matters.
     return workspace_shell(
         "binding-workspace",
         header=workspace_heading("Binding Correlation"),
         body=ui.div(
             ui.output_ui("distributions_plot"),
             ui.hr(),
-            ui.output_ui("regulator_selector"),
+            ui.input_selectize(
+                "selected_regulator",
+                "Regulator",
+                choices=[],
+                selected=None,
+            ),
             ui.output_ui("regulator_plots"),
         ),
     )
diff --git a/tfbpshiny/modules/perturbation/server/workspace.py b/tfbpshiny/modules/perturbation/server/workspace.py
index e7f9baa..aefcc35 100644
--- a/tfbpshiny/modules/perturbation/server/workspace.py
+++ b/tfbpshiny/modules/perturbation/server/workspace.py
@@ -329,24 +329,27 @@ def distributions_plot() -> ui.Tag:
             )
         return ui.HTML(html)
 
-    @render.ui
-    def regulator_selector() -> ui.Tag:
+    @reactive.effect
+    def _update_regulator_choices() -> None:
         """
-        Dropdown of regulators present in at least one pair's correlation data.
+        Push regulator choices to the static ``selected_regulator`` selectize.
 
-        Choices are keyed by locus tag and labelled by gene symbol where available. The
-        previously selected regulator is preserved across re-renders if it is still
-        present in the new choice set.
+        ``selected_regulator`` is declared statically in ``perturbation/ui.py``;
+        this effect mutates its choices/selected value via ``ui.update_selectize``
+        whenever the correlation data changes. The prior ``@render.ui`` pattern
+        recreated the widget on every ``_all_corr_data`` invalidation, which
+        fired spurious change events on ``input.selected_regulator`` and
+        cascaded into 6-15× re-runs of the downstream scatter / regulator_plot
+        queries. Updating in place preserves DOM identity.
 
-        :trigger _all_corr_data: re-renders when correlation data changes (new
-        datasets selected, filters applied, or column/method changed). :trigger
-        active_perturbation_datasets: re-renders to refresh the symbol     map when the
-        active dataset set changes.
+        :trigger _all_corr_data: choices refresh when correlation data changes
+            (new datasets, filters applied, or column/method changed).
 
         """
         corr_data = _all_corr_data()
         if not corr_data:
-            return ui.span()
+            ui.update_selectize("selected_regulator", choices={}, selected=None)
+            return
 
         # sym_map is built at server init from the pre-computed lookup table
         all_regs: set[str] = set()
@@ -355,22 +358,19 @@ def regulator_selector() -> ui.Tag:
                 all_regs |= set(df["regulator_locus_tag"].dropna().unique())
 
         if not all_regs:
-            return ui.span()
+            ui.update_selectize("selected_regulator", choices={}, selected=None)
+            return
         choices = {r: sym_map.get(r, r) for r in all_regs}
         choices = dict(sorted(choices.items(), key=lambda kv: kv[1].lower()))
 
-        try:
-            current = str(input.selected_regulator())
-        except Exception:
-            current = ""
+        with reactive.isolate():
+            try:
+                current = str(input.selected_regulator())
+            except Exception:
+                current = ""
         default = current if current in choices else next(iter(choices))
 
-        return ui.input_selectize(
-            "selected_regulator",
-            "Regulator",
-            choices=choices,
-            selected=default,
-        )
+        ui.update_selectize("selected_regulator", choices=choices, selected=default)
 
     @render.ui
     def regulator_plots() -> ui.Tag:
diff --git a/tfbpshiny/modules/perturbation/ui.py b/tfbpshiny/modules/perturbation/ui.py
index 9850ad7..1ce8aed 100644
--- a/tfbpshiny/modules/perturbation/ui.py
+++ b/tfbpshiny/modules/perturbation/ui.py
@@ -49,13 +49,21 @@ def perturbation_sidebar_ui() -> ui.Tag:
 
 @module.ui
 def perturbation_workspace_ui() -> ui.Tag:
+    # selected_regulator is declared statically so its DOM identity persists
+    # across data refreshes. Choices are pushed via ui.update_selectize from
+    # perturbation/server/workspace.py — see that file for why this matters.
     return workspace_shell(
         "perturbation-workspace",
         header=workspace_heading("Perturbation Analysis"),
         body=ui.div(
             ui.output_ui("distributions_plot"),
             ui.hr(),
-            ui.output_ui("regulator_selector"),
+            ui.input_selectize(
+                "selected_regulator",
+                "Regulator",
+                choices=[],
+                selected=None,
+            ),
             ui.output_ui("regulator_plots"),
         ),
     )

From 18205657508906114bd83cf488c7b80c8fa3dbfb Mon Sep 17 00:00:00 2001
From: MackLiao <g.liao@wustl.edu>
Date: Mon, 27 Apr 2026 15:55:22 -0500
Subject: [PATCH 14/16] fix: convert regulator-choices effect to lazy render to
 stop eager correlation queries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous regulator_selector fix used a @reactive.effect to push
choices via ui.update_selectize. Effects run eagerly regardless of which
page is mounted, so every dataset toggle on the Selection page caused
both binding and perturbation modules to evaluate _all_corr_data() —
firing the expensive correlation queries even when the user wasn't on
those pages. That swamped the Selection page's debounce work and made
toggle-off feel laggy again.

Move the choices-push into a hidden @render.ui placeholder that is only
mounted when the binding/perturbation workspace is active. The render
machinery is lazy: when the placeholder isn't in the DOM, _all_corr_data
isn't read, so no correlation queries fire.

The placeholder returns an invisible span purely as a side-effect
carrier; the real work (ui.update_selectize) happens inside the render
function. The static input_selectize widget in workspace_ui still owns
the DOM identity, so spurious change events stay eliminated when the
user IS on the binding/perturbation page.

Net effect:
- Selection-page dataset toggles: no longer trigger binding/perturbation
  correlation queries (regression fix).
- Binding/perturbation page interactions: still benefit from the static
  widget DOM (no spurious change events on _all_corr_data invalidation).
---
 tfbpshiny/modules/binding/server/workspace.py | 31 ++++++++++++-------
 tfbpshiny/modules/binding/ui.py               |  3 ++
 .../modules/perturbation/server/workspace.py  | 31 ++++++++++++-------
 tfbpshiny/modules/perturbation/ui.py          |  3 ++
 4 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/tfbpshiny/modules/binding/server/workspace.py b/tfbpshiny/modules/binding/server/workspace.py
index ba7c557..f1cb6d1 100644
--- a/tfbpshiny/modules/binding/server/workspace.py
+++ b/tfbpshiny/modules/binding/server/workspace.py
@@ -328,27 +328,35 @@ def distributions_plot() -> ui.Tag:
             )
         return ui.HTML(html)
 
-    @reactive.effect
-    def _update_regulator_choices() -> None:
+    @render.ui
+    def _regulator_choices_trigger() -> ui.Tag:
         """
-        Push regulator choices to the static ``selected_regulator`` selectize.
+        Hidden render that pushes regulator choices to the static selectize.
 
         ``selected_regulator`` is declared statically in ``binding/ui.py``;
-        this effect mutates its choices/selected value via ``ui.update_selectize``
-        whenever the correlation data changes. The prior ``@render.ui`` pattern
-        recreated the widget on every ``_all_corr_data`` invalidation, which
-        fired spurious change events on ``input.selected_regulator`` and
-        cascaded into 6-15× re-runs of the downstream scatter / regulator_plot
-        queries. Updating in place preserves DOM identity.
+        this render mutates its choices/selected value via
+        ``ui.update_selectize`` whenever the correlation data changes. The
+        prior ``@render.ui regulator_selector`` pattern recreated the widget
+        on every ``_all_corr_data`` invalidation, which fired spurious change
+        events on ``input.selected_regulator`` and cascaded into 6-15× re-runs
+        of the downstream scatter / regulator_plot queries. Updating in place
+        preserves DOM identity.
+
+        Implemented as a render rather than a ``@reactive.effect`` so the
+        ``_all_corr_data`` read is **lazy** — the placeholder only mounts when
+        this workspace is active, so dataset toggles on the Selection page do
+        not incur correlation queries here.
 
         :trigger _all_corr_data: choices refresh when correlation data changes
             (new datasets, filters applied, or column/method changed).
+        :returns: An invisible span; output is purely a side-effect carrier.
 
         """
         corr_data = _all_corr_data()
+        marker = ui.tags.span(style="display:none")
         if not corr_data:
             ui.update_selectize("selected_regulator", choices={}, selected=None)
-            return
+            return marker
 
         # sym_map is built at server init from the pre-computed lookup table
         all_regs: set[str] = set()
@@ -358,7 +366,7 @@ def _update_regulator_choices() -> None:
 
         if not all_regs:
             ui.update_selectize("selected_regulator", choices={}, selected=None)
-            return
+            return marker
         choices = {r: sym_map.get(r, r) for r in all_regs}
         choices = dict(sorted(choices.items(), key=lambda kv: kv[1].lower()))
 
@@ -370,6 +378,7 @@ def _update_regulator_choices() -> None:
         default = current if current in choices else next(iter(choices))
 
         ui.update_selectize("selected_regulator", choices=choices, selected=default)
+        return marker
 
     @render.ui
     def regulator_plots() -> ui.Tag:
diff --git a/tfbpshiny/modules/binding/ui.py b/tfbpshiny/modules/binding/ui.py
index 9445832..9acf659 100644
--- a/tfbpshiny/modules/binding/ui.py
+++ b/tfbpshiny/modules/binding/ui.py
@@ -64,6 +64,9 @@ def binding_workspace_ui() -> ui.Tag:
                 choices=[],
                 selected=None,
             ),
+            # Hidden placeholder; mounted only when this workspace is active so
+            # the choices-update effect stays lazy. See server/workspace.py.
+            ui.output_ui("_regulator_choices_trigger"),
             ui.output_ui("regulator_plots"),
         ),
     )
diff --git a/tfbpshiny/modules/perturbation/server/workspace.py b/tfbpshiny/modules/perturbation/server/workspace.py
index aefcc35..4c9e2be 100644
--- a/tfbpshiny/modules/perturbation/server/workspace.py
+++ b/tfbpshiny/modules/perturbation/server/workspace.py
@@ -329,27 +329,35 @@ def distributions_plot() -> ui.Tag:
             )
         return ui.HTML(html)
 
-    @reactive.effect
-    def _update_regulator_choices() -> None:
+    @render.ui
+    def _regulator_choices_trigger() -> ui.Tag:
         """
-        Push regulator choices to the static ``selected_regulator`` selectize.
+        Hidden render that pushes regulator choices to the static selectize.
 
         ``selected_regulator`` is declared statically in ``perturbation/ui.py``;
-        this effect mutates its choices/selected value via ``ui.update_selectize``
-        whenever the correlation data changes. The prior ``@render.ui`` pattern
-        recreated the widget on every ``_all_corr_data`` invalidation, which
-        fired spurious change events on ``input.selected_regulator`` and
-        cascaded into 6-15× re-runs of the downstream scatter / regulator_plot
-        queries. Updating in place preserves DOM identity.
+        this render mutates its choices/selected value via
+        ``ui.update_selectize`` whenever the correlation data changes. The
+        prior ``@render.ui regulator_selector`` pattern recreated the widget
+        on every ``_all_corr_data`` invalidation, which fired spurious change
+        events on ``input.selected_regulator`` and cascaded into 6-15× re-runs
+        of the downstream scatter / regulator_plot queries. Updating in place
+        preserves DOM identity.
+
+        Implemented as a render rather than a ``@reactive.effect`` so the
+        ``_all_corr_data`` read is **lazy** — the placeholder only mounts when
+        this workspace is active, so dataset toggles on the Selection page do
+        not incur correlation queries here.
 
         :trigger _all_corr_data: choices refresh when correlation data changes
             (new datasets, filters applied, or column/method changed).
+        :returns: An invisible span; output is purely a side-effect carrier.
 
         """
         corr_data = _all_corr_data()
+        marker = ui.tags.span(style="display:none")
         if not corr_data:
             ui.update_selectize("selected_regulator", choices={}, selected=None)
-            return
+            return marker
 
         # sym_map is built at server init from the pre-computed lookup table
         all_regs: set[str] = set()
@@ -359,7 +367,7 @@ def _update_regulator_choices() -> None:
 
         if not all_regs:
             ui.update_selectize("selected_regulator", choices={}, selected=None)
-            return
+            return marker
         choices = {r: sym_map.get(r, r) for r in all_regs}
         choices = dict(sorted(choices.items(), key=lambda kv: kv[1].lower()))
 
@@ -371,6 +379,7 @@ def _update_regulator_choices() -> None:
         default = current if current in choices else next(iter(choices))
 
         ui.update_selectize("selected_regulator", choices=choices, selected=default)
+        return marker
 
     @render.ui
     def regulator_plots() -> ui.Tag:
diff --git a/tfbpshiny/modules/perturbation/ui.py b/tfbpshiny/modules/perturbation/ui.py
index 1ce8aed..6a301ae 100644
--- a/tfbpshiny/modules/perturbation/ui.py
+++ b/tfbpshiny/modules/perturbation/ui.py
@@ -64,6 +64,9 @@ def perturbation_workspace_ui() -> ui.Tag:
                 choices=[],
                 selected=None,
             ),
+            # Hidden placeholder; mounted only when this workspace is active so
+            # the choices-update effect stays lazy. See server/workspace.py.
+            ui.output_ui("_regulator_choices_trigger"),
             ui.output_ui("regulator_plots"),
         ),
     )

From c9048eee54636e1fdad954cd5c03ff51075ee399 Mon Sep 17 00:00:00 2001
From: MackLiao <g.liao@wustl.edu>
Date: Mon, 27 Apr 2026 16:02:22 -0500
Subject: [PATCH 15/16] fix: pin _regulator_choices_trigger with
 suspend_when_hidden=False
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Shiny's default output suspension policy pauses @render.ui evaluation
when the output is detected as hidden. Today the workspace_region in
app.py wholesale-replaces its child output, so the trigger is either
fully unmounted (correctly inactive) or fully mounted (correctly firing).
But if a future layout change wraps the workspace in navset_tab,
conditional_panel, or any display:none toggle, Shiny may classify the
trigger as "hidden" and silently stop firing — breaking the regulator
choices update with no error.

Decorating the render with @output(suspend_when_hidden=False) opts out
of that default and pins the side-effect to fire whenever its reactive
deps invalidate AND the output is registered, regardless of visibility
heuristics. The added comment also warns future contributors not to
"clean up" by converting back to @reactive.effect, which would
re-introduce the eager-execution regression that 1820565 fixed.

Defensive change suggested by the multi-perspective review of the
preceding three commits. No behavior change today; safety net for
future layout work.
---
 tfbpshiny/modules/binding/server/workspace.py      | 5 +++++
 tfbpshiny/modules/perturbation/server/workspace.py | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/tfbpshiny/modules/binding/server/workspace.py b/tfbpshiny/modules/binding/server/workspace.py
index f1cb6d1..5e8b3b0 100644
--- a/tfbpshiny/modules/binding/server/workspace.py
+++ b/tfbpshiny/modules/binding/server/workspace.py
@@ -328,6 +328,11 @@ def distributions_plot() -> ui.Tag:
             )
         return ui.HTML(html)
 
+    # Disable Shiny's default suspend-when-hidden behavior so the side-effect
+    # render keeps firing reliably even if a future layout wraps the workspace
+    # in a navset_tab / conditional_panel / display:none toggle.
+    # DO NOT convert this back to @reactive.effect — see CLAUDE.md.
+    @output(suspend_when_hidden=False)
     @render.ui
     def _regulator_choices_trigger() -> ui.Tag:
         """
diff --git a/tfbpshiny/modules/perturbation/server/workspace.py b/tfbpshiny/modules/perturbation/server/workspace.py
index 4c9e2be..29bc992 100644
--- a/tfbpshiny/modules/perturbation/server/workspace.py
+++ b/tfbpshiny/modules/perturbation/server/workspace.py
@@ -329,6 +329,11 @@ def distributions_plot() -> ui.Tag:
             )
         return ui.HTML(html)
 
+    # Disable Shiny's default suspend-when-hidden behavior so the side-effect
+    # render keeps firing reliably even if a future layout wraps the workspace
+    # in a navset_tab / conditional_panel / display:none toggle.
+    # DO NOT convert this back to @reactive.effect — see CLAUDE.md.
+    @output(suspend_when_hidden=False)
     @render.ui
     def _regulator_choices_trigger() -> ui.Tag:
         """

From 86818d3f20c244cef0354ed5b50d9b6a87afa955 Mon Sep 17 00:00:00 2001
From: MackLiao <g.liao@wustl.edu>
Date: Mon, 27 Apr 2026 16:33:36 -0500
Subject: [PATCH 16/16] refactor: extract _SETTLED_WINDOW_SEC and update stale
 sidebar docstrings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two cleanups from the tech-debt audit of the reactivity-fix branch:

1. Extract _SETTLED_WINDOW_SEC = 1.0 module constant in
   select_datasets/server/workspace.py and reference it from both
   _settled_datasets and _settled_filters debounce decorators. The two
   windows MUST stay equal for the toggle-off coalesce property to hold;
   making the coupling syntactically visible prevents accidental drift if
   either is retuned later.

2. Replace the stale "Render ... sidebar controls" summary in all three
   correlation-style module sidebar servers (comparison, binding,
   perturbation) with an accurate description. After the static-input
   refactor, these servers no longer render the controls — the static UI
   does. They only expose reactive accessors and render the empty-state
   banner.

No behavior change.
---
 tfbpshiny/modules/binding/server/sidebar.py   |  8 ++++++-
 .../modules/comparison/server/sidebar.py      |  9 ++++++-
 .../modules/perturbation/server/sidebar.py    |  8 ++++++-
 .../select_datasets/server/workspace.py       | 24 ++++++++++++-------
 4 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/tfbpshiny/modules/binding/server/sidebar.py b/tfbpshiny/modules/binding/server/sidebar.py
index 137c8fa..4300729 100644
--- a/tfbpshiny/modules/binding/server/sidebar.py
+++ b/tfbpshiny/modules/binding/server/sidebar.py
@@ -30,7 +30,13 @@ def binding_sidebar_server(
     Callable[[], str],  # col_preference: "effect" | "pvalue"
 ]:
     """
-    Render binding analysis sidebar controls; return reactive selections.
+    Wire up reactive accessors for the statically-declared binding sidebar controls and
+    render the empty-state banner.
+
+    The actual input widgets (``corr_type``, ``col_preference``) are declared
+    in ``binding_sidebar_ui`` so their DOM identity persists across
+    dataset-toggle re-renders. This server only exposes reactive accessors for
+    those inputs and conditionally renders the dataset-required banner.
 
     :return: Tuple of (corr_type, col_preference).
 
diff --git a/tfbpshiny/modules/comparison/server/sidebar.py b/tfbpshiny/modules/comparison/server/sidebar.py
index cef4741..25b7123 100644
--- a/tfbpshiny/modules/comparison/server/sidebar.py
+++ b/tfbpshiny/modules/comparison/server/sidebar.py
@@ -34,7 +34,14 @@ def comparison_sidebar_server(
     Callable[[], str],  # facet_by: "binding" | "perturbation"
 ]:
     """
-    Render comparison sidebar controls and return reactive selections.
+    Wire up reactive accessors for the statically-declared comparison sidebar controls
+    and render the empty-state banner.
+
+    The actual input widgets (``top_n``, ``effect_threshold``,
+    ``pvalue_threshold``, ``facet_by``) are declared in ``comparison_sidebar_ui``
+    so their DOM identity persists across dataset-toggle re-renders. This server
+    only exposes reactive accessors for those inputs and conditionally renders
+    the dataset-required banner.
 
     :returns: Tuple of (top_n, effect_threshold, pvalue_threshold, facet_by).
 
diff --git a/tfbpshiny/modules/perturbation/server/sidebar.py b/tfbpshiny/modules/perturbation/server/sidebar.py
index a9bf333..ec04d81 100644
--- a/tfbpshiny/modules/perturbation/server/sidebar.py
+++ b/tfbpshiny/modules/perturbation/server/sidebar.py
@@ -29,7 +29,13 @@ def perturbation_sidebar_server(
     Callable[[], str],  # col_preference: "effect" | "pvalue"
 ]:
     """
-    Render perturbation analysis sidebar controls; return reactive selections.
+    Wire up reactive accessors for the statically-declared perturbation sidebar controls
+    and render the empty-state banner.
+
+    The actual input widgets (``corr_type``, ``col_preference``) are declared
+    in ``perturbation_sidebar_ui`` so their DOM identity persists across
+    dataset-toggle re-renders. This server only exposes reactive accessors for
+    those inputs and conditionally renders the dataset-required banner.
 
     :return: Tuple of (corr_type, col_preference).
 
diff --git a/tfbpshiny/modules/select_datasets/server/workspace.py b/tfbpshiny/modules/select_datasets/server/workspace.py
index f51cda0..d9abb1c 100644
--- a/tfbpshiny/modules/select_datasets/server/workspace.py
+++ b/tfbpshiny/modules/select_datasets/server/workspace.py
@@ -28,6 +28,13 @@
 from tfbpshiny.utils.ratelimit import debounce
 from tfbpshiny.utils.vdb_init import HIDDEN_FILTER_FIELDS
 
+#: Quiet window (seconds) for coalescing rapid dataset-toggle / filter-change
+#: bursts before invalidating ``_matrix_data``. Tuned for typical mouse click
+#: cadence of ~400-700ms; both ``_settled_datasets`` and ``_settled_filters``
+#: must use the same value so toggle-off (which writes both reactive sources)
+#: produces a single coalesced recompute instead of two flickered ones.
+_SETTLED_WINDOW_SEC = 1.0
+
 
 @module.server
 def select_datasets_workspace_server(
@@ -49,15 +56,16 @@ def select_datasets_workspace_server(
         for db_name in vdb.get_datasets()
     }
 
-    @debounce(1.0)
+    @debounce(_SETTLED_WINDOW_SEC)
     @reactive.calc
     def _settled_datasets() -> list[str]:
         """
         Combined list of all active datasets, debounced to coalesce rapid toggle clicks.
 
-        Window is 1.0s so normal-speed multi-toggle sessions (typical click cadence
-        ~400-700ms) coalesce into a single ``_matrix_data`` recompute instead of
-        firing one full N-dataset query matrix per click.
+        Window is ``_SETTLED_WINDOW_SEC`` so normal-speed multi-toggle sessions
+        (typical click cadence ~400-700ms) coalesce into a single
+        ``_matrix_data`` recompute instead of firing one full N-dataset query
+        matrix per click.
 
         :trigger: ``active_binding_datasets``, ``active_perturbation_datasets`` —
             re-runs whenever either list changes, but downstream is only notified
@@ -67,7 +75,7 @@ def _settled_datasets() -> list[str]:
         """
         return active_binding_datasets() + active_perturbation_datasets()
 
-    @debounce(1.0)
+    @debounce(_SETTLED_WINDOW_SEC)
     @reactive.calc
     def _settled_filters() -> dict[str, Any]:
         """
@@ -82,9 +90,9 @@ def _settled_filters() -> dict[str, Any]:
         an inconsistent (old list, new filters) view, then again 1s later when
         the list settles. That manifests as the matrix flashing back and forth.
 
-        Both reads are now coalesced through the same 1s window. Other
-        ``dataset_filters()`` consumers (modal handlers, filter buttons) are
-        unaffected and keep instant response.
+        Both reads are now coalesced through the same ``_SETTLED_WINDOW_SEC``
+        window. Other ``dataset_filters()`` consumers (modal handlers, filter
+        buttons) are unaffected and keep instant response.
 
         :trigger: ``dataset_filters`` — re-runs when any filter changes,
             but downstream is only notified after the same quiet period as