diff --git a/configure_logger.py b/configure_logger.py index 0fb62a0..c8acb22 100644 --- a/configure_logger.py +++ b/configure_logger.py @@ -2,6 +2,8 @@ from enum import Enum from typing import Literal +_PROFILE_FORMAT = "%(message)s" + class LogLevel(Enum): DEBUG = logging.DEBUG @@ -31,34 +33,22 @@ def from_string(cls, level_str: str) -> Enum: def configure_logger( name: str, - level: int = logging.DEBUG, # Use int type hint here + level: int = logging.DEBUG, format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s", handler_type: Literal["console", "file"] = "console", log_file: str = "tfbpmodeling.log", ) -> logging.Logger: """ - Configures a logger. - - :param name: Name of the logger - :type name: str - :param level: Logging level, must be one of logging.DEBUG, - logging.INFO, logging.WARNING, logging.ERROR - :type level: int - :param format: Logging format - :type format: str - :param handler_type: Type of handler, either 'console' or 'file' - :type handler_type: Literal["console", "file"] - :param log_file: Path to log file, required if handler_type is 'file'. - Default is 'tfbpmodeling.log' - :type log_file: str - - :return: Configured logger + Configure a logger with a single handler. + + :param name: Logger name. + :param level: Logging level (``logging.DEBUG``, ``logging.INFO``, etc.). + :param format: Log record format string. + :param handler_type: Destination — ``"console"`` or ``"file"``. + :param log_file: Path used when ``handler_type="file"``. + :returns: Configured logger. :rtype: logging.Logger - - :raises ValueError: If any of the parameters have invalid datatypes - - example usage: - >>> logger = configure_logger("my_logger", level=logging.INFO) + :raises ValueError: If any parameter is invalid. 
""" if not isinstance(name, str): @@ -69,7 +59,7 @@ def configure_logger( raise ValueError("Invalid logging level") if not isinstance(format, str): raise ValueError("format must be a string") - if handler_type not in ["console", "file"]: + if handler_type not in ("console", "file"): raise ValueError("handler_type must be 'console' or 'file'") if handler_type == "file" and not log_file: raise ValueError("log_file must be specified for file handler") @@ -77,22 +67,52 @@ def configure_logger( logger = logging.getLogger(name) logger.setLevel(level) - # Remove all handlers associated with the logger object to avoid duplicate logs - for handler in logger.handlers[:]: - logger.removeHandler(handler) + # Remove existing handlers to avoid duplicates on re-configuration. + for h in logger.handlers[:]: + logger.removeHandler(h) if handler_type == "console": - handler = logging.StreamHandler() - elif handler_type == "file": - if not log_file: - raise ValueError("log_file must be specified for file handler") - handler = logging.FileHandler(log_file) + handler: logging.Handler = logging.StreamHandler() else: - raise ValueError("Invalid handler_type. Must be 'console' or 'file'.") + handler = logging.FileHandler(log_file) handler.setLevel(level) - formatter = logging.Formatter(format) - handler.setFormatter(formatter) + handler.setFormatter(logging.Formatter(format)) logger.addHandler(handler) return logger + + +def configure_profile_logger( + handler_type: Literal["console", "file"] = "console", + level: int = logging.DEBUG, + log_file: str = "tfbpshiny_profile.log", + enabled: bool = True, +) -> logging.Logger: + """ + Configure and return the ``"profiler"`` logger for timing instrumentation. + + The profiler logger uses a bare ``%(message)s`` format because all + structure is embedded in the message by + :func:`tfbpshiny.utils.profiler.profile_span`. + It never propagates to the root or ``"shiny"`` logger. 
+ + :param handler_type: Destination for profile records — ``"console"`` or ``"file"``. + :param level: Log level; ignored (set to ``CRITICAL``) when ``enabled=False``. + :param log_file: Path used when ``handler_type="file"``. + :param enabled: When ``False``, silences the logger by setting its level + to ``CRITICAL``. + :returns: Configured ``"profiler"`` logger. + :rtype: logging.Logger + + """ + effective_level = level if enabled else logging.CRITICAL + logger = configure_logger( + "profiler", + level=effective_level, + format=_PROFILE_FORMAT, + handler_type=handler_type, + log_file=log_file, + ) + logger.propagate = False + return logger diff --git a/docs/development.md b/docs/development.md index a0a02f3..ca330d6 100644 --- a/docs/development.md +++ b/docs/development.md @@ -45,4 +45,25 @@ callbacks are necessary, then either a different plotting library or a custom implementation of some sort will likely be necessary. Would be nice to explore other options, including custom implementations of plots using d3 and then creating the shiny widget manually. With AI, that might be more achievable (for me at least) -than it would be otherwise b/c we can give the AI the docs for all three as context. \ No newline at end of file +than it would be otherwise b/c we can give the AI the docs for all three as context. + +There are both a general log and a profiling log. + +Both loggers write to stdout/stderr; Docker's awslogs driver captures everything into the shinyapp stream under /tfbpshiny/production.
To later separate PROFILE lines from main log lines when parsing: + +```{python} +import pandas as pd + +df = pd.read_csv("exported.log", sep="|", header=None, skipinitialspace=True, + names=["marker","timestamp","elapsed_s","op","module","dataset","context"]) +profile = df[df["marker"].str.strip() == "PROFILE"] +``` + + +Or in CloudWatch Logs Insights: + +```{raw} +fields @timestamp, @message +| filter @message like /^PROFILE/ +| parse @message "PROFILE | * | * | * | * | * | *" as timestamp, elapsed_s, op, module, dataset, context +``` \ No newline at end of file diff --git a/production.yml b/production.yml index ede0f0d..2585d92 100644 --- a/production.yml +++ b/production.yml @@ -12,6 +12,8 @@ services: - HF_HOME=/hf-cache - HF_TOKEN - DOCKER_ENV + - TFBPSHINY_LOG_HANDLER=console + - TFBPSHINY_PROFILE_HANDLER=console volumes: - hf_cache:/hf-cache expose: @@ -61,8 +63,8 @@ services: logging: driver: awslogs options: - awslogs-group: /tfbpshiny/production - awslogs-region: us-east-1 + awslogs-group: /tfbpshiny/production/traefik + awslogs-region: us-east-2 awslogs-stream: traefik awslogs-create-group: "true" restart: unless-stopped diff --git a/terraform/cloudwatch.tf b/terraform/cloudwatch.tf new file mode 100644 index 0000000..1b79dde --- /dev/null +++ b/terraform/cloudwatch.tf @@ -0,0 +1,226 @@ +# NOTE! THIS isn't working to create the dashboard. But I could paste in queries +# by hand to each panel and that did work so the logging is producing correct +# values + +locals { + log_group_app = "/tfbpshiny/production" + log_group_traefik = "/tfbpshiny/production/traefik" + namespace = "TFBPShiny" +} + +# --------------------------------------------------------------------------- +# Log groups +# --------------------------------------------------------------------------- +# Declaring these explicitly so Terraform owns retention/lifecycle. 
If either +# already exists in the account, import it with: +# terraform import aws_cloudwatch_log_group.app /tfbpshiny/production +# terraform import aws_cloudwatch_log_group.traefik /tfbpshiny/production/traefik + +resource "aws_cloudwatch_log_group" "app" { + name = local.log_group_app + retention_in_days = 30 +} + +resource "aws_cloudwatch_log_group" "traefik" { + name = local.log_group_traefik + retention_in_days = 14 +} + +# --------------------------------------------------------------------------- +# Metric filters — JSON format, no tokenization ambiguity +# --------------------------------------------------------------------------- +# Records emitted by the app are single-line JSON objects: +# PROFILE: {"event":"PROFILE","ts":"...","elapsed_s":0.1234,"op":"...","module":"...","dataset":"...","context":"...","session_id":"..."} +# SESSION: {"event":"SESSION","ts":"...","lifecycle":"START"|"END","session_id":"..."} +# +# CloudWatch JSON metric filters match on field values and extract numeric +# fields directly, with no whitespace-tokenization issues. +# +# default_value is intentionally omitted on latency filters. Setting it to 0 +# would drag the average toward zero during quiet periods and suppress alarms. + +resource "aws_cloudwatch_log_metric_filter" "profile_latency" { + name = "tfbpshiny-profile-latency" + log_group_name = aws_cloudwatch_log_group.app.name + pattern = "{ $.event = \"PROFILE\" }" + + metric_transformation { + name = "ProfileLatencySeconds" + namespace = local.namespace + value = "$.elapsed_s" + unit = "Seconds" + } +} + +# Narrow filter for vdb.query only — drives the latency alarm. 
+resource "aws_cloudwatch_log_metric_filter" "vdb_query_latency" { + name = "tfbpshiny-vdb-query-latency" + log_group_name = aws_cloudwatch_log_group.app.name + pattern = "{ $.event = \"PROFILE\" && $.op = \"vdb.query\" }" + + metric_transformation { + name = "VdbQueryLatencySeconds" + namespace = local.namespace + value = "$.elapsed_s" + unit = "Seconds" + } +} + +# Counts SESSION START records — one per new user connection. +resource "aws_cloudwatch_log_metric_filter" "session_start" { + name = "tfbpshiny-session-start" + log_group_name = aws_cloudwatch_log_group.app.name + pattern = "{ $.event = \"SESSION\" && $.lifecycle = \"START\" }" + + metric_transformation { + name = "SessionStart" + namespace = local.namespace + value = "1" + default_value = 0 + unit = "Count" + } +} + +# --------------------------------------------------------------------------- +# Alarms +# --------------------------------------------------------------------------- +# Raise threshold while the app is known to be slow; lower toward 2-5 s +# once query performance improves. +resource "aws_cloudwatch_metric_alarm" "high_vdb_query_latency" { + alarm_name = "tfbpshiny-high-vdb-query-latency" + alarm_description = "Avg vdb.query latency > 30 s over 15 min — investigate query bottlenecks." + namespace = local.namespace + metric_name = "VdbQueryLatencySeconds" + statistic = "Average" + period = 300 + evaluation_periods = 3 + datapoints_to_alarm = 3 + threshold = 30 + comparison_operator = "GreaterThanThreshold" + treat_missing_data = "notBreaching" +} + +resource "aws_cloudwatch_metric_alarm" "high_session_count" { + alarm_name = "tfbpshiny-high-session-count" + alarm_description = "More than 20 new sessions in 5 min — unexpected traffic spike." 
+ namespace = local.namespace + metric_name = "SessionStart" + statistic = "Sum" + period = 300 + evaluation_periods = 1 + threshold = 20 + comparison_operator = "GreaterThanThreshold" + treat_missing_data = "notBreaching" +} + +# --------------------------------------------------------------------------- +# Dashboard +# --------------------------------------------------------------------------- +# Logs Insights auto-discovers JSON fields, so no parse directive needed. +# Queries filter on $.event to separate PROFILE from SESSION records. + +resource "aws_cloudwatch_dashboard" "tfbpshiny" { + dashboard_name = "tfbpshiny-production" + dashboard_body = jsonencode({ + widgets = [ + { + type = "log" + x = 0 + y = 0 + width = 24 + height = 6 + properties = { + title = "Avg and max latency by operation" + region = var.aws_region + view = "table" + logGroupNames = [local.log_group_app] + queryString = <<-EOT + fields @timestamp, op, module, dataset, context, elapsed_s + | filter event = "PROFILE" + | stats avg(elapsed_s) as avg_s, max(elapsed_s) as max_s, count() as n by op, module, context + | sort avg_s desc + EOT + } + }, + { + type = "log" + x = 0 + y = 6 + width = 12 + height = 6 + properties = { + title = "Sessions started (5-min bins)" + region = var.aws_region + view = "timeSeries" + logGroupNames = [local.log_group_app] + queryString = <<-EOT + fields @timestamp, session_id + | filter event = "SESSION" and lifecycle = "START" + | stats count(*) as sessions_started by bin(5m) + | sort @timestamp asc + EOT + } + }, + { + type = "log" + x = 12 + y = 6 + width = 12 + height = 6 + properties = { + title = "Unique visitors per day" + region = var.aws_region + view = "bar" + logGroupNames = [local.log_group_app] + queryString = <<-EOT + fields @timestamp, session_id + | filter event = "SESSION" and lifecycle = "START" + | stats count_distinct(session_id) as unique_visitors by bin(1d) + | sort @timestamp asc + EOT + } + }, + { + type = "log" + x = 0 + y = 12 + width = 24 
+ height = 6 + properties = { + title = "Latency by operation over time (5-min bins)" + region = var.aws_region + view = "timeSeries" + logGroupNames = [local.log_group_app] + queryString = <<-EOT + fields @timestamp, op, elapsed_s + | filter event = "PROFILE" + | stats avg(elapsed_s) as avg_s by bin(5m), op + | sort @timestamp asc + EOT + } + }, + { + type = "alarm" + x = 0 + y = 18 + width = 12 + height = 3 + properties = { + title = "vdb.query latency alarm" + alarms = [aws_cloudwatch_metric_alarm.high_vdb_query_latency.arn] + } + }, + { + type = "alarm" + x = 12 + y = 18 + width = 12 + height = 3 + properties = { + title = "High session count alarm" + alarms = [aws_cloudwatch_metric_alarm.high_session_count.arn] + } + }, + ] + }) +} diff --git a/terraform/main.tf b/terraform/main.tf index 7c841aa..20cf7e6 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -102,6 +102,16 @@ resource "aws_instance" "tfbpshiny" { user_data = file("${path.module}/user_data.sh") + # NOTE: ami and user_data are pinned via ignore_changes. This means that + # if the Amazon Linux 2023 AMI is updated or user_data.sh is modified, + # terraform apply will NOT replace the instance. This is intentional — + # both changes force instance termination, which would take down production. + # To deploy a new AMI or updated user_data, terminate the instance manually, + # remove this lifecycle block, and run terraform apply to reprovision. 
+ lifecycle { + ignore_changes = [ami, user_data] + } + tags = { Name = "tfbpshiny-production" } diff --git a/tfbpshiny/__main__.py b/tfbpshiny/__main__.py index 3b4e78c..cb16872 100644 --- a/tfbpshiny/__main__.py +++ b/tfbpshiny/__main__.py @@ -38,6 +38,18 @@ def make_parser() -> argparse.ArgumentParser: choices=["console", "file"], help="Set log handler type", ) + parser.add_argument( + "--profile-handler", + type=str, + default="console", + choices=["console", "file"], + help="Handler for the profiler logger", + ) + parser.add_argument( + "--no-profile", + action="store_true", + help="Disable the profiler logger entirely", + ) subparsers = parser.add_subparsers(dest="command", required=True) @@ -78,6 +90,8 @@ def main() -> None: # convert to int when configuring the logger os.environ["TFBPSHINY_LOG_LEVEL"] = str(log_level.value) os.environ["TFBPSHINY_LOG_HANDLER"] = args.log_handler + os.environ["TFBPSHINY_PROFILE_HANDLER"] = args.profile_handler + os.environ["TFBPSHINY_PROFILE_ENABLED"] = "0" if args.no_profile else "1" args.func(args) diff --git a/tfbpshiny/app.py b/tfbpshiny/app.py index ae5ca4e..baa1f36 100644 --- a/tfbpshiny/app.py +++ b/tfbpshiny/app.py @@ -9,7 +9,7 @@ from dotenv import load_dotenv from shiny import App, reactive, render, ui -from configure_logger import configure_logger +from configure_logger import configure_logger, configure_profile_logger from tfbpshiny.components import github_badge, nav_button from tfbpshiny.modules.binding.server import ( binding_sidebar_server, @@ -41,6 +41,7 @@ selection_matrix_ui, selection_sidebar_ui, ) +from tfbpshiny.utils.profiler import log_session_event from tfbpshiny.utils.vdb_init import initialize_data if not os.getenv("DOCKER_ENV"): @@ -60,6 +61,25 @@ log_file=log_file, ) +profile_handler_type = cast( + Literal["console", "file"], + os.getenv("TFBPSHINY_PROFILE_HANDLER", "console"), +) +profile_log_file = os.getenv("TFBPSHINY_PROFILE_LOG_FILE", "tfbpshiny_profile.log") +profile_logger = 
configure_profile_logger( + handler_type=profile_handler_type, + log_file=profile_log_file, + enabled=os.getenv("TFBPSHINY_PROFILE_ENABLED", "1") == "1", +) +# Surface where each logger is writing so the env-var configuration is verifiable +# from the first lines of stdout instead of having to grep for log files. +logger.info( + f"Logger destinations — shiny: handler={handler_type} " + f"file={log_file if handler_type == 'file' else 'n/a'} | " + f"profiler: handler={profile_handler_type} " + f"file={profile_log_file if profile_handler_type == 'file' else 'n/a'}" +) + # instantiate the virtualDB and compute app-level dataset metadata virtualdb_config = os.getenv( @@ -67,7 +87,9 @@ ) hf_token: str | None = os.getenv("HF_TOKEN") logger.info(f"Loading VirtualDB with config: {virtualdb_config}") -vdb, app_datasets = initialize_data(virtualdb_config, hf_token) +vdb, app_datasets = initialize_data( + virtualdb_config, hf_token, profile_logger=profile_logger +) app_ui = ui.page_fillable( ui.include_css((Path(__file__).parent / "app.css").resolve()), @@ -100,6 +122,10 @@ def app_server(input: Any, output: Any, session: Any) -> None: """Create shared reactive state and call all module servers.""" + sid: str = session.id + log_session_event(profile_logger, "START", sid) + session.on_ended(lambda: log_session_event(profile_logger, "END", sid)) + # this stores the name of the currently active module, ie # "home", "selection", "binding", "perturbation", or "comparison" active_module: reactive.Value[str] = reactive.value("home") @@ -121,6 +147,8 @@ def app_server(input: Any, output: Any, session: Any) -> None: dataset_filters=dataset_filters, vdb=vdb, logger=logger, + profile_logger=profile_logger, + session_id=sid, ) corr_type, col_preference = binding_sidebar_server( @@ -139,6 +167,8 @@ def app_server(input: Any, output: Any, session: Any) -> None: vdb=vdb, app_datasets=app_datasets, logger=logger, + profile_logger=profile_logger, + session_id=sid, ) corr_type_p, 
col_preference_p = perturbation_sidebar_server( @@ -157,6 +187,8 @@ def app_server(input: Any, output: Any, session: Any) -> None: vdb=vdb, app_datasets=app_datasets, logger=logger, + profile_logger=profile_logger, + session_id=sid, ) top_n, effect_threshold, pvalue_threshold, facet_by = comparison_sidebar_server( @@ -177,6 +209,8 @@ def app_server(input: Any, output: Any, session: Any) -> None: facet_by=facet_by, vdb=vdb, logger=logger, + profile_logger=profile_logger, + session_id=sid, ) # set the active module when a nav button is clicked diff --git a/tfbpshiny/modules/binding/queries.py b/tfbpshiny/modules/binding/queries.py index 8a1564f..15704b3 100644 --- a/tfbpshiny/modules/binding/queries.py +++ b/tfbpshiny/modules/binding/queries.py @@ -7,6 +7,12 @@ import pandas as pd from labretriever import VirtualDB +#: Default correlation method for the binding analysis sidebar +DEFAULT_CORR_TYPE = "pearson" + +#: Default measurement column preference for the binding analysis sidebar +DEFAULT_COL_PREFERENCE = "effect" + # Map of db_name -> (effect_col, pvalue_col). # pvalue_col is empty string for datasets that have no pvalue column. # TODO: this information should be moved to virtualdb config. diff --git a/tfbpshiny/modules/binding/server/sidebar.py b/tfbpshiny/modules/binding/server/sidebar.py index 98b38f4..4300729 100644 --- a/tfbpshiny/modules/binding/server/sidebar.py +++ b/tfbpshiny/modules/binding/server/sidebar.py @@ -9,7 +9,11 @@ from labretriever import VirtualDB from shiny import module, reactive, render, ui -from tfbpshiny.components import sidebar_label +from tfbpshiny.components import empty_state +from tfbpshiny.modules.binding.queries import ( + DEFAULT_COL_PREFERENCE, + DEFAULT_CORR_TYPE, +) @module.server @@ -26,7 +30,13 @@ def binding_sidebar_server( Callable[[], str], # col_preference: "effect" | "pvalue" ]: """ - Render binding analysis sidebar controls; return reactive selections. 
+ Wire up reactive accessors for the statically-declared binding sidebar controls and + render the empty-state banner. + + The actual input widgets (``corr_type``, ``col_preference``) are declared + in ``binding_sidebar_ui`` so their DOM identity persists across + dataset-toggle re-renders. This server only exposes reactive accessors for + those inputs and conditionally renders the dataset-required banner. :return: Tuple of (corr_type, col_preference). @@ -46,7 +56,7 @@ def corr_type() -> str: try: return str(input.corr_type()) except Exception: - return "pearson" + return DEFAULT_CORR_TYPE @reactive.calc def col_preference() -> str: @@ -62,36 +72,23 @@ def col_preference() -> str: try: return str(input.col_preference()) except Exception: - return "effect" + return DEFAULT_COL_PREFERENCE @render.ui - def sidebar_controls() -> ui.Tag: - active = active_binding_datasets() + def empty_state_message() -> ui.Tag | None: + """ + Empty-state banner shown when no binding datasets are selected. + + :trigger active_binding_datasets: re-runs on dataset selection change. + :returns: The banner div, or ``None`` when at least one dataset is active. 
- if not active: - return ui.div( - {"class": "empty-state compact"}, + """ + if not active_binding_datasets(): + return empty_state( ui.p("Select binding datasets from the Select Datasets page."), + compact=True, ) - - return ui.div( - sidebar_label("Column"), - ui.input_radio_buttons( - "col_preference", - label=None, - choices={"effect": "Effect", "pvalue": "P-value"}, - selected=col_preference(), - inline=True, - ), - sidebar_label("Correlation"), - ui.input_radio_buttons( - "corr_type", - label=None, - choices={"pearson": "Pearson", "spearman": "Spearman"}, - selected=corr_type(), - inline=True, - ), - ) + return None return corr_type, col_preference diff --git a/tfbpshiny/modules/binding/server/workspace.py b/tfbpshiny/modules/binding/server/workspace.py index c31f727..5e8b3b0 100644 --- a/tfbpshiny/modules/binding/server/workspace.py +++ b/tfbpshiny/modules/binding/server/workspace.py @@ -17,6 +17,7 @@ get_measurement_column, regulator_scatter_sql, ) +from tfbpshiny.utils.profiler import profile_span from tfbpshiny.utils.sample_conditions import fetch_sample_condition_map from tfbpshiny.utils.vdb_init import AppDatasets, get_regulator_display_name @@ -33,6 +34,8 @@ def binding_workspace_server( vdb: VirtualDB, app_datasets: AppDatasets, logger: Logger, + profile_logger: Logger, + session_id: str = "", ) -> None: """ Render the binding correlation rows: pairwise distributions @@ -139,17 +142,25 @@ def _all_corr_data() -> dict[tuple[str, str], pd.DataFrame]: logger.debug( f"Correlating {db_a}({col_a}) vs {db_b}({col_b}) ({method})" ) - result[(db_a, db_b)] = corr_pair_sql( - vdb, - db_a, - col_a, - filters.get(db_a), - db_b, - col_b, - filters.get(db_b), - method, - prefix=f"p{i}_", - ) + with profile_span( + profile_logger, + "vdb.query", + module="binding", + dataset=f"{db_a}x{db_b}", + context="_all_corr_data", + session_id=session_id, + ): + result[(db_a, db_b)] = corr_pair_sql( + vdb, + db_a, + col_a, + filters.get(db_a), + db_b, + col_b, + 
filters.get(db_b), + method, + prefix=f"p{i}_", + ) except Exception as exc: logger.error( f"Failed to correlate {db_a} vs {db_b}: {exc}", exc_info=True @@ -305,33 +316,52 @@ def distributions_plot() -> ui.Tag: " });" "})();" ) - return ui.HTML( - to_html( - fig, - include_plotlyjs="cdn", - full_html=False, - post_script=post_script, + with profile_span( + profile_logger, + "plot.build", + module="binding", + context="distributions_plot", + session_id=session_id, + ): + html = to_html( + fig, include_plotlyjs="cdn", full_html=False, post_script=post_script ) - ) + return ui.HTML(html) + # Disable Shiny's default suspend-when-hidden behavior so the side-effect + # render keeps firing reliably even if a future layout wraps the workspace + # in a navset_tab / conditional_panel / display:none toggle. + # DO NOT convert this back to @reactive.effect — see CLAUDE.md. + @output(suspend_when_hidden=False) @render.ui - def regulator_selector() -> ui.Tag: + def _regulator_choices_trigger() -> ui.Tag: """ - Dropdown of regulators present in at least one pair's correlation data. - - Choices are keyed by locus tag and labelled by gene symbol where available. The - previously selected regulator is preserved across re-renders if it is still - present in the new choice set. - - :trigger _all_corr_data: re-renders when correlation data changes (new - datasets selected, filters applied, or column/method changed). :trigger - active_binding_datasets: re-renders to refresh the symbol map when the - active dataset set changes. + Hidden render that pushes regulator choices to the static selectize. + + ``selected_regulator`` is declared statically in ``binding/ui.py``; + this render mutates its choices/selected value via + ``ui.update_selectize`` whenever the correlation data changes. 
The + prior ``@render.ui regulator_selector`` pattern recreated the widget + on every ``_all_corr_data`` invalidation, which fired spurious change + events on ``input.selected_regulator`` and cascaded into 6-15× re-runs + of the downstream scatter / regulator_plot queries. Updating in place + preserves DOM identity. + + Implemented as a render rather than a ``@reactive.effect`` so the + ``_all_corr_data`` read is **lazy** — the placeholder only mounts when + this workspace is active, so dataset toggles on the Selection page do + not incur correlation queries here. + + :trigger _all_corr_data: choices refresh when correlation data changes + (new datasets, filters applied, or column/method changed). + :returns: An invisible span; output is purely a side-effect carrier. """ corr_data = _all_corr_data() + marker = ui.tags.span(style="display:none") if not corr_data: - return ui.span() + ui.update_selectize("selected_regulator", choices={}, selected=None) + return marker # sym_map is built at server init from the pre-computed lookup table all_regs: set[str] = set() @@ -340,22 +370,20 @@ def regulator_selector() -> ui.Tag: all_regs |= set(df["regulator_locus_tag"].dropna().unique()) if not all_regs: - return ui.span() + ui.update_selectize("selected_regulator", choices={}, selected=None) + return marker choices = {r: sym_map.get(r, r) for r in all_regs} choices = dict(sorted(choices.items(), key=lambda kv: kv[1].lower())) - try: - current = str(input.selected_regulator()) - except Exception: - current = "" + with reactive.isolate(): + try: + current = str(input.selected_regulator()) + except Exception: + current = "" default = current if current in choices else next(iter(choices)) - return ui.input_selectize( - "selected_regulator", - "Regulator", - choices=choices, - selected=default, - ) + ui.update_selectize("selected_regulator", choices=choices, selected=default) + return marker @render.ui def regulator_plots() -> ui.Tag: @@ -441,7 +469,14 @@ def _strip_reg(f: dict | 
None) -> dict | None: reg, idx, ) - merged = vdb.query(scatter_sql, **scatter_params) + with profile_span( + profile_logger, + "vdb.query", + module="binding", + context="scatter", + session_id=session_id, + ): + merged = vdb.query(scatter_sql, **scatter_params) logger.debug( f"scatter {db_a}/{db_b} reg={reg!r} " f"rows={len(merged)} fa={fa!r} fb={fb!r}" @@ -503,9 +538,17 @@ def _strip_reg(f: dict | None) -> dict | None: width=400, height=400, ) + with profile_span( + profile_logger, + "plot.build", + module="binding", + context="regulator_plot", + session_id=session_id, + ): + plot_html = to_html(fig, include_plotlyjs=False, full_html=False) plot_divs.append( ui.div( - ui.HTML(to_html(fig, include_plotlyjs=False, full_html=False)), + ui.HTML(plot_html), style="flex: 0 0 auto;", ) ) diff --git a/tfbpshiny/modules/binding/ui.py b/tfbpshiny/modules/binding/ui.py index aecaa54..9acf659 100644 --- a/tfbpshiny/modules/binding/ui.py +++ b/tfbpshiny/modules/binding/ui.py @@ -6,30 +6,67 @@ from tfbpshiny.components import ( sidebar_heading, + sidebar_label, sidebar_shell, workspace_heading, workspace_shell, ) +from tfbpshiny.modules.binding.queries import ( + DEFAULT_COL_PREFERENCE, + DEFAULT_CORR_TYPE, +) @module.ui def binding_sidebar_ui() -> ui.Tag: + # Inputs are declared statically so they keep stable DOM identity across + # dataset-toggle re-renders. The empty-state banner is the only reactive + # piece — see binding/server/sidebar.py for why this matters. 
return sidebar_shell( "binding-sidebar", header=sidebar_heading("Binding"), - body=ui.output_ui("sidebar_controls"), + body=ui.div( + ui.output_ui("empty_state_message"), + sidebar_label("Column"), + ui.input_radio_buttons( + "col_preference", + label=None, + choices={"effect": "Effect", "pvalue": "P-value"}, + selected=DEFAULT_COL_PREFERENCE, + inline=True, + ), + sidebar_label("Correlation"), + ui.input_radio_buttons( + "corr_type", + label=None, + choices={"pearson": "Pearson", "spearman": "Spearman"}, + selected=DEFAULT_CORR_TYPE, + inline=True, + ), + ), ) @module.ui def binding_workspace_ui() -> ui.Tag: + # selected_regulator is declared statically so its DOM identity persists + # across data refreshes. Choices are pushed via ui.update_selectize from + # binding/server/workspace.py — see that file for why this matters. return workspace_shell( "binding-workspace", header=workspace_heading("Binding Correlation"), body=ui.div( ui.output_ui("distributions_plot"), ui.hr(), - ui.output_ui("regulator_selector"), + ui.input_selectize( + "selected_regulator", + "Regulator", + choices=[], + selected=None, + ), + # Hidden placeholder; mounted only when this workspace is active so + # the choices-update effect stays lazy. See server/workspace.py. 
+ ui.output_ui("_regulator_choices_trigger"), ui.output_ui("regulator_plots"), ), ) diff --git a/tfbpshiny/modules/comparison/queries.py b/tfbpshiny/modules/comparison/queries.py index 6e998f2..0c9867f 100644 --- a/tfbpshiny/modules/comparison/queries.py +++ b/tfbpshiny/modules/comparison/queries.py @@ -29,6 +29,9 @@ #: Default p-value threshold (pvalue must be below this to be "responsive") DEFAULT_PVALUE_THRESHOLD = 0.05 +#: Default facet-by orientation for the top-N plot +DEFAULT_FACET_BY = "binding" + # --------------------------------------------------------------------------- # DTO query # --------------------------------------------------------------------------- diff --git a/tfbpshiny/modules/comparison/server/sidebar.py b/tfbpshiny/modules/comparison/server/sidebar.py index 4648290..25b7123 100644 --- a/tfbpshiny/modules/comparison/server/sidebar.py +++ b/tfbpshiny/modules/comparison/server/sidebar.py @@ -9,9 +9,10 @@ from labretriever import VirtualDB from shiny import module, reactive, render, ui -from tfbpshiny.components import sidebar_label +from tfbpshiny.components import empty_state from tfbpshiny.modules.comparison.queries import ( DEFAULT_EFFECT_THRESHOLD, + DEFAULT_FACET_BY, DEFAULT_PVALUE_THRESHOLD, DEFAULT_TOP_N, ) @@ -33,7 +34,14 @@ def comparison_sidebar_server( Callable[[], str], # facet_by: "binding" | "perturbation" ]: """ - Render comparison sidebar controls and return reactive selections. + Wire up reactive accessors for the statically-declared comparison sidebar controls + and render the empty-state banner. + + The actual input widgets (``top_n``, ``effect_threshold``, + ``pvalue_threshold``, ``facet_by``) are declared in ``comparison_sidebar_ui`` + so their DOM identity persists across dataset-toggle re-renders. This server + only exposes reactive accessors for those inputs and conditionally renders + the dataset-required banner. :returns: Tuple of (top_n, effect_threshold, pvalue_threshold, facet_by). 
@@ -95,59 +103,27 @@ def facet_by() -> str: try: return str(input.facet_by()) except Exception: - return "binding" + return DEFAULT_FACET_BY @render.ui - def sidebar_controls() -> ui.Tag: - binding = active_binding_datasets() - perturbation = active_perturbation_datasets() + def empty_state_message() -> ui.Tag | None: + """ + Empty-state banner shown when either dataset axis is empty. + + :trigger active_binding_datasets: re-runs on dataset selection change. + :trigger active_perturbation_datasets: re-runs on dataset selection change. + :returns: The banner div, or ``None`` when both axes have selections. - if not binding or not perturbation: - return ui.div( - {"class": "empty-state compact"}, + """ + if not active_binding_datasets() or not active_perturbation_datasets(): + return empty_state( ui.p( "Select at least one binding and one perturbation dataset " "from the Select Datasets page." ), + compact=True, ) - - return ui.div( - ui.input_numeric( - "top_n", - "Top N", - value=top_n(), - min=1, - max=500, - step=5, - ), - sidebar_label("Responsive threshold"), - ui.input_slider( - "effect_threshold", - "Min |effect|", - min=0.0, - max=5.0, - value=effect_threshold(), - step=0.1, - ), - ui.input_slider( - "pvalue_threshold", - "Max p-value", - min=0.001, - max=1.0, - value=pvalue_threshold(), - step=0.001, - ), - sidebar_label("Facet by"), - ui.input_radio_buttons( - "facet_by", - label=None, - choices={ - "binding": "Binding source", - "perturbation": "Perturbation source", - }, - selected=facet_by(), - ), - ) + return None return top_n, effect_threshold, pvalue_threshold, facet_by diff --git a/tfbpshiny/modules/comparison/server/workspace.py b/tfbpshiny/modules/comparison/server/workspace.py index f71eb0a..28e0c8d 100644 --- a/tfbpshiny/modules/comparison/server/workspace.py +++ b/tfbpshiny/modules/comparison/server/workspace.py @@ -20,6 +20,7 @@ PERTURBATION_LABEL_MAP, topn_responsive_ratio, ) +from tfbpshiny.utils.profiler import profile_span from 
tfbpshiny.utils.vdb_init import get_regulator_display_name # color palettes @@ -70,6 +71,8 @@ def comparison_workspace_server( facet_by: Callable[[], str], vdb: VirtualDB, logger: Logger, + profile_logger: Logger, + session_id: str = "", ) -> None: """Render the Top-N by Binding workspace plot.""" @@ -133,19 +136,27 @@ def _topn_data() -> pd.DataFrame: p_filters = ( None if p_cfg.get("hackett_time_filter") else filters.get(p_db) ) - result = topn_responsive_ratio( - vdb=vdb, - binding_view=b_db, - perturbation_view=p_db, - top_n=n, - effect_threshold=eff, - pvalue_threshold=pval, - binding_filters=filters.get(b_db), - perturbation_filters=p_filters, - param_prefix=f"{b_db}_{p_db}", - **b_cfg, - **p_cfg, - ) + with profile_span( + profile_logger, + "vdb.query", + module="comparison", + dataset=f"{b_db}x{p_db}", + context="_topn_data", + session_id=session_id, + ): + result = topn_responsive_ratio( + vdb=vdb, + binding_view=b_db, + perturbation_view=p_db, + top_n=n, + effect_threshold=eff, + pvalue_threshold=pval, + binding_filters=filters.get(b_db), + perturbation_filters=p_filters, + param_prefix=f"{b_db}_{p_db}", + **b_cfg, + **p_cfg, + ) assert isinstance(result, pd.DataFrame) result["binding_source"] = b_label result["perturbation_source"] = p_label @@ -162,7 +173,14 @@ def _topn_data() -> pd.DataFrame: if not results: return pd.DataFrame() - out = pd.concat(results, ignore_index=True) + with profile_span( + profile_logger, + "df.concat", + module="comparison", + context="_topn_data", + session_id=session_id, + ): + out = pd.concat(results, ignore_index=True) out["percent_responsive"] = out["responsive_ratio"] * 100 return out @@ -268,7 +286,15 @@ def topn_plot() -> ui.Tag: legend_title=legend_title, margin=dict(l=50, r=20, t=80, b=30), ) - return ui.HTML(to_html(fig, include_plotlyjs="cdn", full_html=False)) + with profile_span( + profile_logger, + "plot.build", + module="comparison", + context="topn_plot", + session_id=session_id, + ): + html = to_html(fig, 
include_plotlyjs="cdn", full_html=False) + return ui.HTML(html) __all__ = ["comparison_workspace_server"] diff --git a/tfbpshiny/modules/comparison/ui.py b/tfbpshiny/modules/comparison/ui.py index 4f0391d..96c5ee8 100644 --- a/tfbpshiny/modules/comparison/ui.py +++ b/tfbpshiny/modules/comparison/ui.py @@ -6,18 +6,65 @@ from tfbpshiny.components import ( sidebar_heading, + sidebar_label, sidebar_shell, workspace_heading, workspace_shell, ) +from tfbpshiny.modules.comparison.queries import ( + DEFAULT_EFFECT_THRESHOLD, + DEFAULT_FACET_BY, + DEFAULT_PVALUE_THRESHOLD, + DEFAULT_TOP_N, +) @module.ui def comparison_sidebar_ui() -> ui.Tag: + # Inputs are declared statically so they keep stable DOM identity across + # dataset-toggle re-renders. The empty-state banner is the only reactive + # piece — see comparison/server/sidebar.py for why this matters. return sidebar_shell( "comparison-sidebar", header=sidebar_heading("Comparison"), - body=ui.output_ui("sidebar_controls"), + body=ui.div( + ui.output_ui("empty_state_message"), + ui.input_numeric( + "top_n", + "Top N", + value=DEFAULT_TOP_N, + min=1, + max=500, + step=5, + ), + sidebar_label("Responsive threshold"), + ui.input_slider( + "effect_threshold", + "Min |effect|", + min=0.0, + max=5.0, + value=DEFAULT_EFFECT_THRESHOLD, + step=0.1, + ), + ui.input_slider( + "pvalue_threshold", + "Max p-value", + min=0.001, + max=1.0, + value=DEFAULT_PVALUE_THRESHOLD, + step=0.001, + ), + sidebar_label("Facet by"), + ui.input_radio_buttons( + "facet_by", + label=None, + choices={ + "binding": "Binding source", + "perturbation": "Perturbation source", + }, + selected=DEFAULT_FACET_BY, + ), + ), ) diff --git a/tfbpshiny/modules/perturbation/queries.py b/tfbpshiny/modules/perturbation/queries.py index 296b11e..9084620 100644 --- a/tfbpshiny/modules/perturbation/queries.py +++ b/tfbpshiny/modules/perturbation/queries.py @@ -9,6 +9,12 @@ from tfbpshiny.modules.binding.queries import _corr_pair_sql_impl +#: Default correlation method for 
the perturbation analysis sidebar +DEFAULT_CORR_TYPE = "pearson" + +#: Default measurement column preference for the perturbation analysis sidebar +DEFAULT_COL_PREFERENCE = "effect" + # Map of db_name -> (effect_col, pvalue_col). # pvalue_col is empty string for datasets that have no pvalue column. # TODO: this information should be moved to virtualdb config. diff --git a/tfbpshiny/modules/perturbation/server/sidebar.py b/tfbpshiny/modules/perturbation/server/sidebar.py index ff7352a..ec04d81 100644 --- a/tfbpshiny/modules/perturbation/server/sidebar.py +++ b/tfbpshiny/modules/perturbation/server/sidebar.py @@ -8,7 +8,11 @@ from shiny import module, reactive, render, ui -from tfbpshiny.components import sidebar_label +from tfbpshiny.components import empty_state +from tfbpshiny.modules.perturbation.queries import ( + DEFAULT_COL_PREFERENCE, + DEFAULT_CORR_TYPE, +) @module.server @@ -25,7 +29,13 @@ def perturbation_sidebar_server( Callable[[], str], # col_preference: "effect" | "pvalue" ]: """ - Render perturbation analysis sidebar controls; return reactive selections. + Wire up reactive accessors for the statically-declared perturbation sidebar controls + and render the empty-state banner. + + The actual input widgets (``corr_type``, ``col_preference``) are declared + in ``perturbation_sidebar_ui`` so their DOM identity persists across + dataset-toggle re-renders. This server only exposes reactive accessors for + those inputs and conditionally renders the dataset-required banner. :return: Tuple of (corr_type, col_preference). 
@@ -45,7 +55,7 @@ def corr_type() -> str: try: return str(input.corr_type()) except Exception: - return "pearson" + return DEFAULT_CORR_TYPE @reactive.calc def col_preference() -> str: @@ -61,36 +71,23 @@ def col_preference() -> str: try: return str(input.col_preference()) except Exception: - return "effect" + return DEFAULT_COL_PREFERENCE @render.ui - def sidebar_controls() -> ui.Tag: - active = active_perturbation_datasets() + def empty_state_message() -> ui.Tag | None: + """ + Empty-state banner shown when no perturbation datasets are selected. + + :trigger active_perturbation_datasets: re-runs on dataset selection change. + :returns: The banner div, or ``None`` when at least one dataset is active. - if not active: - return ui.div( - {"class": "empty-state compact"}, + """ + if not active_perturbation_datasets(): + return empty_state( ui.p("Select perturbation datasets from the Select Datasets page."), + compact=True, ) - - return ui.div( - sidebar_label("Column"), - ui.input_radio_buttons( - "col_preference", - label=None, - choices={"effect": "Effect", "pvalue": "P-value"}, - selected=col_preference(), - inline=True, - ), - sidebar_label("Correlation"), - ui.input_radio_buttons( - "corr_type", - label=None, - choices={"pearson": "Pearson", "spearman": "Spearman"}, - selected=corr_type(), - inline=True, - ), - ) + return None return corr_type, col_preference diff --git a/tfbpshiny/modules/perturbation/server/workspace.py b/tfbpshiny/modules/perturbation/server/workspace.py index 927c984..29bc992 100644 --- a/tfbpshiny/modules/perturbation/server/workspace.py +++ b/tfbpshiny/modules/perturbation/server/workspace.py @@ -17,6 +17,7 @@ get_measurement_column, regulator_scatter_sql, ) +from tfbpshiny.utils.profiler import profile_span from tfbpshiny.utils.sample_conditions import fetch_sample_condition_map from tfbpshiny.utils.vdb_init import AppDatasets, get_regulator_display_name @@ -33,6 +34,8 @@ def perturbation_workspace_server( vdb: VirtualDB, app_datasets: 
AppDatasets, logger: Logger, + profile_logger: Logger, + session_id: str = "", ) -> None: """ Render the perturbation correlation rows: pairwise distributions @@ -137,17 +140,25 @@ def _all_corr_data() -> dict[tuple[str, str], pd.DataFrame]: logger.debug( f"Correlating {db_a}({col_a}) vs {db_b}({col_b}) ({method})" ) - result[(db_a, db_b)] = corr_pair_sql( - vdb, - db_a, - col_a, - filters.get(db_a), - db_b, - col_b, - filters.get(db_b), - method, - prefix=f"p{i}_", - ) + with profile_span( + profile_logger, + "vdb.query", + module="perturbation", + dataset=f"{db_a}x{db_b}", + context="_all_corr_data", + session_id=session_id, + ): + result[(db_a, db_b)] = corr_pair_sql( + vdb, + db_a, + col_a, + filters.get(db_a), + db_b, + col_b, + filters.get(db_b), + method, + prefix=f"p{i}_", + ) except Exception as exc: logger.error( f"Failed to correlate {db_a} vs {db_b}: {exc}", exc_info=True @@ -303,33 +314,55 @@ def distributions_plot() -> ui.Tag: " });" "})();" ) - return ui.HTML( - to_html( + with profile_span( + profile_logger, + "plot.build", + module="perturbation", + context="distributions_plot", + session_id=session_id, + ): + html = to_html( fig, include_plotlyjs="cdn", full_html=False, post_script=post_script, ) - ) + return ui.HTML(html) + # Disable Shiny's default suspend-when-hidden behavior so the side-effect + # render keeps firing reliably even if a future layout wraps the workspace + # in a navset_tab / conditional_panel / display:none toggle. + # DO NOT convert this back to @reactive.effect — see CLAUDE.md. + @output(suspend_when_hidden=False) @render.ui - def regulator_selector() -> ui.Tag: + def _regulator_choices_trigger() -> ui.Tag: """ - Dropdown of regulators present in at least one pair's correlation data. - - Choices are keyed by locus tag and labelled by gene symbol where available. The - previously selected regulator is preserved across re-renders if it is still - present in the new choice set. 
- - :trigger _all_corr_data: re-renders when correlation data changes (new - datasets selected, filters applied, or column/method changed). :trigger - active_perturbation_datasets: re-renders to refresh the symbol map when the - active dataset set changes. + Hidden render that pushes regulator choices to the static selectize. + + ``selected_regulator`` is declared statically in ``perturbation/ui.py``; + this render mutates its choices/selected value via + ``ui.update_selectize`` whenever the correlation data changes. The + prior ``@render.ui regulator_selector`` pattern recreated the widget + on every ``_all_corr_data`` invalidation, which fired spurious change + events on ``input.selected_regulator`` and cascaded into 6-15× re-runs + of the downstream scatter / regulator_plot queries. Updating in place + preserves DOM identity. + + Implemented as a render rather than a ``@reactive.effect`` so the + ``_all_corr_data`` read is **lazy** — the placeholder only mounts when + this workspace is active, so dataset toggles on the Selection page do + not incur correlation queries here. + + :trigger _all_corr_data: choices refresh when correlation data changes + (new datasets, filters applied, or column/method changed). + :returns: An invisible span; output is purely a side-effect carrier. 
""" corr_data = _all_corr_data() + marker = ui.tags.span(style="display:none") if not corr_data: - return ui.span() + ui.update_selectize("selected_regulator", choices={}, selected=None) + return marker # sym_map is built at server init from the pre-computed lookup table all_regs: set[str] = set() @@ -338,22 +371,20 @@ def regulator_selector() -> ui.Tag: all_regs |= set(df["regulator_locus_tag"].dropna().unique()) if not all_regs: - return ui.span() + ui.update_selectize("selected_regulator", choices={}, selected=None) + return marker choices = {r: sym_map.get(r, r) for r in all_regs} choices = dict(sorted(choices.items(), key=lambda kv: kv[1].lower())) - try: - current = str(input.selected_regulator()) - except Exception: - current = "" + with reactive.isolate(): + try: + current = str(input.selected_regulator()) + except Exception: + current = "" default = current if current in choices else next(iter(choices)) - return ui.input_selectize( - "selected_regulator", - "Regulator", - choices=choices, - selected=default, - ) + ui.update_selectize("selected_regulator", choices=choices, selected=default) + return marker @render.ui def regulator_plots() -> ui.Tag: @@ -439,7 +470,14 @@ def _strip_reg(f: dict | None) -> dict | None: reg, idx, ) - merged = vdb.query(scatter_sql, **scatter_params) + with profile_span( + profile_logger, + "vdb.query", + module="perturbation", + context="scatter", + session_id=session_id, + ): + merged = vdb.query(scatter_sql, **scatter_params) logger.debug( f"scatter {db_a}/{db_b} reg={reg!r} " f"rows={len(merged)} fa={fa!r} fb={fb!r}" @@ -501,9 +539,17 @@ def _strip_reg(f: dict | None) -> dict | None: width=400, height=400, ) + with profile_span( + profile_logger, + "plot.build", + module="perturbation", + context="regulator_plot", + session_id=session_id, + ): + plot_html = to_html(fig, include_plotlyjs=False, full_html=False) plot_divs.append( ui.div( - ui.HTML(to_html(fig, include_plotlyjs=False, full_html=False)), + ui.HTML(plot_html), 
style="flex: 0 0 auto;", ) ) diff --git a/tfbpshiny/modules/perturbation/ui.py b/tfbpshiny/modules/perturbation/ui.py index b2574b8..6a301ae 100644 --- a/tfbpshiny/modules/perturbation/ui.py +++ b/tfbpshiny/modules/perturbation/ui.py @@ -6,30 +6,67 @@ from tfbpshiny.components import ( sidebar_heading, + sidebar_label, sidebar_shell, workspace_heading, workspace_shell, ) +from tfbpshiny.modules.perturbation.queries import ( + DEFAULT_COL_PREFERENCE, + DEFAULT_CORR_TYPE, +) @module.ui def perturbation_sidebar_ui() -> ui.Tag: + # Inputs are declared statically so they keep stable DOM identity across + # dataset-toggle re-renders. The empty-state banner is the only reactive + # piece — see perturbation/server/sidebar.py for why this matters. return sidebar_shell( "perturbation-sidebar", header=sidebar_heading("Perturbation"), - body=ui.output_ui("sidebar_controls"), + body=ui.div( + ui.output_ui("empty_state_message"), + sidebar_label("Column"), + ui.input_radio_buttons( + "col_preference", + label=None, + choices={"effect": "Effect", "pvalue": "P-value"}, + selected=DEFAULT_COL_PREFERENCE, + inline=True, + ), + sidebar_label("Correlation"), + ui.input_radio_buttons( + "corr_type", + label=None, + choices={"pearson": "Pearson", "spearman": "Spearman"}, + selected=DEFAULT_CORR_TYPE, + inline=True, + ), + ), ) @module.ui def perturbation_workspace_ui() -> ui.Tag: + # selected_regulator is declared statically so its DOM identity persists + # across data refreshes. Choices are pushed via ui.update_selectize from + # perturbation/server/workspace.py — see that file for why this matters. 
return workspace_shell( "perturbation-workspace", header=workspace_heading("Perturbation Analysis"), body=ui.div( ui.output_ui("distributions_plot"), ui.hr(), - ui.output_ui("regulator_selector"), + ui.input_selectize( + "selected_regulator", + "Regulator", + choices=[], + selected=None, + ), + # Hidden placeholder; mounted only when this workspace is active so + # the choices-update effect stays lazy. See server/workspace.py. + ui.output_ui("_regulator_choices_trigger"), ui.output_ui("regulator_plots"), ), ) diff --git a/tfbpshiny/modules/select_datasets/server/workspace.py b/tfbpshiny/modules/select_datasets/server/workspace.py index 90042e8..d9abb1c 100644 --- a/tfbpshiny/modules/select_datasets/server/workspace.py +++ b/tfbpshiny/modules/select_datasets/server/workspace.py @@ -24,9 +24,17 @@ diagonal_cell_modal_ui, off_diagonal_cell_modal_ui, ) +from tfbpshiny.utils.profiler import profile_span from tfbpshiny.utils.ratelimit import debounce from tfbpshiny.utils.vdb_init import HIDDEN_FILTER_FIELDS +#: Quiet window (seconds) for coalescing rapid dataset-toggle / filter-change +#: bursts before invalidating ``_matrix_data``. Tuned for typical mouse click +#: cadence of ~400-700ms; both ``_settled_datasets`` and ``_settled_filters`` +#: must use the same value so toggle-off (which writes both reactive sources) +#: produces a single coalesced recompute instead of two flickered ones. 
+_SETTLED_WINDOW_SEC = 1.0 + @module.server def select_datasets_workspace_server( @@ -38,6 +46,8 @@ def select_datasets_workspace_server( dataset_filters: reactive.Value[dict[str, Any]], vdb: VirtualDB, logger: Logger, + profile_logger: Logger, + session_id: str = "", ) -> None: """Render the sample-count matrix for all active datasets.""" @@ -46,12 +56,17 @@ def select_datasets_workspace_server( for db_name in vdb.get_datasets() } - @debounce(0.3) + @debounce(_SETTLED_WINDOW_SEC) @reactive.calc def _settled_datasets() -> list[str]: """ Combined list of all active datasets, debounced to coalesce rapid toggle clicks. + Window is ``_SETTLED_WINDOW_SEC`` so normal-speed multi-toggle sessions + (typical click cadence ~400-700ms) coalesce into a single + ``_matrix_data`` recompute instead of firing one full N-dataset query + matrix per click. + :trigger: ``active_binding_datasets``, ``active_perturbation_datasets`` — re-runs whenever either list changes, but downstream is only notified after a specified quiet period. @@ -60,6 +75,33 @@ def _settled_datasets() -> list[str]: """ return active_binding_datasets() + active_perturbation_datasets() + @debounce(_SETTLED_WINDOW_SEC) + @reactive.calc + def _settled_filters() -> dict[str, Any]: + """ + Debounced view of ``dataset_filters`` for ``_matrix_data`` consumption. + + Toggling a dataset OFF fires two reactive writes in quick succession from + ``dataset_row._on_toggle``: a ``toggle_state.set`` (caught by + ``_settled_datasets``'s debounce) and a ``dataset_filters.set`` to clear + the dataset's filters. Without this second debounce, the filter write + invalidates ``_matrix_data`` immediately while ``_settled_datasets`` is + still showing the pre-toggle list — causing the matrix to recompute with + an inconsistent (old list, new filters) view, then again 1s later when + the list settles. That manifests as the matrix flashing back and forth. + + Both reads are now coalesced through the same ``_SETTLED_WINDOW_SEC`` + window. 
Other ``dataset_filters()`` consumers (modal handlers, filter + buttons) are unaffected and keep instant response. + + :trigger: ``dataset_filters`` — re-runs when any filter changes, + but downstream is only notified after the same quiet period as + ``_settled_datasets``. + :returns: Current filter dict snapshot. + + """ + return dataset_filters() + @reactive.calc def _matrix_data() -> dict[str, Any]: """ @@ -67,14 +109,16 @@ def _matrix_data() -> dict[str, Any]: counts. :trigger: ``_settled_datasets`` — re-runs after rapid toggle changes settle. - ``dataset_filters`` — re-runs when any filter changes. + ``_settled_filters`` — re-runs after filter changes settle (same + debounce window, so toggle-off filter-clear and dataset-list updates + are coalesced into one recompute). :returns: Dict with keys ``"diagonal"`` — ``{db_name: {"regulators": int, "samples": int}}``; ``"cross_dataset"`` — ``{(db_i, db_j): {"common_regulators": int, "samples_a": int, "samples_b": int}}``. """ active = _settled_datasets() - filters = dataset_filters() + filters = _settled_filters() regulator_sets: dict[str, set[str]] = {} diagonal: dict[str, dict[str, int]] = {} @@ -83,12 +127,28 @@ def _matrix_data() -> dict[str, Any]: db_filters = filters.get(db_name) sql, params = regulator_locus_tags_query(db_name, db_filters) - reg_df = vdb.query(sql, **params) + with profile_span( + profile_logger, + "vdb.query", + module="select_datasets", + dataset=db_name, + context="_matrix_data", + session_id=session_id, + ): + reg_df = vdb.query(sql, **params) regulators = set(reg_df["regulator_locus_tag"].dropna().astype(str)) regulator_sets[db_name] = regulators sql, params = sample_count_query(db_name, db_filters) - n_samples = int(vdb.query(sql, **params).iloc[0, 0]) + with profile_span( + profile_logger, + "vdb.query", + module="select_datasets", + dataset=db_name, + context="_matrix_data", + session_id=session_id, + ): + n_samples = int(vdb.query(sql, **params).iloc[0, 0]) diagonal[db_name] = 
{"regulators": len(regulators), "samples": n_samples} @@ -105,8 +165,24 @@ def _matrix_data() -> dict[str, Any]: sql_b, params_b = sample_count_query( db_b, filters.get(db_b), restrict_to_regulators=common_list ) - n_a = int(vdb.query(sql_a, **params_a).iloc[0, 0]) - n_b = int(vdb.query(sql_b, **params_b).iloc[0, 0]) + with profile_span( + profile_logger, + "vdb.query", + module="select_datasets", + dataset=f"{db_a}x{db_b}", + context="_matrix_data", + session_id=session_id, + ): + n_a = int(vdb.query(sql_a, **params_a).iloc[0, 0]) + with profile_span( + profile_logger, + "vdb.query", + module="select_datasets", + dataset=f"{db_a}x{db_b}", + context="_matrix_data", + session_id=session_id, + ): + n_b = int(vdb.query(sql_b, **params_b).iloc[0, 0]) cross_dataset[(db_a, db_b)] = { "common_regulators": len(common), diff --git a/tfbpshiny/utils/profiler.py b/tfbpshiny/utils/profiler.py new file mode 100644 index 0000000..fa971c3 --- /dev/null +++ b/tfbpshiny/utils/profiler.py @@ -0,0 +1,89 @@ +"""Timing context manager for the profiler logger.""" + +from __future__ import annotations + +import json +import time +from collections.abc import Generator +from contextlib import contextmanager +from datetime import datetime, timezone +from logging import Logger + + +@contextmanager +def profile_span( + logger: Logger, + op: str, + module: str = "", + dataset: str = "", + context: str = "", + session_id: str = "", +) -> Generator[None]: + """ + Time a block of code and emit one structured JSON record to *logger*. + + Each record has the following keys:: + + {"event": "PROFILE", "ts": "...", "elapsed_s": 0.1234, + "op": "...", "module": "...", "dataset": "...", + "context": "...", "session_id": "..."} + + :param logger: The ``"profiler"`` logger. + :param op: Operation label, e.g. ``"vdb.query"`` or ``"plot.build"``. + :param module: Shiny module name, e.g. ``"binding"``. Empty for init-time spans. + :param dataset: Dataset(s) involved, e.g. 
``"harbison"`` or ``"harbisonxrossi"``. +    :param context: Function or plot name providing location, e.g. ``"_all_corr_data"``. +    :param session_id: Shiny session ID, used to correlate spans and count concurrent +        users. Empty for init-time spans that run outside a session. + +    """ +    t0 = time.perf_counter() +    try: +        yield +    finally: +        elapsed = time.perf_counter() - t0 +        ts = datetime.now(timezone.utc).isoformat(timespec="milliseconds") +        logger.debug( +            json.dumps( +                { +                    "event": "PROFILE", +                    "ts": ts, +                    "elapsed_s": round(elapsed, 4), +                    "op": op, +                    "module": module, +                    "dataset": dataset, +                    "context": context, +                    "session_id": session_id, +                } +            ) +        ) + + +def log_session_event(logger: Logger, event: str, session_id: str) -> None: +    """ +    Emit a session lifecycle record to *logger*. + +    Each record has the following keys:: + +        {"event": "SESSION", "ts": "...", +         "lifecycle": "START"|"END", "session_id": "..."} + +    :param logger: The ``"profiler"`` logger. +    :param event: ``"START"`` or ``"END"``. +    :param session_id: Shiny session ID. + +    """ +    ts = datetime.now(timezone.utc).isoformat(timespec="milliseconds") +    logger.debug( +        json.dumps( +            { +                "event": "SESSION", +                "ts": ts, +                "lifecycle": event, +                "session_id": session_id, +            } +        ) +    ) + + +__all__ = ["profile_span", "log_session_event"] diff --git a/tfbpshiny/utils/vdb_init.py b/tfbpshiny/utils/vdb_init.py index de62974..e177be1 100644 --- a/tfbpshiny/utils/vdb_init.py +++ b/tfbpshiny/utils/vdb_init.py @@ -2,11 +2,15 @@ from __future__ import annotations +import logging from dataclasses import dataclass +from logging import Logger import pandas as pd from labretriever import VirtualDB +from tfbpshiny.utils.profiler import profile_span + # Metadata fields to suppress from the filter UI, keyed by db_name. # Use "*" for fields hidden across all datasets; use the db_name key for # dataset-specific exclusions. 
The effective hidden set for a given dataset @@ -140,7 +144,9 @@ def _filter_hackett_views(vdb: VirtualDB) -> None: ) -def ensure_hackett_analysis_set(vdb: VirtualDB) -> None: +def ensure_hackett_analysis_set( + vdb: VirtualDB, profile_logger: Logger | None = None +) -> None: """ Build the ``hackett_analysis_set`` table and permanently filter the ``hackett_meta`` and ``hackett`` views to include only those samples. @@ -148,9 +154,12 @@ def ensure_hackett_analysis_set(vdb: VirtualDB) -> None: Safe to call multiple times; uses ``CREATE OR REPLACE``. :param vdb: The application VirtualDB instance. + :param profile_logger: Optional profiler logger; timing is skipped when ``None``. """ - vdb._conn.execute(_HACKETT_ANALYSIS_SET_SQL) + _pl = profile_logger or logging.getLogger("profiler") + with profile_span(_pl, "init.hackett_set"): + vdb._conn.execute(_HACKETT_ANALYSIS_SET_SQL) _filter_hackett_views(vdb) @@ -174,7 +183,9 @@ def ensure_hackett_analysis_set(vdb: VirtualDB) -> None: """ -def _build_regulator_display_names(vdb: VirtualDB) -> None: +def _build_regulator_display_names( + vdb: VirtualDB, profile_logger: Logger | None = None +) -> None: """ Build the ``regulator_display_names`` DuckDB table from all dataset meta views. @@ -184,6 +195,7 @@ def _build_regulator_display_names(vdb: VirtualDB) -> None: non-empty symbol different from the tag is present; otherwise it equals the tag. :param vdb: The application VirtualDB instance. + :param profile_logger: Optional profiler logger; timing is skipped when ``None``. 
""" db_names = [ @@ -201,12 +213,15 @@ def _build_regulator_display_names(vdb: VirtualDB) -> None: table=_REGULATOR_DISPLAY_NAME_TABLE, union_sql=union_sql, ) - vdb._conn.execute(sql) + _pl = profile_logger or logging.getLogger("profiler") + with profile_span(_pl, "init.regulator_table"): + vdb._conn.execute(sql) def get_regulator_display_name( vdb: VirtualDB, locus_tags: list[str] | None = None, + profile_logger: Logger | None = None, ) -> pd.DataFrame: """ Return a DataFrame of regulator display names from the pre-built lookup table. @@ -219,13 +234,22 @@ def get_regulator_display_name( :rtype: pandas.DataFrame """ - if locus_tags is None: - return vdb._conn.execute(f"SELECT * FROM {_REGULATOR_DISPLAY_NAME_TABLE}").df() - return vdb._conn.execute( - f"SELECT * FROM {_REGULATOR_DISPLAY_NAME_TABLE} " - f"WHERE regulator_locus_tag = ANY(?)", - [locus_tags], - ).df() + _pl = profile_logger or logging.getLogger("profiler") + with profile_span( + _pl, + "vdb.execute", + dataset=_REGULATOR_DISPLAY_NAME_TABLE, + context="get_regulator_display_name", + ): + if locus_tags is None: + return vdb._conn.execute( + f"SELECT * FROM {_REGULATOR_DISPLAY_NAME_TABLE}" + ).df() + return vdb._conn.execute( + f"SELECT * FROM {_REGULATOR_DISPLAY_NAME_TABLE} " + f"WHERE regulator_locus_tag = ANY(?)", + [locus_tags], + ).df() @dataclass @@ -252,19 +276,21 @@ class AppDatasets: def initialize_data( virtualdb_config: str, hf_token: str | None = None, + profile_logger: Logger | None = None, ) -> tuple[VirtualDB, AppDatasets]: """ Construct the VirtualDB, run one-time setup, and compute app-level dataset metadata. :param virtualdb_config: Path to the VirtualDB YAML config file. :param hf_token: Optional HuggingFace token for private repo access. + :param profile_logger: Optional profiler logger for timing instrumentation. :returns: Tuple of ``(vdb, app_datasets)``. 
:rtype: tuple[VirtualDB, AppDatasets] """ vdb = VirtualDB(virtualdb_config, token=hf_token) - ensure_hackett_analysis_set(vdb) - _build_regulator_display_names(vdb) + ensure_hackett_analysis_set(vdb, profile_logger=profile_logger) + _build_regulator_display_names(vdb, profile_logger=profile_logger) condition_cols: dict[str, list[str]] = {} upstream_cols: dict[str, list[str]] = {}