From 6a2eabd8e3244d5c353df0f5daea86c6e8bd8e26 Mon Sep 17 00:00:00 2001 From: JasonOA888 Date: Fri, 13 Mar 2026 13:24:20 +0800 Subject: [PATCH 1/7] fix: persist Langflow database across container restarts Fixes #1127 - Langflow flow edits no longer lost after restart ## Problem Flow edits made in Langflow UI were being lost after container restart. The langflow service only mounted /app/flows but Langflow stores its SQLite database at /root/.langflow/ by default, which was ephemeral. ## Solution 1. Add persistent volume for Langflow data directory: - Mount ${LANGFLOW_DATA_PATH:-./langflow-data} to /root/.langflow 2. Explicitly set LANGFLOW_DATABASE_URL to ensure database location 3. Document the new LANGFLOW_DATA_PATH in .env.example ## Testing - Verified volume mount configuration - Database URL format: sqlite:////root/.langflow/langflow.db Closes #1127 --- .env.example | 5 +++++ docker-compose.yml | 2 ++ 2 files changed, 7 insertions(+) diff --git a/.env.example b/.env.example index db6eb53d4..a2a90fa59 100644 --- a/.env.example +++ b/.env.example @@ -74,6 +74,11 @@ OPENSEARCH_PASSWORD= # Default: ./opensearch-data OPENSEARCH_DATA_PATH=./opensearch-data +# Path to persist Langflow database and state (flows, credentials, settings) +# Without this volume, flow edits will be lost on container restart +# Default: ./langflow-data +LANGFLOW_DATA_PATH=./langflow-data + # OpenSearch Connection OPENSEARCH_HOST=opensearch OPENSEARCH_PORT=9200 diff --git a/docker-compose.yml b/docker-compose.yml index a4faf3692..c4a225178 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -127,6 +127,7 @@ services: langflow: volumes: - ${OPENRAG_FLOWS_PATH:-./flows}:/app/flows:U,z + - ${LANGFLOW_DATA_PATH:-./langflow-data}:/root/.langflow:U,z image: langflowai/openrag-langflow:${OPENRAG_VERSION:-latest} build: context: . @@ -146,6 +147,7 @@ services: - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID} - OLLAMA_BASE_URL=${OLLAMA_ENDPOINT} - LANGFLOW_LOAD_FLOWS_PATH=/app/flows + - LANGFLOW_DATABASE_URL=sqlite:////root/.langflow/langflow.db - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY} - JWT=None - OWNER=None From 2b1456afc82713f56df188fde9fe59aa560a0639 Mon Sep 17 00:00:00 2001 From: Mike Pawlowski Date: Tue, 17 Mar 2026 08:33:48 -0700 Subject: [PATCH 2/7] fix: persist Langflow database across container restarts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue - #1127 Summary - User-made edits to Langflow flows were silently discarded on every container restart. - Two root causes were identified and corrected: - (1) The Langflow data volume was mounted to the wrong path inside the container. - (2) The LANGFLOW_LOAD_FLOWS_PATH mechanism performed a blind upsert of all flows on every startup, overwriting any changes made in the Langflow UI. Docker / Infrastructure - Corrected the Langflow data volume mount target from /root/.langflow to /app/langflow-data in docker-compose.yml - Replaced LANGFLOW_LOAD_FLOWS_PATH env var with LANGFLOW_CONFIG_DIR=/app/langflow-data so Langflow resolves its config and database from the persisted volume - Updated LANGFLOW_DATABASE_URL to reference the new path (sqlite:////app/langflow-data/langflow.db) - Pre-created /app/langflow-data in Dockerfile.langflow during image build to ensure named Docker volumes are initialised with the correct ownership for the non-root container user - Added langflow-data/ directory with a .gitkeep file; updated .gitignore to track the directory stub while ignoring its contents Backend — Flow Bootstrapping - Added FlowsService.ensure_flows_exist(): a create-only startup routine that checks each configured flow ID against the Langflow API and creates missing flows from their JSON files, without ever patching or overwriting an existing flow - Replaced the LANGFLOW_LOAD_FLOWS_PATH blind-upsert behaviour with a call to ensure_flows_exist() inside startup_tasks() in src/main.py Makefile - Extended the factory-reset target to remove the langflow-data/ directory alongside opensearch-data/ and config/ Code Cleanup - Removed trailing whitespace throughout src/services/flows_service.py Builds on #1129 --- .gitignore | 3 + Dockerfile.langflow | 10 ++- Makefile | 6 ++ docker-compose.yml | 6 +- langflow-data/.gitkeep | 0 src/main.py | 9 +++ src/services/flows_service.py | 133 +++++++++++++++++++++++++--------- 7 files changed, 126 insertions(+), 41 deletions(-) create mode 100644 langflow-data/.gitkeep diff --git a/.gitignore b/.gitignore index 7da9d1140..02d024e00 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,9 @@ wheels/ # OpenSearch data directory opensearch-data/ +# Langflow data directory (ignore contents, keep directory via .gitkeep) +/langflow-data/* +!/langflow-data/.gitkeep node_modules diff --git a/Dockerfile.langflow b/Dockerfile.langflow index 2f8286cd0..73793121b 100644 --- a/Dockerfile.langflow +++ b/Dockerfile.langflow @@ -1,7 +1,13 @@ FROM langflowai/langflow:1.8.0 -RUN pip install uv +# (+) Install uv +# (+) Pre-create the Langflow data directory with correct ownership. +# - This ensures named Docker volumes are initialised with uid=1000 so +# the non-root container user can write to the mounted path. +RUN set -ex \ + && pip install uv \ + && mkdir -p /app/langflow-data EXPOSE 7860 -CMD ["langflow", "run", "--host", "0.0.0.0", "--port", "7860"] \ No newline at end of file +CMD ["langflow", "run", "--host", "0.0.0.0", "--port", "7860"] diff --git a/Makefile b/Makefile index f5d391e8f..5fb3d948c 100644 --- a/Makefile +++ b/Makefile @@ -502,6 +502,7 @@ factory-reset: ## Complete reset (stop, remove volumes, clear data, remove image echo " - Stop all containers"; \ echo " - Remove all volumes"; \ echo " - Delete opensearch-data directory"; \ + echo " - Delete langflow-data directory"; \ echo " - Delete config directory"; \ echo " - Delete JWT keys (private_key.pem, public_key.pem)"; \ echo " - Remove OpenRAG images"; \ @@ -525,6 +526,11 @@ factory-reset: ## Complete reset (stop, remove volumes, clear data, remove image rm -rf opensearch-data/* 2>/dev/null || true; \ echo "$(PURPLE)opensearch-data removed$(NC)"; \ fi; \ + if [ -d "langflow-data" ]; then \ + echo "Removing langflow-data..."; \ + rm -rf langflow-data/* 2>/dev/null || true; \ + echo "$(PURPLE)langflow-data removed$(NC)"; \ + fi; \ if [ -d "config" ]; then \ echo "Removing config..."; \ rm -rf config; \ diff --git a/docker-compose.yml b/docker-compose.yml index c4a225178..6a2a07f16 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -127,7 +127,7 @@ services: langflow: volumes: - ${OPENRAG_FLOWS_PATH:-./flows}:/app/flows:U,z - - ${LANGFLOW_DATA_PATH:-./langflow-data}:/root/.langflow:U,z + - ${LANGFLOW_DATA_PATH:-./langflow-data}:/app/langflow-data:U,z image: langflowai/openrag-langflow:${OPENRAG_VERSION:-latest} build: context: . @@ -146,8 +146,8 @@ services: - WATSONX_URL=${WATSONX_URL:-${WATSONX_ENDPOINT}} - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID} - OLLAMA_BASE_URL=${OLLAMA_ENDPOINT} - - LANGFLOW_LOAD_FLOWS_PATH=/app/flows - - LANGFLOW_DATABASE_URL=sqlite:////root/.langflow/langflow.db + - LANGFLOW_CONFIG_DIR=/app/langflow-data + - LANGFLOW_DATABASE_URL=sqlite:////app/langflow-data/langflow.db - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY} - JWT=None - OWNER=None diff --git a/langflow-data/.gitkeep b/langflow-data/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/src/main.py b/src/main.py index 041c573cc..8eae9b293 100644 --- a/src/main.py +++ b/src/main.py @@ -1200,6 +1200,15 @@ async def startup_tasks(services): # Update MCP servers with provider credentials (especially important for no-auth mode) await _update_mcp_servers_with_provider_credentials(services) + # Ensure all configured flows exist in Langflow (create-only, never overwrites). + # This replaces LANGFLOW_LOAD_FLOWS_PATH, which performed a blind upsert on + # every container start and discarded any user edits made in the Langflow UI. + try: + flows_service = services["flows_service"] + await flows_service.ensure_flows_exist() + except Exception as e: + logger.warning("Failed to ensure Langflow flows exist at startup", error=str(e)) + # Check if flows were reset and reapply settings if config is edited try: config = get_openrag_config() diff --git a/src/services/flows_service.py b/src/services/flows_service.py index 00b381fb4..137af1080 100644 --- a/src/services/flows_service.py +++ b/src/services/flows_service.py @@ -47,7 +47,7 @@ async def resolve_ollama_url(self, endpoint: str, force_refresh: bool = False) - resolved_url = None for cand in candidates: test_url = replace_localhost_patterns(endpoint, cand) - + logger.debug(f"Probing Ollama candidate via Langflow: {test_url}") try: response = await clients.langflow_request( @@ -61,7 +61,7 @@ async def resolve_ollama_url(self, endpoint: str, force_refresh: bool = False) - except Exception as e: logger.debug(f"Probe failed for {test_url}: {e}") continue - + if not resolved_url: # Fallback to simple transformation if probing fails resolved_url = transform_localhost_url(endpoint) @@ -95,23 +95,23 @@ def _get_backup_directory(self): def _get_latest_backup_path(self, flow_id: str, flow_type: str): """ Get the path to the latest backup file for a flow. - + Args: flow_id: The flow ID flow_type: The flow type name - + Returns: str: Path to latest backup file, or None if no backup exists """ backup_dir = self._get_backup_directory() - + if not os.path.exists(backup_dir): return None - + # Find all backup files for this flow backup_files = [] prefix = f"{flow_type}_" - + try: for filename in os.listdir(backup_dir): if filename.startswith(prefix) and filename.endswith(".json"): @@ -122,10 +122,10 @@ def _get_latest_backup_path(self, flow_id: str, flow_type: str): except Exception as e: logger.warning(f"Error reading backup directory: {str(e)}") return None - + if not backup_files: return None - + # Return the most recent backup (highest mtime) backup_files.sort(key=lambda x: x[0], reverse=True) return backup_files[0][1] @@ -134,17 +134,17 @@ def _compare_flows(self, flow1: dict, flow2: dict): """ Compare two flow structures to see if they're different. Normalizes both flows before comparison. - + Args: flow1: First flow data flow2: Second flow data - + Returns: bool: True if flows are different, False if they're the same """ normalized1 = self._normalize_flow_structure(flow1) normalized2 = self._normalize_flow_structure(flow2) - + # Compare normalized structures return normalized1 != normalized2 @@ -152,10 +152,10 @@ async def backup_all_flows(self, only_if_changed=True): """ Backup all flows from Langflow to the backup folder. Only backs up flows that have changed since the last backup. - + Args: only_if_changed: If True, only backup flows that differ from latest backup - + Returns: dict: Summary of backup operations with success/failure status """ @@ -200,7 +200,7 @@ async def backup_all_flows(self, only_if_changed=True): flow_locked = current_flow.get("locked", False) latest_backup_path = self._get_latest_backup_path(flow_id, flow_type) has_backups = latest_backup_path is not None - + # If flow is locked and no backups exist, skip backup if flow_locked and not has_backups: logger.debug( @@ -212,13 +212,13 @@ async def backup_all_flows(self, only_if_changed=True): "reason": "locked_without_backups", }) continue - + # Check if we need to backup (only if changed) if only_if_changed and has_backups: try: with open(latest_backup_path, "r") as f: latest_backup = json.load(f) - + # Compare flows if not self._compare_flows(current_flow, latest_backup): logger.debug( @@ -280,12 +280,12 @@ async def backup_all_flows(self, only_if_changed=True): async def _backup_flow(self, flow_id: str, flow_type: str, flow_data: dict = None): """ Backup a single flow to the backup folder. - + Args: flow_id: The flow ID to backup flow_type: The flow type name (nudges, retrieval, ingest, url_ingest) flow_data: The flow data to backup (if None, fetches from API) - + Returns: str: Path to the backup file, or None if backup failed """ @@ -717,7 +717,7 @@ def _normalize_flow_structure(self, flow_data): for node in nodes: node_data = node.get("data", {}) node_template = node_data.get("node", {}) - + normalized_node = { "id": node.get("id"), # Keep ID for edge matching "type": node.get("type"), @@ -775,20 +775,20 @@ async def _compare_flow_with_file(self, flow_id: str): # Compare entire normalized structures exactly # Sort nodes and edges for consistent comparison normalized_langflow["data"]["nodes"] = sorted( - normalized_langflow["data"]["nodes"], + normalized_langflow["data"]["nodes"], key=lambda x: (x.get("id", ""), x.get("type", "")) ) normalized_file["data"]["nodes"] = sorted( - normalized_file["data"]["nodes"], + normalized_file["data"]["nodes"], key=lambda x: (x.get("id", ""), x.get("type", "")) ) normalized_langflow["data"]["edges"] = sorted( - normalized_langflow["data"]["edges"], + normalized_langflow["data"]["edges"], key=lambda x: (x.get("source", ""), x.get("target", ""), x.get("sourceHandle", ""), x.get("targetHandle", "")) ) normalized_file["data"]["edges"] = sorted( - normalized_file["data"]["edges"], + normalized_file["data"]["edges"], key=lambda x: (x.get("source", ""), x.get("target", ""), x.get("sourceHandle", ""), x.get("targetHandle", "")) ) @@ -799,6 +799,67 @@ async def _compare_flow_with_file(self, flow_id: str): logger.error(f"Error comparing flow {flow_id} with file: {str(e)}") return False + async def ensure_flows_exist(self): + """ + Ensure all configured flows exist in Langflow. + + Creates flows from their JSON files if they are not already present in + the Langflow database. This is intentionally create-only: it never + patches or overwrites an existing flow, preserving any edits the user + has made in the Langflow UI. + + This replaces the LANGFLOW_LOAD_FLOWS_PATH mechanism, which performed a + blind upsert on every container start and discarded user edits. + """ + flow_configs = [ + ("nudges", NUDGES_FLOW_ID), + ("retrieval", LANGFLOW_CHAT_FLOW_ID), + ("ingest", LANGFLOW_INGEST_FLOW_ID), + ("url_ingest", LANGFLOW_URL_INGEST_FLOW_ID), + ] + + for flow_type, flow_id in flow_configs: + if not flow_id: + continue + + try: + response = await clients.langflow_request( + "GET", f"/api/v1/flows/{flow_id}" + ) + if response.status_code == 200: + logger.info( + f"Flow {flow_type} (ID: {flow_id}) already exists, skipping creation" + ) + continue + + flow_path = self._find_flow_file_by_id(flow_id) + if not flow_path: + logger.warning( + f"No flow file found for {flow_type} (ID: {flow_id}), cannot create" + ) + continue + + with open(flow_path, "r") as f: + flow_data = json.load(f) + + response = await clients.langflow_request( + "PUT", f"/api/v1/flows/{flow_id}", json=flow_data + ) + if response.status_code in (200, 201): + logger.info( + f"Created {flow_type} flow (ID: {flow_id}) from {os.path.basename(flow_path)}" + ) + else: + logger.warning( + f"Failed to create {flow_type} flow (ID: {flow_id}): " + f"HTTP {response.status_code} — {response.text}" + ) + + except Exception as e: + logger.error( + f"Error ensuring {flow_type} flow (ID: {flow_id}) exists: {e}" + ) + async def check_flows_reset(self): """ Check if any flows have been reset by comparing with JSON files. @@ -819,7 +880,7 @@ async def check_flows_reset(self): logger.info(f"Checking if {flow_type} flow (ID: {flow_id}) was reset") is_reset = await self._compare_flow_with_file(flow_id) - + if is_reset: logger.info(f"Flow {flow_type} (ID: {flow_id}) appears to have been reset") reset_flows.append(flow_type) @@ -827,7 +888,7 @@ async def check_flows_reset(self): logger.info(f"Flow {flow_type} (ID: {flow_id}) does not match reset state") return reset_flows - + async def change_langflow_model_value( self, provider: str, @@ -917,23 +978,23 @@ async def _update_provider_components( # Get all embedding nodes in the flow embedding_nodes = self._find_nodes_in_flow(flow_data, display_name=OPENAI_EMBEDDING_COMPONENT_DISPLAY_NAME) logger.info(f"Found {len(embedding_nodes)} embedding nodes in flow {flow_name} with display name '{OPENAI_EMBEDDING_COMPONENT_DISPLAY_NAME}'") - + # Count configured embedding-enabled providers config_obj = get_openrag_config() configured_providers = [] if config_obj.providers.openai.configured: configured_providers.append("openai") if config_obj.providers.watsonx.configured: configured_providers.append("watsonx") if config_obj.providers.ollama.configured: configured_providers.append("ollama") - + # Ensure current provider is in the list for counting purposes if it's being configured if provider in ["openai", "watsonx", "ollama"] and provider not in configured_providers: configured_providers.append(provider) - + all_possible = ["openai", "watsonx", "ollama"] configured_providers = [p for p in all_possible if p in configured_providers] provider_count = len(configured_providers) logger.info(f"Configured embedding providers: {configured_providers} (count: {provider_count})") - + # Determine slot mapping context if provider_count == 1: logger.info("Configuration mode: all 3 slots belong to the single active provider") @@ -948,7 +1009,7 @@ async def _update_provider_components( for node, idx in embedding_nodes: if self._get_node_provider(node) == provider_display: matched_nodes.append((node, idx)) - + if matched_nodes: logger.info(f"Found {len(matched_nodes)} nodes already configured for provider '{provider}'") for node, idx in matched_nodes: @@ -1035,7 +1096,7 @@ async def _update_component_langflow(self, template, model: str): # Only call if code field exists (custom components should have code) if "code" in template and "value" in template["code"]: code_value = template["code"]["value"] - + try: update_payload = { "code": code_value, @@ -1044,11 +1105,11 @@ async def _update_component_langflow(self, template, model: str): "field_value": model, "tool_mode": False, } - + response = await clients.langflow_request( "POST", "/api/v1/custom_component/update", json=update_payload ) - + if response.status_code == 200: response_data = response.json() # Update template with the new template from response.data @@ -1161,11 +1222,11 @@ async def _enable_model_in_langflow(self, provider_name: str, model_value: str): "model_id": model_value, "enabled": True }] - + response = await clients.langflow_request( "POST", "/api/v1/models/enabled_models", json=enable_payload ) - + if response.status_code == 200: logger.info(f"Successfully enabled model {model_value} for provider {provider_name}") else: From a1dd86e647047f0dbcf7de02a542dab80a1727a7 Mon Sep 17 00:00:00 2001 From: Mike Pawlowski Date: Wed, 18 Mar 2026 15:20:13 -0700 Subject: [PATCH 3/7] fix: persist Langflow database across container restarts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue - #1127 Summary - Propagated LANGFLOW_DATA_PATH to all locations that reference OPENSEARCH_DATA_PATH CI / Test Infrastructure - Added langflow-data to the Docker container cleanup command in test-e2e.yml and test-integration.yml, ensuring CI runners start each run with a clean Langflow database - Added LANGFLOW_DATA_PATH=./langflow-data to frontend/.env.test.example alongside the existing OPENSEARCH_DATA_PATH entry - Added a langflow-data directory cleanup block to tests/conftest.py that mirrors the existing opensearch-data teardown, so integration tests run against a fresh Langflow DB Documentation - Added a LANGFLOW_DATA_PATH row to the Langflow settings table in docs/docs/reference/configuration.mdx, documenting the default value and that flow edits are lost without this volume TUI — Configuration Fields - Added a langflow_data_path ConfigField to the Langflow section in config_fields.py with a file picker placeholder and a default of $HOME/.openrag/data/langflow-data TUI — Env Manager - Added langflow_data_path field to the EnvConfig dataclass in env_manager.py - Added "LANGFLOW_DATA_PATH": "langflow_data_path" entry to _env_attr_map() so the variable is loaded from and written to .env files - Added LANGFLOW_DATA_PATH write in save_env_file() immediately after the OPENSEARCH_DATA_PATH write TUI — Config Screen - Added langflow_data_path to SPECIAL_FIELDS in config.py - Added _render_langflow_data_path() renderer with a "Pick…" directory picker button, matching the _render_opensearch_data_path() pattern - Added action_pick_langflow_data_path() action implementing the textual-fspicker directory picker flow - Wired the pick-langflow-data-btn button in on_pressed and added the _langflow_data_pick_callback fallback handler in on_screen_dismissed --- .github/workflows/test-e2e.yml | 4 +- .github/workflows/test-integration.yml | 4 +- docs/docs/reference/configuration.mdx | 1 + frontend/.env.test.example | 1 + src/tui/config_fields.py | 6 ++ src/tui/managers/env_manager.py | 5 ++ src/tui/screens/config.py | 83 ++++++++++++++++++++++++++ tests/conftest.py | 9 +++ 8 files changed, 109 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index f73f89bb7..6b80efbeb 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -37,10 +37,10 @@ jobs: docker builder prune -af || true docker compose -f docker-compose.yml down -v --remove-orphans || true - - name: Cleanup root-owned files (OpenSearch data, config) + - name: Cleanup root-owned files (OpenSearch data, config, Langflow data) run: | for i in 1 2 3; do - docker run --rm -v $(pwd):/work alpine sh -c "rm -rf /work/opensearch-data /work/config" && break + docker run --rm -v $(pwd):/work alpine sh -c "rm -rf /work/opensearch-data /work/config /work/langflow-data" && break echo "Attempt $i failed, retrying in 5s..." sleep 5 done || true diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 4dd3485e3..470ccbaa5 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -42,10 +42,10 @@ jobs: docker builder prune -af || true docker compose -f docker-compose.yml down -v --remove-orphans || true - - name: Cleanup root-owned files (OpenSearch data, config) + - name: Cleanup root-owned files (OpenSearch data, config, Langflow data) run: | for i in 1 2 3; do - docker run --rm -v $(pwd):/work alpine sh -c "rm -rf /work/opensearch-data /work/config" && break + docker run --rm -v $(pwd):/work alpine sh -c "rm -rf /work/opensearch-data /work/config /work/langflow-data" && break echo "Attempt $i failed, retrying in 5s..." sleep 5 done || true diff --git a/docs/docs/reference/configuration.mdx b/docs/docs/reference/configuration.mdx index 65484f2df..895892b96 100644 --- a/docs/docs/reference/configuration.mdx +++ b/docs/docs/reference/configuration.mdx @@ -94,6 +94,7 @@ For better security, it is recommended to set `LANGFLOW_SUPERUSER_PASSWORD` so t | Variable | Default | Description | |----------|---------|-------------| +| `LANGFLOW_DATA_PATH` | `./langflow-data` | The path where OpenRAG persists the Langflow database (flows, credentials, settings) across container restarts. | | `LANGFLOW_AUTO_LOGIN` | Determined by `LANGFLOW_SUPERUSER_PASSWORD` | Whether to enable [auto-login mode](https://docs.langflow.org/api-keys-and-authentication#langflow-auto-login) for the Langflow visual editor and CLI. If `LANGFLOW_SUPERUSER_PASSWORD` isn't set, then `LANGFLOW_AUTO_LOGIN` is `True` and auto-login mode is enabled. If `LANGFLOW_SUPERUSER_PASSWORD` is set, then `LANGFLOW_AUTO_LOGIN` is `False` and auto-login mode is disabled. Langflow API calls always require authentication with a Langflow API key regardless of the auto-login setting. | | `LANGFLOW_ENABLE_SUPERUSER_CLI` | Determined by `LANGFLOW_SUPERUSER_PASSWORD` | Whether to enable the [Langflow CLI `langflow superuser` command](https://docs.langflow.org/api-keys-and-authentication#langflow-enable-superuser-cli). If `LANGFLOW_SUPERUSER_PASSWORD` isn't set, then `LANGFLOW_ENABLE_SUPERUSER_CLI` is `True` and superuser accounts can be created with the Langflow CLI. If `LANGFLOW_SUPERUSER_PASSWORD` is set, then `LANGFLOW_ENABLE_SUPERUSER_CLI` is `False` and the `langflow superuser` command is disabled. | | `LANGFLOW_NEW_USER_IS_ACTIVE` | Determined by `LANGFLOW_SUPERUSER_PASSWORD` | Whether new [Langflow user accounts are active by default](https://docs.langflow.org/api-keys-and-authentication#langflow-new-user-is-active). If `LANGFLOW_SUPERUSER_PASSWORD` isn't set, then `LANGFLOW_NEW_USER_IS_ACTIVE` is `True` and new user accounts are active by default. If `LANGFLOW_SUPERUSER_PASSWORD` is set, then `LANGFLOW_NEW_USER_IS_ACTIVE` is `False` and new user accounts are inactive by default. | diff --git a/frontend/.env.test.example b/frontend/.env.test.example index f53600840..c053ca248 100644 --- a/frontend/.env.test.example +++ b/frontend/.env.test.example @@ -7,6 +7,7 @@ OPENSEARCH_PASSWORD= # Paths OPENSEARCH_DATA_PATH=./opensearch-data +LANGFLOW_DATA_PATH=./langflow-data OPENSEARCH_INDEX_NAME=documents # Model Providers diff --git a/src/tui/config_fields.py b/src/tui/config_fields.py index 73bdcf37d..19aa01631 100644 --- a/src/tui/config_fields.py +++ b/src/tui/config_fields.py @@ -112,6 +112,12 @@ class ConfigSection: "langflow_superuser", "LANGFLOW_SUPERUSER", "Admin Username", placeholder="admin", default="admin", ), + ConfigField( + "langflow_data_path", "LANGFLOW_DATA_PATH", "Data Path", + placeholder="~/.openrag/data/langflow-data", + default="$HOME/.openrag/data/langflow-data", + helper_text="Directory to persist Langflow flows and state across restarts", + ), ConfigField( "langflow_public_url", "LANGFLOW_PUBLIC_URL", "Public URL", placeholder="http://localhost:7860", diff --git a/src/tui/managers/env_manager.py b/src/tui/managers/env_manager.py index 791507885..175c94e45 100644 --- a/src/tui/managers/env_manager.py +++ b/src/tui/managers/env_manager.py @@ -98,6 +98,7 @@ class EnvConfig: openrag_config_path: str = "$HOME/.openrag/config" openrag_data_path: str = "$HOME/.openrag/data" # Backend data (conversations, tokens, etc.) opensearch_data_path: str = "$HOME/.openrag/data/opensearch-data" + langflow_data_path: str = "$HOME/.openrag/data/langflow-data" openrag_tui_config_path_legacy: str = "$HOME/.openrag/tui/config" # Container version (linked to TUI version) @@ -223,6 +224,7 @@ def _env_attr_map(self) -> Dict[str, str]: "OPENRAG_CONFIG_PATH": "openrag_config_path", "OPENRAG_DATA_PATH": "openrag_data_path", "OPENSEARCH_DATA_PATH": "opensearch_data_path", + "LANGFLOW_DATA_PATH": "langflow_data_path", "LANGFLOW_AUTO_LOGIN": "langflow_auto_login", "LANGFLOW_NEW_USER_IS_ACTIVE": "langflow_new_user_is_active", "LANGFLOW_ENABLE_SUPERUSER_CLI": "langflow_enable_superuser_cli", @@ -507,6 +509,9 @@ def save_env_file(self) -> bool: f.write( f"OPENSEARCH_DATA_PATH={self._quote_env_value(expand_path(self.config.opensearch_data_path))}\n" ) + f.write( + f"LANGFLOW_DATA_PATH={self._quote_env_value(expand_path(self.config.langflow_data_path))}\n" + ) # Set OPENRAG_VERSION to TUI version if self.config.openrag_version: f.write(f"OPENRAG_VERSION={self._quote_env_value(self.config.openrag_version)}\n") diff --git a/src/tui/screens/config.py b/src/tui/screens/config.py index 54873b00d..4ca6c8bd0 100644 --- a/src/tui/screens/config.py +++ b/src/tui/screens/config.py @@ -203,6 +203,7 @@ def _create_header_text(self) -> Text: "opensearch_data_path", "langflow_superuser_password", "langflow_superuser", + "langflow_data_path", "google_oauth_client_id", "microsoft_graph_oauth_client_id", "openrag_documents_paths", @@ -306,6 +307,25 @@ def _render_opensearch_data_path(self, field: ConfigField) -> ComposeResult: self.inputs[field.name] = input_widget yield Static(" ") + def _render_langflow_data_path(self, field: ConfigField) -> ComposeResult: + """Langflow data path with file picker.""" + yield Label(field.label) + yield Static(field.helper_text, classes="helper-text") + current_value = getattr(self.env_manager.config, field.name, field.default) + input_widget = Input( + placeholder=field.placeholder, + value=current_value, + id=f"input-{field.name}", + ) + yield input_widget + yield Horizontal( + Button("Pick…", id="pick-langflow-data-btn"), + id="langflow-data-path-actions", + classes="controls-row", + ) + self.inputs[field.name] = input_widget + yield Static(" ") + def _render_langflow_superuser_password(self, field: ConfigField) -> ComposeResult: """Langflow password with generate checkbox and eye toggle.""" with Horizontal(): @@ -456,6 +476,8 @@ def on_button_pressed(self, event: Button.Pressed) -> None: self.action_pick_documents_path() elif event.button.id == "pick-opensearch-data-btn": self.action_pick_opensearch_data_path() + elif event.button.id == "pick-langflow-data-btn": + self.action_pick_langflow_data_path() elif event.button.id and event.button.id.startswith("toggle-"): # Generic toggle for password/secret field visibility field_name = event.button.id.removeprefix("toggle-") @@ -665,6 +687,58 @@ def _set_path(result) -> None: self._opensearch_data_pick_callback = _set_path # type: ignore[attr-defined] self.app.push_screen(picker) + def action_pick_langflow_data_path(self) -> None: + """Open textual-fspicker to select Langflow data directory.""" + try: + import importlib + + fsp = importlib.import_module("textual_fspicker") + except Exception: + self.notify("textual-fspicker not available", severity="warning") + return + + input_widget = self.inputs.get("langflow_data_path") + start = Path.home() + if input_widget and input_widget.value: + path_str = input_widget.value.strip() + if path_str: + candidate = Path(path_str).expanduser() + if candidate.exists(): + start = candidate + elif candidate.parent.exists(): + start = candidate.parent + + PickerClass = getattr(fsp, "SelectDirectory", None) or getattr( + fsp, "FileOpen", None + ) + if PickerClass is None: + self.notify( + "No compatible picker found in textual-fspicker", severity="warning" + ) + return + try: + picker = PickerClass(location=start) + except Exception: + try: + picker = PickerClass(start) + except Exception: + self.notify("Could not initialize textual-fspicker", severity="warning") + return + + def _set_path(result) -> None: + if not result: + return + path_str = str(result) + if input_widget is None: + return + input_widget.value = path_str + + try: + self.app.push_screen(picker, _set_path) # type: ignore[arg-type] + except TypeError: + self._langflow_data_pick_callback = _set_path # type: ignore[attr-defined] + self.app.push_screen(picker) + def on_screen_dismissed(self, event) -> None: # type: ignore[override] try: # textual-fspicker screens should dismiss with a result; hand to callback if present @@ -684,6 +758,15 @@ def on_screen_dismissed(self, event) -> None: # type: ignore[override] delattr(self, "_opensearch_data_pick_callback") except Exception: pass + + # Handle Langflow data path picker callback + cb = getattr(self, "_langflow_data_pick_callback", None) + if cb is not None: + cb(getattr(event, "result", None)) + try: + delattr(self, "_langflow_data_pick_callback") + except Exception: + pass except Exception: pass diff --git a/tests/conftest.py b/tests/conftest.py index ffe80467e..eced102f5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -51,6 +51,15 @@ async def onboard_system(): except Exception as e: print(f"[DEBUG] Could not clean OpenSearch data directory: {e}") + # Clean up Langflow data directory to ensure a fresh Langflow DB for tests + langflow_data_path = Path(os.getenv("LANGFLOW_DATA_PATH", "./langflow-data")) + if langflow_data_path.exists(): + try: + shutil.rmtree(langflow_data_path) + print(f"[DEBUG] Cleaned up Langflow data directory: {langflow_data_path}") + except Exception as e: + print(f"[DEBUG] Could not clean Langflow data directory: {e}") + # Initialize clients await clients.initialize() From 44830d3ab9b433032b8f904b9211a2ca9273b6f3 Mon Sep 17 00:00:00 2001 From: Mike Pawlowski Date: Thu, 19 Mar 2026 07:42:59 -0700 Subject: [PATCH 4/7] fix: persist Langflow database across container restarts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue - #1127 Summary - Fixed Langflow data directory persistence and bind-mount ownership across container restarts Container & Entrypoint - Added docker-entrypoint-langflow.sh that runs as root, chowns /app/langflow-data to uid=1000, then drops privileges and execs the main process — mirrors the pattern used by official database images - Updated Dockerfile.langflow to switch to USER root, copy the entrypoint script, set it as ENTRYPOINT, and simplified the RUN layer - Added # syntax=docker/dockerfile:1.4 directive to Dockerfile.langflow Repository & Git Ignore - Removed langflow-data/.gitkeep and replaced the selective .gitignore pattern (/langflow-data/* + !.gitkeep) with a blanket langflow-data/ ignore entry Makefile - Added ensure-langflow-data target that creates the langflow-data bind-mount directory on the host before Docker starts (prevents Docker from creating it as root) - Added ensure-langflow-data as a prerequisite to dev, dev-cpu, dev-local, dev-local-cpu, dev-local-build-lf, dev-local-build-lf-cpu, dev-branch, dev-branch-cpu, restart-dev, test-ci, and test-ci-local - Updated factory-reset to fully remove the langflow-data directory (rm -rf langflow-data) instead of only clearing its contents --- .gitignore | 5 ++--- Dockerfile.langflow | 17 ++++++++++++----- Makefile | 30 +++++++++++++++++------------- docker-entrypoint-langflow.sh | 13 +++++++++++++ langflow-data/.gitkeep | 0 5 files changed, 44 insertions(+), 21 deletions(-) create mode 100644 docker-entrypoint-langflow.sh delete mode 100644 langflow-data/.gitkeep diff --git a/.gitignore b/.gitignore index 02d024e00..8ee2c4059 100644 --- a/.gitignore +++ b/.gitignore @@ -32,9 +32,8 @@ wheels/ # OpenSearch data directory opensearch-data/ -# Langflow data directory (ignore contents, keep directory via .gitkeep) -/langflow-data/* -!/langflow-data/.gitkeep +# Langflow data directory +langflow-data/ node_modules diff --git a/Dockerfile.langflow b/Dockerfile.langflow index 73793121b..abb651c74 100644 --- a/Dockerfile.langflow +++ b/Dockerfile.langflow @@ -1,13 +1,20 @@ +# syntax=docker/dockerfile:1.4 FROM langflowai/langflow:1.8.0 +# Switch to root so the entrypoint can fix data directory ownership before dropping privileges. +# The base image runs as uid=1000; we restore that in the entrypoint script. +USER root + # (+) Install uv -# (+) Pre-create the Langflow data directory with correct ownership. -# - This ensures named Docker volumes are initialised with uid=1000 so -# the non-root container user can write to the mounted path. -RUN set -ex \ - && pip install uv \ +# (+) Pre-create the Langflow data directory. +# - For named Docker volumes, this seeds the volume with the correct path on first mount. +# - For bind mounts, the entrypoint chowns the directory at startup. +RUN pip install uv \ && mkdir -p /app/langflow-data +COPY --chmod=755 docker-entrypoint-langflow.sh /docker-entrypoint-langflow.sh + EXPOSE 7860 +ENTRYPOINT ["/docker-entrypoint-langflow.sh"] CMD ["langflow", "run", "--host", "0.0.0.0", "--port", "7860"] diff --git a/Makefile b/Makefile index 5fb3d948c..df179205c 100644 --- a/Makefile +++ b/Makefile @@ -79,7 +79,8 @@ endef test test-unit test-integration test-ci test-ci-local test-sdk test-os-jwt lint \ backend frontend docling docling-stop install-be install-fe build-be build-fe build-os build-lf logs-be logs-fe logs-lf logs-os \ shell-be shell-lf shell-os restart status health db-reset clear-os-data flow-upload setup factory-reset \ - dev-branch build-langflow-dev stop-dev clean-dev logs-dev logs-lf-dev shell-lf-dev restart-dev status-dev + dev-branch build-langflow-dev stop-dev clean-dev logs-dev logs-lf-dev shell-lf-dev restart-dev status-dev \ + ensure-langflow-data all: help @@ -319,7 +320,10 @@ help_utils: ## Show utility commands # DEVELOPMENT ENVIRONMENTS ###################### -dev: ## Start full stack with GPU support +ensure-langflow-data: ## Create the langflow-data directory if it does not exist + @mkdir -p langflow-data + +dev: ensure-langflow-data ## Start full stack with GPU support @echo "$(YELLOW)Starting OpenRAG with GPU support...$(NC)" $(COMPOSE_CMD) -f docker-compose.yml -f docker-compose.gpu.yml up -d @echo "$(PURPLE)Services started!$(NC)" @@ -329,7 +333,7 @@ dev: ## Start full stack with GPU support @echo " $(CYAN)OpenSearch:$(NC) http://localhost:9200" @echo " $(CYAN)Dashboards:$(NC) http://localhost:5601" -dev-cpu: ## Start full stack with CPU only +dev-cpu: ensure-langflow-data ## Start full stack with CPU only @echo "$(YELLOW)Starting OpenRAG with CPU only...$(NC)" $(COMPOSE_CMD) up -d @echo "$(PURPLE)Services started!$(NC)" @@ -339,7 +343,7 @@ dev-cpu: ## Start full stack with CPU only @echo " $(CYAN)OpenSearch:$(NC) http://localhost:9200" @echo " $(CYAN)Dashboards:$(NC) http://localhost:5601" -dev-local: ## Start infrastructure for local development +dev-local: ensure-langflow-data ## Start infrastructure for local development @echo "$(YELLOW)Starting infrastructure only (for local development)...$(NC)" $(COMPOSE_CMD) -f docker-compose.yml -f docker-compose.gpu.yml up -d opensearch openrag-backend dashboards langflow @echo "$(PURPLE)Infrastructure started!$(NC)" @@ -350,7 +354,7 @@ dev-local: ## Start infrastructure for local development @echo "" @echo "$(YELLOW)Now run 'make backend' and 'make frontend' in separate terminals$(NC)" -dev-local-cpu: ## Start infrastructure for local development, with CPU only +dev-local-cpu: ensure-langflow-data ## Start infrastructure for local development, with CPU only @echo "$(YELLOW)Starting infrastructure only (for local development)...$(NC)" $(COMPOSE_CMD) up -d opensearch openrag-backend dashboards langflow @echo "$(PURPLE)Infrastructure started!$(NC)" @@ -361,7 +365,7 @@ dev-local-cpu: ## Start infrastructure for local development, with CPU only @echo "" @echo "$(YELLOW)Now run 'make backend' and 'make frontend' in separate terminals$(NC)" -dev-local-build-lf: ## Start infrastructure for local development, building only Langflow image +dev-local-build-lf: ensure-langflow-data ## Start infrastructure for local development, building only Langflow image @echo "$(YELLOW)Building Langflow image...$(NC)" $(COMPOSE_CMD) -f docker-compose.yml -f docker-compose.gpu.yml build langflow @echo "$(YELLOW)Starting infrastructure only (for local development)...$(NC)" @@ -374,7 +378,7 @@ dev-local-build-lf: ## Start infrastructure for local development, building only @echo "" @echo "$(YELLOW)Now run 'make backend' and 'make frontend' in separate terminals$(NC)" -dev-local-build-lf-cpu: ## Start infrastructure for local development, building only Langflow image with CPU only +dev-local-build-lf-cpu: ensure-langflow-data ## Start infrastructure for local development, building only Langflow image with CPU only @echo "$(YELLOW)Building Langflow image (CPU)...$(NC)" $(COMPOSE_CMD) build langflow @echo "$(YELLOW)Starting infrastructure only (for local development)...$(NC)" @@ -393,7 +397,7 @@ dev-local-build-lf-cpu: ## Start infrastructure for local development, building # Usage: make dev-branch BRANCH=test-openai-responses # make dev-branch BRANCH=feature-x REPO=https://github.com/myorg/langflow.git -dev-branch: ## Build & run full stack with custom Langflow branch +dev-branch: ensure-langflow-data ## Build & run full stack with custom Langflow branch @echo "$(YELLOW)Building Langflow from branch: $(BRANCH)$(NC)" @echo " $(CYAN)Repository:$(NC) $(REPO)" @echo "" @@ -409,7 +413,7 @@ dev-branch: ## Build & run full stack with custom Langflow branch @echo " $(CYAN)OpenSearch:$(NC) http://localhost:9200" @echo " $(CYAN)Dashboards:$(NC) http://localhost:5601" -dev-branch-cpu: ## Build & run full stack with custom Langflow branch and CPU only mode +dev-branch-cpu: ensure-langflow-data ## Build & run full stack with custom Langflow branch and CPU only mode @echo "$(YELLOW)Building Langflow from branch: $(BRANCH)$(NC)" @echo " $(CYAN)Repository:$(NC) $(REPO)" @echo "" @@ -436,7 +440,7 @@ stop-dev: ## Stop dev environment containers $(COMPOSE_CMD) -f docker-compose.dev.yml down @echo "$(PURPLE)Dev environment stopped.$(NC)" -restart-dev: ## Restart dev environment +restart-dev: ensure-langflow-data ## Restart dev environment @echo "$(YELLOW)Restarting dev environment with branch: $(BRANCH)$(NC)" $(COMPOSE_CMD) -f docker-compose.dev.yml down GIT_BRANCH=$(BRANCH) GIT_REPO=$(REPO) $(COMPOSE_CMD) -f docker-compose.dev.yml up -d @@ -528,7 +532,7 @@ factory-reset: ## Complete reset (stop, remove volumes, clear data, remove image fi; \ if [ -d "langflow-data" ]; then \ echo "Removing langflow-data..."; \ - rm -rf langflow-data/* 2>/dev/null || true; \ + rm -rf langflow-data; \ echo "$(PURPLE)langflow-data removed$(NC)"; \ fi; \ if [ -d "config" ]; then \ @@ -677,7 +681,7 @@ test-integration: ## Run integration tests (requires infrastructure) @echo "$(YELLOW)Make sure to run 'make dev-local' first!$(NC)" uv run pytest tests/integration/core/ -v -test-ci: ## Start infra, run integration + SDK tests, tear down (uses DockerHub images) +test-ci: ensure-langflow-data ## Start infra, run integration + SDK tests, tear down (uses DockerHub images) @set -e; \ echo "$(YELLOW)Installing test dependencies...$(NC)"; \ uv sync --group dev; \ @@ -806,7 +810,7 @@ test-ci: ## Start infra, run integration + SDK tests, tear down (uses DockerHub $(COMPOSE_CMD) down -v 2>/dev/null || true; \ exit $$TEST_RESULT -test-ci-local: ## Same as test-ci but builds all images locally +test-ci-local: ensure-langflow-data ## Same as test-ci but builds all images locally @set -e; \ echo "$(YELLOW)Installing test dependencies...$(NC)"; \ uv sync --group dev; \ diff --git a/docker-entrypoint-langflow.sh b/docker-entrypoint-langflow.sh new file mode 100644 index 000000000..550ddcd53 --- /dev/null +++ b/docker-entrypoint-langflow.sh @@ -0,0 +1,13 @@ +#!/bin/sh +set -e + +# Fix ownership of the Langflow data directory so the container user (uid=1000) can write to it. +# When the directory is bind-mounted from a host with a different UID (e.g. CI runners at uid=1001), +# the container user cannot create files. Running chown here as root — before dropping privileges — +# mirrors the pattern used by official database images (OpenSearch, PostgreSQL, Redis). +chown -R 1000:1000 /app/langflow-data + +# Drop from root to uid=1000 and exec the main process. +# Python is used for privilege drop — it is guaranteed to be present in the Langflow image +# and requires no additional packages (unlike gosu or su-exec). +exec python3 -c 'import os, sys; os.setgid(1000); os.setuid(1000); os.execvp(sys.argv[1], sys.argv[1:])' "$@" diff --git a/langflow-data/.gitkeep b/langflow-data/.gitkeep deleted file mode 100644 index e69de29bb..000000000 From a1c1e0283c2b5441ab9721bbee416bb9c7595ff5 Mon Sep 17 00:00:00 2001 From: Mike Pawlowski Date: Thu, 19 Mar 2026 07:59:54 -0700 Subject: [PATCH 5/7] fix: persist Langflow database across container restarts Issue - #1127 Summary - Removed Langflow data directory cleanup from test setup Test Infrastructure - Removed the block in conftest.py that deleted the Langflow data directory (LANGFLOW_DATA_PATH) before tests ran, as this cleanup is no longer appropriate given that the Langflow database is now persisted across container restarts. --- tests/conftest.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index eced102f5..ffe80467e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -51,15 +51,6 @@ async def onboard_system(): except Exception as e: print(f"[DEBUG] Could not clean OpenSearch data directory: {e}") - # Clean up Langflow data directory to ensure a fresh Langflow DB for tests - langflow_data_path = Path(os.getenv("LANGFLOW_DATA_PATH", "./langflow-data")) - if langflow_data_path.exists(): - try: - shutil.rmtree(langflow_data_path) - print(f"[DEBUG] Cleaned up Langflow data directory: {langflow_data_path}") - except Exception as e: - print(f"[DEBUG] Could not clean Langflow data directory: {e}") - # Initialize clients await clients.initialize() From 8566c05539f22e19c66a9e73e278d3ebaf2ccf68 Mon Sep 17 00:00:00 2001 From: Mike Pawlowski Date: Thu, 19 Mar 2026 11:56:13 -0700 Subject: [PATCH 6/7] fix: persist Langflow database across container restarts Issue - #1127 Summary - Persisted Langflow database and flow configurations across container restarts by replacing the blind upsert startup mechanism with a create-only flow seeding approach. Docker / Container Changes - Removed the custom docker-entrypoint-langflow.sh entrypoint script, which ran chown as root before dropping to uid=1000; the base Langflow image already owns /app as uid=1000, so the privilege escalation was unnecessary. - Simplified Dockerfile.langflow by removing the USER root switch, entrypoint script copy, and the ENTRYPOINT override; the image now runs its default langflow run command directly. - Updated docker-compose.yml to resolve LANGFLOW_DATABASE_URL from the environment with a SQLite fallback (${LANGFLOW_DATABASE_URL:-sqlite:////app/langflow-data/langflow.db}), allowing operators to substitute a PostgreSQL URL without rebuilding the image. - Added LANGFLOW_DATABASE_URL to .env.example with documentation on overriding for production PostgreSQL deployments. Kubernetes / Helm Changes - Replaced the LANGFLOW_LOAD_FLOWS_PATH env var with LANGFLOW_CONFIG_DIR in the Langflow Helm deployment template to align with the new startup flow seeding approach. Backend: Flow Seeding & Reset Detection - Updated FlowsService.ensure_flows_exist() to return a set[str] of flow type names that were newly created during the current startup, rather than returning None. - Added handling for unexpected non-404 HTTP status codes when checking whether a flow exists; logs a warning and skips creation to avoid overwriting existing data. - Updated startup_tasks() in src/main.py to capture the set of newly created flows from ensure_flows_exist(). - Filtered out newly seeded flows from the check_flows_reset() result so that freshly created flows (which match their JSON definition by design) are not incorrectly flagged as having been externally reset. - Upgraded the log level for ensure_flows_exist() failures from warning to error to better surface critical startup failures. --- .env.example | 5 +++++ Dockerfile.langflow | 13 ++----------- docker-compose.yml | 2 +- docker-entrypoint-langflow.sh | 13 ------------- .../openrag/templates/langflow/deployment.yaml | 4 ++-- src/main.py | 12 ++++++++++-- src/services/flows_service.py | 15 ++++++++++++++- 7 files changed, 34 insertions(+), 30 deletions(-) delete mode 100644 docker-entrypoint-langflow.sh diff --git a/.env.example b/.env.example index a2a90fa59..2cb3df3fc 100644 --- a/.env.example +++ b/.env.example @@ -79,6 +79,11 @@ OPENSEARCH_DATA_PATH=./opensearch-data # Default: ./langflow-data LANGFLOW_DATA_PATH=./langflow-data +# Langflow database URL. Defaults to SQLite stored in LANGFLOW_DATA_PATH. +# Override with a PostgreSQL URL for production deployments, e.g.: +# LANGFLOW_DATABASE_URL=postgresql://user:pass@host:5432/langflow +LANGFLOW_DATABASE_URL=sqlite:////app/langflow-data/langflow.db + # OpenSearch Connection OPENSEARCH_HOST=opensearch OPENSEARCH_PORT=9200 diff --git a/Dockerfile.langflow b/Dockerfile.langflow index abb651c74..122c8e5ea 100644 --- a/Dockerfile.langflow +++ b/Dockerfile.langflow @@ -1,20 +1,11 @@ # syntax=docker/dockerfile:1.4 FROM langflowai/langflow:1.8.0 -# Switch to root so the entrypoint can fix data directory ownership before dropping privileges. -# The base image runs as uid=1000; we restore that in the entrypoint script. -USER root - -# (+) Install uv -# (+) Pre-create the Langflow data directory. -# - For named Docker volumes, this seeds the volume with the correct path on first mount. -# - For bind mounts, the entrypoint chowns the directory at startup. +# Install uv and pre-create the Langflow data directory. +# The base image already runs as uid=1000 and owns /app, so no root or chown needed. RUN pip install uv \ && mkdir -p /app/langflow-data -COPY --chmod=755 docker-entrypoint-langflow.sh /docker-entrypoint-langflow.sh - EXPOSE 7860 -ENTRYPOINT ["/docker-entrypoint-langflow.sh"] CMD ["langflow", "run", "--host", "0.0.0.0", "--port", "7860"] diff --git a/docker-compose.yml b/docker-compose.yml index 6a2a07f16..f0fd620a5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -147,7 +147,7 @@ services: - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID} - OLLAMA_BASE_URL=${OLLAMA_ENDPOINT} - LANGFLOW_CONFIG_DIR=/app/langflow-data - - LANGFLOW_DATABASE_URL=sqlite:////app/langflow-data/langflow.db + - LANGFLOW_DATABASE_URL=${LANGFLOW_DATABASE_URL:-sqlite:////app/langflow-data/langflow.db} - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY} - JWT=None - OWNER=None diff --git a/docker-entrypoint-langflow.sh b/docker-entrypoint-langflow.sh deleted file mode 100644 index 550ddcd53..000000000 --- a/docker-entrypoint-langflow.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/sh -set -e - -# Fix ownership of the Langflow data directory so the container user (uid=1000) can write to it. -# When the directory is bind-mounted from a host with a different UID (e.g. CI runners at uid=1001), -# the container user cannot create files. Running chown here as root — before dropping privileges — -# mirrors the pattern used by official database images (OpenSearch, PostgreSQL, Redis). -chown -R 1000:1000 /app/langflow-data - -# Drop from root to uid=1000 and exec the main process. -# Python is used for privilege drop — it is guaranteed to be present in the Langflow image -# and requires no additional packages (unlike gosu or su-exec). -exec python3 -c 'import os, sys; os.setgid(1000); os.setuid(1000); os.execvp(sys.argv[1], sys.argv[1:])' "$@" diff --git a/kubernetes/helm/openrag/templates/langflow/deployment.yaml b/kubernetes/helm/openrag/templates/langflow/deployment.yaml index aa57b8762..3c25bfa68 100644 --- a/kubernetes/helm/openrag/templates/langflow/deployment.yaml +++ b/kubernetes/helm/openrag/templates/langflow/deployment.yaml @@ -123,10 +123,10 @@ spec: env: # Langflow core settings - - name: LANGFLOW_LOAD_FLOWS_PATH - value: {{ .Values.langflow.persistence.mountPath }}/{{ .Values.langflow.persistence.flowsSubPath }} - name: LANGFLOW_DATABASE_URL value: "sqlite:///{{ .Values.langflow.persistence.mountPath }}/{{ .Values.langflow.persistence.dbSubPath }}" + - name: LANGFLOW_CONFIG_DIR + value: {{ .Values.langflow.persistence.mountPath }} - name: LANGFLOW_DEACTIVATE_TRACING value: {{ .Values.langflow.deactivateTracing | quote }} - name: LANGFLOW_LOG_LEVEL diff --git a/src/main.py b/src/main.py index 8eae9b293..8f59edf56 100644 --- a/src/main.py +++ b/src/main.py @@ -1203,11 +1203,16 @@ async def startup_tasks(services): # Ensure all configured flows exist in Langflow (create-only, never overwrites). # This replaces LANGFLOW_LOAD_FLOWS_PATH, which performed a blind upsert on # every container start and discarded any user edits made in the Langflow UI. + newly_created: set[str] = set() try: flows_service = services["flows_service"] - await flows_service.ensure_flows_exist() + newly_created = await flows_service.ensure_flows_exist() except Exception as e: - logger.warning("Failed to ensure Langflow flows exist at startup", error=str(e)) + logger.error( + "Failed to ensure Langflow flows exist at startup — " + "flows may be missing until the next restart", + error=str(e), + ) # Check if flows were reset and reapply settings if config is edited try: @@ -1216,6 +1221,9 @@ async def startup_tasks(services): logger.info("Checking if Langflow flows were reset") flows_service = services["flows_service"] reset_flows = await flows_service.check_flows_reset() + # Exclude flows that were just seeded — they match the JSON by design, + # not because they were externally reset. + reset_flows = [f for f in reset_flows if f not in newly_created] if reset_flows: logger.info( diff --git a/src/services/flows_service.py b/src/services/flows_service.py index 137af1080..f06d7b948 100644 --- a/src/services/flows_service.py +++ b/src/services/flows_service.py @@ -799,7 +799,7 @@ async def _compare_flow_with_file(self, flow_id: str): logger.error(f"Error comparing flow {flow_id} with file: {str(e)}") return False - async def ensure_flows_exist(self): + async def ensure_flows_exist(self) -> set[str]: """ Ensure all configured flows exist in Langflow. @@ -810,6 +810,8 @@ async def ensure_flows_exist(self): This replaces the LANGFLOW_LOAD_FLOWS_PATH mechanism, which performed a blind upsert on every container start and discarded user edits. + + Returns the set of flow type names that were actually created. """ flow_configs = [ ("nudges", NUDGES_FLOW_ID), @@ -817,6 +819,7 @@ async def ensure_flows_exist(self): ("ingest", LANGFLOW_INGEST_FLOW_ID), ("url_ingest", LANGFLOW_URL_INGEST_FLOW_ID), ] + created_flow_types: set[str] = set() for flow_type, flow_id in flow_configs: if not flow_id: @@ -832,6 +835,13 @@ async def ensure_flows_exist(self): ) continue + if response.status_code != 404: + logger.warning( + f"Unexpected status checking {flow_type} flow (ID: {flow_id}): " + f"HTTP {response.status_code} — skipping creation to avoid overwriting existing data" + ) + continue + flow_path = self._find_flow_file_by_id(flow_id) if not flow_path: logger.warning( @@ -849,6 +859,7 @@ async def ensure_flows_exist(self): logger.info( f"Created {flow_type} flow (ID: {flow_id}) from {os.path.basename(flow_path)}" ) + created_flow_types.add(flow_type) else: logger.warning( f"Failed to create {flow_type} flow (ID: {flow_id}): " @@ -860,6 +871,8 @@ async def ensure_flows_exist(self): f"Error ensuring {flow_type} flow (ID: {flow_id}) exists: {e}" ) + return created_flow_types + async def check_flows_reset(self): """ Check if any flows have been reset by comparing with JSON files. From 7a06eb5817f147241bc43af0e885b0b6e69906d2 Mon Sep 17 00:00:00 2001 From: Mike Pawlowski Date: Thu, 19 Mar 2026 18:50:06 -0700 Subject: [PATCH 7/7] fix: persist Langflow database across container restarts Issue - #1127 Summary - Fixed Langflow data directory permissions for CI and E2E test environments CI Test Pipeline (Makefile) - Added chmod 777 langflow-data before the test run in both test-ci and test-ci-local targets to ensure the langflow-data directory is world-writable prior to container startup E2E Test Setup (scripts/setup-e2e.sh) - Added pre-creation of the langflow-data directory with world-writable permissions (777) before infrastructure starts, ensuring the Langflow container (UID 1000) and the CI runner (UID 1001) can both access it regardless of Docker's :U flag behavior --- Makefile | 2 ++ scripts/setup-e2e.sh | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/Makefile b/Makefile index df179205c..3a24ebdbb 100644 --- a/Makefile +++ b/Makefile @@ -682,6 +682,7 @@ test-integration: ## Run integration tests (requires infrastructure) uv run pytest tests/integration/core/ -v test-ci: ensure-langflow-data ## Start infra, run integration + SDK tests, tear down (uses DockerHub images) + @chmod 777 langflow-data @set -e; \ echo "$(YELLOW)Installing test dependencies...$(NC)"; \ uv sync --group dev; \ @@ -811,6 +812,7 @@ test-ci: ensure-langflow-data ## Start infra, run integration + SDK tests, tear exit $$TEST_RESULT test-ci-local: ensure-langflow-data ## Same as test-ci but builds all images locally + @chmod 777 langflow-data @set -e; \ echo "$(YELLOW)Installing test dependencies...$(NC)"; \ uv sync --group dev; \ diff --git a/scripts/setup-e2e.sh b/scripts/setup-e2e.sh index 92e182588..9cb62ef8c 100755 --- a/scripts/setup-e2e.sh +++ b/scripts/setup-e2e.sh @@ -49,6 +49,11 @@ echo "Starting E2E Setup using $E2E_ENV..." echo "Cleaning up..." make factory-reset FORCE=true ENV_FILE=$E2E_ENV +# Pre-create langflow-data as world-writable so the Langflow container (UID 1000) +# and the runner (UID 1001) can both access it, regardless of Docker's :U flag behavior. +mkdir -p langflow-data +chmod 777 langflow-data + # Start infrastructure using make (this will use the new .env) echo "Starting infrastructure..." make dev-local-cpu ENV_FILE=$E2E_ENV