From 6a2eabd8e3244d5c353df0f5daea86c6e8bd8e26 Mon Sep 17 00:00:00 2001
From: JasonOA888 <jason@outland.art>
Date: Fri, 13 Mar 2026 13:24:20 +0800
Subject: [PATCH 1/7] fix: persist Langflow database across container restarts

Fixes #1127 - Langflow flow edits no longer lost after restart

## Problem
Flow edits made in Langflow UI were being lost after container restart.
The langflow service only mounted /app/flows but Langflow stores its
SQLite database at /root/.langflow/ by default, which was ephemeral.

## Solution
1. Add persistent volume for Langflow data directory:
   - Mount ${LANGFLOW_DATA_PATH:-./langflow-data} to /root/.langflow
2. Explicitly set LANGFLOW_DATABASE_URL to ensure database location
3. Document the new LANGFLOW_DATA_PATH in .env.example

## Testing
- Verified volume mount configuration
- Database URL format: sqlite:////root/.langflow/langflow.db

Closes #1127
---
 .env.example       | 5 +++++
 docker-compose.yml | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/.env.example b/.env.example
index db6eb53d4..a2a90fa59 100644
--- a/.env.example
+++ b/.env.example
@@ -74,6 +74,11 @@ OPENSEARCH_PASSWORD=
 # Default: ./opensearch-data
 OPENSEARCH_DATA_PATH=./opensearch-data
 
+# Path to persist Langflow database and state (flows, credentials, settings)
+# Without this volume, flow edits will be lost on container restart
+# Default: ./langflow-data
+LANGFLOW_DATA_PATH=./langflow-data
+
 # OpenSearch Connection
 OPENSEARCH_HOST=opensearch
 OPENSEARCH_PORT=9200
diff --git a/docker-compose.yml b/docker-compose.yml
index a4faf3692..c4a225178 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -127,6 +127,7 @@ services:
   langflow:
     volumes:
       - ${OPENRAG_FLOWS_PATH:-./flows}:/app/flows:U,z
+      - ${LANGFLOW_DATA_PATH:-./langflow-data}:/root/.langflow:U,z
     image: langflowai/openrag-langflow:${OPENRAG_VERSION:-latest}
     build:
       context: .
@@ -146,6 +147,7 @@ services:
       - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID}
       - OLLAMA_BASE_URL=${OLLAMA_ENDPOINT}
       - LANGFLOW_LOAD_FLOWS_PATH=/app/flows
+      - LANGFLOW_DATABASE_URL=sqlite:////root/.langflow/langflow.db
       - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
       - JWT=None
       - OWNER=None

From 2b1456afc82713f56df188fde9fe59aa560a0639 Mon Sep 17 00:00:00 2001
From: Mike Pawlowski <mpawlow@ca.ibm.com>
Date: Tue, 17 Mar 2026 08:33:48 -0700
Subject: [PATCH 2/7] fix: persist Langflow database across container restarts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Issue

- #1127

Summary

- User-made edits to Langflow flows were silently discarded on every container restart.
- Two root causes were identified and corrected:
- (1) The Langflow data volume was mounted to the wrong path inside the container.
- (2) The LANGFLOW_LOAD_FLOWS_PATH mechanism performed a blind upsert of all flows on every startup, overwriting any changes made in the Langflow UI.

Docker / Infrastructure

- Corrected the Langflow data volume mount target from /root/.langflow to /app/langflow-data in docker-compose.yml
- Replaced LANGFLOW_LOAD_FLOWS_PATH env var with LANGFLOW_CONFIG_DIR=/app/langflow-data so Langflow resolves its config and database from the persisted volume
- Updated LANGFLOW_DATABASE_URL to reference the new path (sqlite:////app/langflow-data/langflow.db)
- Pre-created /app/langflow-data in Dockerfile.langflow during image build to ensure named Docker volumes are initialised with the correct ownership for the non-root container user
- Added langflow-data/ directory with a .gitkeep file; updated .gitignore to track the directory stub while ignoring its contents

Backend — Flow Bootstrapping

- Added FlowsService.ensure_flows_exist(): a create-only startup routine that checks each configured flow ID against the Langflow API and creates missing flows from their JSON files, without ever patching or overwriting an existing flow
- Replaced the LANGFLOW_LOAD_FLOWS_PATH blind-upsert behaviour with a call to ensure_flows_exist() inside startup_tasks() in src/main.py

Makefile

- Extended the factory-reset target to remove the langflow-data/ directory alongside opensearch-data/ and config/

Code Cleanup

- Removed trailing whitespace throughout src/services/flows_service.py

Builds on #1129
---
 .gitignore                    |   3 +
 Dockerfile.langflow           |  10 ++-
 Makefile                      |   6 ++
 docker-compose.yml            |   6 +-
 langflow-data/.gitkeep        |   0
 src/main.py                   |   9 +++
 src/services/flows_service.py | 133 +++++++++++++++++++++++++---------
 7 files changed, 126 insertions(+), 41 deletions(-)
 create mode 100644 langflow-data/.gitkeep

diff --git a/.gitignore b/.gitignore
index 7da9d1140..02d024e00 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,9 @@ wheels/
 
 # OpenSearch data directory
 opensearch-data/
+# Langflow data directory (ignore contents, keep directory via .gitkeep)
+/langflow-data/*
+!/langflow-data/.gitkeep
 
 node_modules
 
diff --git a/Dockerfile.langflow b/Dockerfile.langflow
index 2f8286cd0..73793121b 100644
--- a/Dockerfile.langflow
+++ b/Dockerfile.langflow
@@ -1,7 +1,13 @@
 FROM langflowai/langflow:1.8.0
 
-RUN pip install uv
+# (+) Install uv
+# (+) Pre-create the Langflow data directory with correct ownership.
+#     - This ensures named Docker volumes are initialised with uid=1000 so
+#       the non-root container user can write to the mounted path.
+RUN set -ex \
+    && pip install uv \
+    && mkdir -p /app/langflow-data
 
 EXPOSE 7860
 
-CMD ["langflow", "run", "--host", "0.0.0.0", "--port", "7860"]
\ No newline at end of file
+CMD ["langflow", "run", "--host", "0.0.0.0", "--port", "7860"]
diff --git a/Makefile b/Makefile
index f5d391e8f..5fb3d948c 100644
--- a/Makefile
+++ b/Makefile
@@ -502,6 +502,7 @@ factory-reset: ## Complete reset (stop, remove volumes, clear data, remove image
 	echo "  - Stop all containers"; \
 	echo "  - Remove all volumes"; \
 	echo "  - Delete opensearch-data directory"; \
+	echo "  - Delete langflow-data directory"; \
 	echo "  - Delete config directory"; \
 	echo "  - Delete JWT keys (private_key.pem, public_key.pem)"; \
 	echo "  - Remove OpenRAG images"; \
@@ -525,6 +526,11 @@ factory-reset: ## Complete reset (stop, remove volumes, clear data, remove image
 		rm -rf opensearch-data/* 2>/dev/null || true; \
 		echo "$(PURPLE)opensearch-data removed$(NC)"; \
 	fi; \
+	if [ -d "langflow-data" ]; then \
+		echo "Removing langflow-data..."; \
+		rm -rf langflow-data/* 2>/dev/null || true; \
+		echo "$(PURPLE)langflow-data removed$(NC)"; \
+	fi; \
 	if [ -d "config" ]; then \
 		echo "Removing config..."; \
 		rm -rf config; \
diff --git a/docker-compose.yml b/docker-compose.yml
index c4a225178..6a2a07f16 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -127,7 +127,7 @@ services:
   langflow:
     volumes:
       - ${OPENRAG_FLOWS_PATH:-./flows}:/app/flows:U,z
-      - ${LANGFLOW_DATA_PATH:-./langflow-data}:/root/.langflow:U,z
+      - ${LANGFLOW_DATA_PATH:-./langflow-data}:/app/langflow-data:U,z
     image: langflowai/openrag-langflow:${OPENRAG_VERSION:-latest}
     build:
       context: .
@@ -146,8 +146,8 @@ services:
       - WATSONX_URL=${WATSONX_URL:-${WATSONX_ENDPOINT}}
       - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID}
       - OLLAMA_BASE_URL=${OLLAMA_ENDPOINT}
-      - LANGFLOW_LOAD_FLOWS_PATH=/app/flows
-      - LANGFLOW_DATABASE_URL=sqlite:////root/.langflow/langflow.db
+      - LANGFLOW_CONFIG_DIR=/app/langflow-data
+      - LANGFLOW_DATABASE_URL=sqlite:////app/langflow-data/langflow.db
       - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
       - JWT=None
       - OWNER=None
diff --git a/langflow-data/.gitkeep b/langflow-data/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/main.py b/src/main.py
index 041c573cc..8eae9b293 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1200,6 +1200,15 @@ async def startup_tasks(services):
     # Update MCP servers with provider credentials (especially important for no-auth mode)
     await _update_mcp_servers_with_provider_credentials(services)
 
+    # Ensure all configured flows exist in Langflow (create-only, never overwrites).
+    # This replaces LANGFLOW_LOAD_FLOWS_PATH, which performed a blind upsert on
+    # every container start and discarded any user edits made in the Langflow UI.
+    try:
+        flows_service = services["flows_service"]
+        await flows_service.ensure_flows_exist()
+    except Exception as e:
+        logger.warning("Failed to ensure Langflow flows exist at startup", error=str(e))
+
     # Check if flows were reset and reapply settings if config is edited
     try:
         config = get_openrag_config()
diff --git a/src/services/flows_service.py b/src/services/flows_service.py
index 00b381fb4..137af1080 100644
--- a/src/services/flows_service.py
+++ b/src/services/flows_service.py
@@ -47,7 +47,7 @@ async def resolve_ollama_url(self, endpoint: str, force_refresh: bool = False) -
         resolved_url = None
         for cand in candidates:
             test_url = replace_localhost_patterns(endpoint, cand)
-            
+
             logger.debug(f"Probing Ollama candidate via Langflow: {test_url}")
             try:
                 response = await clients.langflow_request(
@@ -61,7 +61,7 @@ async def resolve_ollama_url(self, endpoint: str, force_refresh: bool = False) -
             except Exception as e:
                 logger.debug(f"Probe failed for {test_url}: {e}")
                 continue
-        
+
         if not resolved_url:
             # Fallback to simple transformation if probing fails
             resolved_url = transform_localhost_url(endpoint)
@@ -95,23 +95,23 @@ def _get_backup_directory(self):
     def _get_latest_backup_path(self, flow_id: str, flow_type: str):
         """
         Get the path to the latest backup file for a flow.
-        
+
         Args:
             flow_id: The flow ID
             flow_type: The flow type name
-        
+
         Returns:
             str: Path to latest backup file, or None if no backup exists
         """
         backup_dir = self._get_backup_directory()
-        
+
         if not os.path.exists(backup_dir):
             return None
-        
+
         # Find all backup files for this flow
         backup_files = []
         prefix = f"{flow_type}_"
-        
+
         try:
             for filename in os.listdir(backup_dir):
                 if filename.startswith(prefix) and filename.endswith(".json"):
@@ -122,10 +122,10 @@ def _get_latest_backup_path(self, flow_id: str, flow_type: str):
         except Exception as e:
             logger.warning(f"Error reading backup directory: {str(e)}")
             return None
-        
+
         if not backup_files:
             return None
-        
+
         # Return the most recent backup (highest mtime)
         backup_files.sort(key=lambda x: x[0], reverse=True)
         return backup_files[0][1]
@@ -134,17 +134,17 @@ def _compare_flows(self, flow1: dict, flow2: dict):
         """
         Compare two flow structures to see if they're different.
         Normalizes both flows before comparison.
-        
+
         Args:
             flow1: First flow data
             flow2: Second flow data
-        
+
         Returns:
             bool: True if flows are different, False if they're the same
         """
         normalized1 = self._normalize_flow_structure(flow1)
         normalized2 = self._normalize_flow_structure(flow2)
-        
+
         # Compare normalized structures
         return normalized1 != normalized2
 
@@ -152,10 +152,10 @@ async def backup_all_flows(self, only_if_changed=True):
         """
         Backup all flows from Langflow to the backup folder.
         Only backs up flows that have changed since the last backup.
-        
+
         Args:
             only_if_changed: If True, only backup flows that differ from latest backup
-        
+
         Returns:
             dict: Summary of backup operations with success/failure status
         """
@@ -200,7 +200,7 @@ async def backup_all_flows(self, only_if_changed=True):
                 flow_locked = current_flow.get("locked", False)
                 latest_backup_path = self._get_latest_backup_path(flow_id, flow_type)
                 has_backups = latest_backup_path is not None
-                
+
                 # If flow is locked and no backups exist, skip backup
                 if flow_locked and not has_backups:
                     logger.debug(
@@ -212,13 +212,13 @@ async def backup_all_flows(self, only_if_changed=True):
                         "reason": "locked_without_backups",
                     })
                     continue
-                
+
                 # Check if we need to backup (only if changed)
                 if only_if_changed and has_backups:
                     try:
                         with open(latest_backup_path, "r") as f:
                             latest_backup = json.load(f)
-                        
+
                         # Compare flows
                         if not self._compare_flows(current_flow, latest_backup):
                             logger.debug(
@@ -280,12 +280,12 @@ async def backup_all_flows(self, only_if_changed=True):
     async def _backup_flow(self, flow_id: str, flow_type: str, flow_data: dict = None):
         """
         Backup a single flow to the backup folder.
-        
+
         Args:
             flow_id: The flow ID to backup
             flow_type: The flow type name (nudges, retrieval, ingest, url_ingest)
             flow_data: The flow data to backup (if None, fetches from API)
-        
+
         Returns:
             str: Path to the backup file, or None if backup failed
         """
@@ -717,7 +717,7 @@ def _normalize_flow_structure(self, flow_data):
         for node in nodes:
             node_data = node.get("data", {})
             node_template = node_data.get("node", {})
-            
+
             normalized_node = {
                 "id": node.get("id"),  # Keep ID for edge matching
                 "type": node.get("type"),
@@ -775,20 +775,20 @@ async def _compare_flow_with_file(self, flow_id: str):
             # Compare entire normalized structures exactly
             # Sort nodes and edges for consistent comparison
             normalized_langflow["data"]["nodes"] = sorted(
-                normalized_langflow["data"]["nodes"], 
+                normalized_langflow["data"]["nodes"],
                 key=lambda x: (x.get("id", ""), x.get("type", ""))
             )
             normalized_file["data"]["nodes"] = sorted(
-                normalized_file["data"]["nodes"], 
+                normalized_file["data"]["nodes"],
                 key=lambda x: (x.get("id", ""), x.get("type", ""))
             )
 
             normalized_langflow["data"]["edges"] = sorted(
-                normalized_langflow["data"]["edges"], 
+                normalized_langflow["data"]["edges"],
                 key=lambda x: (x.get("source", ""), x.get("target", ""), x.get("sourceHandle", ""), x.get("targetHandle", ""))
             )
             normalized_file["data"]["edges"] = sorted(
-                normalized_file["data"]["edges"], 
+                normalized_file["data"]["edges"],
                 key=lambda x: (x.get("source", ""), x.get("target", ""), x.get("sourceHandle", ""), x.get("targetHandle", ""))
             )
 
@@ -799,6 +799,67 @@ async def _compare_flow_with_file(self, flow_id: str):
             logger.error(f"Error comparing flow {flow_id} with file: {str(e)}")
             return False
 
+    async def ensure_flows_exist(self):
+        """
+        Ensure all configured flows exist in Langflow (create-only).
+
+        Each configured flow ID is looked up in Langflow. A flow is created
+        from its JSON file only when the lookup returns HTTP 404; on 200 it is
+        left alone, and on any other status (auth failure, 5xx, ...) creation
+        is skipped so a failed lookup can never overwrite the user's edits.
+
+        This replaces the LANGFLOW_LOAD_FLOWS_PATH mechanism, which performed a
+        blind upsert on every container start and discarded user edits.
+        Failures are logged per flow and never raised; returns None.
+        """
+        flow_configs = [
+            ("nudges", NUDGES_FLOW_ID),
+            ("retrieval", LANGFLOW_CHAT_FLOW_ID),
+            ("ingest", LANGFLOW_INGEST_FLOW_ID),
+            ("url_ingest", LANGFLOW_URL_INGEST_FLOW_ID),
+        ]
+
+        for flow_type, flow_id in flow_configs:
+            if not flow_id:
+                continue
+
+            try:
+                flow_url = f"/api/v1/flows/{flow_id}"
+                response = await clients.langflow_request("GET", flow_url)
+                if response.status_code == 200:
+                    logger.info(f"Flow {flow_type} (ID: {flow_id}) already exists, skipping creation")
+                    continue
+                if response.status_code != 404:
+                    # Existence is unknown here; creating could clobber user edits.
+                    logger.warning(
+                        f"Could not verify {flow_type} flow (ID: {flow_id}): "
+                        f"HTTP {response.status_code}, skipping creation"
+                    )
+                    continue
+
+                flow_path = self._find_flow_file_by_id(flow_id)
+                if not flow_path:
+                    logger.warning(
+                        f"No flow file found for {flow_type} (ID: {flow_id}), cannot create"
+                    )
+                    continue
+
+                with open(flow_path, "r") as f:
+                    flow_data = json.load(f)
+
+                response = await clients.langflow_request("PUT", flow_url, json=flow_data)
+                if response.status_code in (200, 201):
+                    logger.info(
+                        f"Created {flow_type} flow (ID: {flow_id}) from {os.path.basename(flow_path)}"
+                    )
+                else:
+                    logger.warning(
+                        f"Failed to create {flow_type} flow (ID: {flow_id}): "
+                        f"HTTP {response.status_code} — {response.text}"
+                    )
+            except Exception as e:
+                logger.error(f"Error ensuring {flow_type} flow (ID: {flow_id}) exists: {e}")
+
     async def check_flows_reset(self):
         """
         Check if any flows have been reset by comparing with JSON files.
@@ -819,7 +880,7 @@ async def check_flows_reset(self):
 
             logger.info(f"Checking if {flow_type} flow (ID: {flow_id}) was reset")
             is_reset = await self._compare_flow_with_file(flow_id)
-            
+
             if is_reset:
                 logger.info(f"Flow {flow_type} (ID: {flow_id}) appears to have been reset")
                 reset_flows.append(flow_type)
@@ -827,7 +888,7 @@ async def check_flows_reset(self):
                 logger.info(f"Flow {flow_type} (ID: {flow_id}) does not match reset state")
 
         return reset_flows
-        
+
     async def change_langflow_model_value(
         self,
         provider: str,
@@ -917,23 +978,23 @@ async def _update_provider_components(
             # Get all embedding nodes in the flow
             embedding_nodes = self._find_nodes_in_flow(flow_data, display_name=OPENAI_EMBEDDING_COMPONENT_DISPLAY_NAME)
             logger.info(f"Found {len(embedding_nodes)} embedding nodes in flow {flow_name} with display name '{OPENAI_EMBEDDING_COMPONENT_DISPLAY_NAME}'")
-            
+
             # Count configured embedding-enabled providers
             config_obj = get_openrag_config()
             configured_providers = []
             if config_obj.providers.openai.configured: configured_providers.append("openai")
             if config_obj.providers.watsonx.configured: configured_providers.append("watsonx")
             if config_obj.providers.ollama.configured: configured_providers.append("ollama")
-            
+
             # Ensure current provider is in the list for counting purposes if it's being configured
             if provider in ["openai", "watsonx", "ollama"] and provider not in configured_providers:
                 configured_providers.append(provider)
-            
+
             all_possible = ["openai", "watsonx", "ollama"]
             configured_providers = [p for p in all_possible if p in configured_providers]
             provider_count = len(configured_providers)
             logger.info(f"Configured embedding providers: {configured_providers} (count: {provider_count})")
-            
+
             # Determine slot mapping context
             if provider_count == 1:
                 logger.info("Configuration mode: all 3 slots belong to the single active provider")
@@ -948,7 +1009,7 @@ async def _update_provider_components(
             for node, idx in embedding_nodes:
                 if self._get_node_provider(node) == provider_display:
                     matched_nodes.append((node, idx))
-            
+
             if matched_nodes:
                 logger.info(f"Found {len(matched_nodes)} nodes already configured for provider '{provider}'")
                 for node, idx in matched_nodes:
@@ -1035,7 +1096,7 @@ async def _update_component_langflow(self, template, model: str):
         # Only call if code field exists (custom components should have code)
         if "code" in template and "value" in template["code"]:
             code_value = template["code"]["value"]
-                            
+
             try:
                 update_payload = {
                     "code": code_value,
@@ -1044,11 +1105,11 @@ async def _update_component_langflow(self, template, model: str):
                     "field_value": model,
                     "tool_mode": False,
                 }
-                
+
                 response = await clients.langflow_request(
                     "POST", "/api/v1/custom_component/update", json=update_payload
                 )
-                
+
                 if response.status_code == 200:
                     response_data = response.json()
                     # Update template with the new template from response.data
@@ -1161,11 +1222,11 @@ async def _enable_model_in_langflow(self, provider_name: str, model_value: str):
                 "model_id": model_value,
                 "enabled": True
             }]
-            
+
             response = await clients.langflow_request(
                 "POST", "/api/v1/models/enabled_models", json=enable_payload
             )
-            
+
             if response.status_code == 200:
                 logger.info(f"Successfully enabled model {model_value} for provider {provider_name}")
             else:

From a1dd86e647047f0dbcf7de02a542dab80a1727a7 Mon Sep 17 00:00:00 2001
From: Mike Pawlowski <mpawlow@ca.ibm.com>
Date: Wed, 18 Mar 2026 15:20:13 -0700
Subject: [PATCH 3/7] fix: persist Langflow database across container restarts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Issue

- #1127

Summary

- Propagated LANGFLOW_DATA_PATH to all locations that reference OPENSEARCH_DATA_PATH

CI / Test Infrastructure

- Added langflow-data to the Docker container cleanup command in test-e2e.yml and test-integration.yml, ensuring CI runners start each run with a clean Langflow database
- Added LANGFLOW_DATA_PATH=./langflow-data to frontend/.env.test.example alongside the existing OPENSEARCH_DATA_PATH entry
- Added a langflow-data directory cleanup block to tests/conftest.py that mirrors the existing opensearch-data teardown, so integration tests run against a fresh Langflow DB

Documentation

- Added a LANGFLOW_DATA_PATH row to the Langflow settings table in docs/docs/reference/configuration.mdx, documenting the default value and that this path persists the Langflow database (flows, credentials, settings) across container restarts

TUI — Configuration Fields

- Added a langflow_data_path ConfigField to the Langflow section in config_fields.py with a placeholder of ~/.openrag/data/langflow-data and a default of $HOME/.openrag/data/langflow-data

TUI — Env Manager

- Added langflow_data_path field to the EnvConfig dataclass in env_manager.py
- Added "LANGFLOW_DATA_PATH": "langflow_data_path" entry to _env_attr_map() so the variable is loaded from and written to .env files
- Added LANGFLOW_DATA_PATH write in save_env_file() immediately after the OPENSEARCH_DATA_PATH write

TUI — Config Screen

- Added langflow_data_path to SPECIAL_FIELDS in config.py
- Added _render_langflow_data_path() renderer with a "Pick…" directory picker button, matching the _render_opensearch_data_path() pattern
- Added action_pick_langflow_data_path() action implementing the textual-fspicker directory picker flow
- Wired the pick-langflow-data-btn button in on_button_pressed and added the _langflow_data_pick_callback fallback handler in on_screen_dismissed
---
 .github/workflows/test-e2e.yml         |  4 +-
 .github/workflows/test-integration.yml |  4 +-
 docs/docs/reference/configuration.mdx  |  1 +
 frontend/.env.test.example             |  1 +
 src/tui/config_fields.py               |  6 ++
 src/tui/managers/env_manager.py        |  5 ++
 src/tui/screens/config.py              | 83 ++++++++++++++++++++++++++
 tests/conftest.py                      |  9 +++
 8 files changed, 109 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml
index f73f89bb7..6b80efbeb 100644
--- a/.github/workflows/test-e2e.yml
+++ b/.github/workflows/test-e2e.yml
@@ -37,10 +37,10 @@ jobs:
           docker builder prune -af || true
           docker compose -f docker-compose.yml down -v --remove-orphans || true
 
-      - name: Cleanup root-owned files (OpenSearch data, config)
+      - name: Cleanup root-owned files (OpenSearch data, config, Langflow data)
         run: |
           for i in 1 2 3; do
-            docker run --rm -v $(pwd):/work alpine sh -c "rm -rf /work/opensearch-data /work/config" && break
+            docker run --rm -v $(pwd):/work alpine sh -c "rm -rf /work/opensearch-data /work/config /work/langflow-data" && break
             echo "Attempt $i failed, retrying in 5s..."
             sleep 5
           done || true
diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml
index 4dd3485e3..470ccbaa5 100644
--- a/.github/workflows/test-integration.yml
+++ b/.github/workflows/test-integration.yml
@@ -42,10 +42,10 @@ jobs:
           docker builder prune -af || true
           docker compose -f docker-compose.yml down -v --remove-orphans || true
 
-      - name: Cleanup root-owned files (OpenSearch data, config)
+      - name: Cleanup root-owned files (OpenSearch data, config, Langflow data)
         run: |
           for i in 1 2 3; do
-            docker run --rm -v $(pwd):/work alpine sh -c "rm -rf /work/opensearch-data /work/config" && break
+            docker run --rm -v $(pwd):/work alpine sh -c "rm -rf /work/opensearch-data /work/config /work/langflow-data" && break
             echo "Attempt $i failed, retrying in 5s..."
             sleep 5
           done || true
diff --git a/docs/docs/reference/configuration.mdx b/docs/docs/reference/configuration.mdx
index 65484f2df..895892b96 100644
--- a/docs/docs/reference/configuration.mdx
+++ b/docs/docs/reference/configuration.mdx
@@ -94,6 +94,7 @@ For better security, it is recommended to set `LANGFLOW_SUPERUSER_PASSWORD` so t
 
 | Variable | Default | Description |
 |----------|---------|-------------|
+| `LANGFLOW_DATA_PATH` | `./langflow-data` | The path where OpenRAG persists the Langflow database (flows, credentials, settings) across container restarts. |
 | `LANGFLOW_AUTO_LOGIN` | Determined by `LANGFLOW_SUPERUSER_PASSWORD` | Whether to enable [auto-login mode](https://docs.langflow.org/api-keys-and-authentication#langflow-auto-login) for the Langflow visual editor and CLI. If `LANGFLOW_SUPERUSER_PASSWORD` isn't set, then `LANGFLOW_AUTO_LOGIN` is `True` and auto-login mode is enabled. If `LANGFLOW_SUPERUSER_PASSWORD` is set, then `LANGFLOW_AUTO_LOGIN` is `False` and auto-login mode is disabled. Langflow API calls always require authentication with a Langflow API key regardless of the auto-login setting. |
 | `LANGFLOW_ENABLE_SUPERUSER_CLI` | Determined by `LANGFLOW_SUPERUSER_PASSWORD` | Whether to enable the [Langflow CLI `langflow superuser` command](https://docs.langflow.org/api-keys-and-authentication#langflow-enable-superuser-cli). If `LANGFLOW_SUPERUSER_PASSWORD` isn't set, then `LANGFLOW_ENABLE_SUPERUSER_CLI` is `True` and superuser accounts can be created with the Langflow CLI. If `LANGFLOW_SUPERUSER_PASSWORD` is set, then `LANGFLOW_ENABLE_SUPERUSER_CLI` is `False` and the `langflow superuser` command is disabled. |
 | `LANGFLOW_NEW_USER_IS_ACTIVE` | Determined by `LANGFLOW_SUPERUSER_PASSWORD` | Whether new [Langflow user accounts are active by default](https://docs.langflow.org/api-keys-and-authentication#langflow-new-user-is-active). If `LANGFLOW_SUPERUSER_PASSWORD` isn't set, then `LANGFLOW_NEW_USER_IS_ACTIVE` is `True` and new user accounts are active by default. If `LANGFLOW_SUPERUSER_PASSWORD` is set, then `LANGFLOW_NEW_USER_IS_ACTIVE` is `False` and new user accounts are inactive by default. |
diff --git a/frontend/.env.test.example b/frontend/.env.test.example
index f53600840..c053ca248 100644
--- a/frontend/.env.test.example
+++ b/frontend/.env.test.example
@@ -7,6 +7,7 @@ OPENSEARCH_PASSWORD=
 
 # Paths
 OPENSEARCH_DATA_PATH=./opensearch-data
+LANGFLOW_DATA_PATH=./langflow-data
 OPENSEARCH_INDEX_NAME=documents
 
 # Model Providers
diff --git a/src/tui/config_fields.py b/src/tui/config_fields.py
index 73bdcf37d..19aa01631 100644
--- a/src/tui/config_fields.py
+++ b/src/tui/config_fields.py
@@ -112,6 +112,12 @@ class ConfigSection:
             "langflow_superuser", "LANGFLOW_SUPERUSER", "Admin Username",
             placeholder="admin", default="admin",
         ),
+        ConfigField(
+            "langflow_data_path", "LANGFLOW_DATA_PATH", "Data Path",
+            placeholder="~/.openrag/data/langflow-data",
+            default="$HOME/.openrag/data/langflow-data",
+            helper_text="Directory to persist Langflow flows and state across restarts",
+        ),
         ConfigField(
             "langflow_public_url", "LANGFLOW_PUBLIC_URL", "Public URL",
             placeholder="http://localhost:7860",
diff --git a/src/tui/managers/env_manager.py b/src/tui/managers/env_manager.py
index 791507885..175c94e45 100644
--- a/src/tui/managers/env_manager.py
+++ b/src/tui/managers/env_manager.py
@@ -98,6 +98,7 @@ class EnvConfig:
     openrag_config_path: str = "$HOME/.openrag/config"
     openrag_data_path: str = "$HOME/.openrag/data"  # Backend data (conversations, tokens, etc.)
     opensearch_data_path: str = "$HOME/.openrag/data/opensearch-data"
+    langflow_data_path: str = "$HOME/.openrag/data/langflow-data"
     openrag_tui_config_path_legacy: str = "$HOME/.openrag/tui/config"
 
     # Container version (linked to TUI version)
@@ -223,6 +224,7 @@ def _env_attr_map(self) -> Dict[str, str]:
             "OPENRAG_CONFIG_PATH": "openrag_config_path",
             "OPENRAG_DATA_PATH": "openrag_data_path",
             "OPENSEARCH_DATA_PATH": "opensearch_data_path",
+            "LANGFLOW_DATA_PATH": "langflow_data_path",
             "LANGFLOW_AUTO_LOGIN": "langflow_auto_login",
             "LANGFLOW_NEW_USER_IS_ACTIVE": "langflow_new_user_is_active",
             "LANGFLOW_ENABLE_SUPERUSER_CLI": "langflow_enable_superuser_cli",
@@ -507,6 +509,9 @@ def save_env_file(self) -> bool:
                 f.write(
                     f"OPENSEARCH_DATA_PATH={self._quote_env_value(expand_path(self.config.opensearch_data_path))}\n"
                 )
+                f.write(
+                    f"LANGFLOW_DATA_PATH={self._quote_env_value(expand_path(self.config.langflow_data_path))}\n"
+                )
                 # Set OPENRAG_VERSION to TUI version
                 if self.config.openrag_version:
                     f.write(f"OPENRAG_VERSION={self._quote_env_value(self.config.openrag_version)}\n")
diff --git a/src/tui/screens/config.py b/src/tui/screens/config.py
index 54873b00d..4ca6c8bd0 100644
--- a/src/tui/screens/config.py
+++ b/src/tui/screens/config.py
@@ -203,6 +203,7 @@ def _create_header_text(self) -> Text:
         "opensearch_data_path",
         "langflow_superuser_password",
         "langflow_superuser",
+        "langflow_data_path",
         "google_oauth_client_id",
         "microsoft_graph_oauth_client_id",
         "openrag_documents_paths",
@@ -306,6 +307,25 @@ def _render_opensearch_data_path(self, field: ConfigField) -> ComposeResult:
         self.inputs[field.name] = input_widget
         yield Static(" ")
 
+    def _render_langflow_data_path(self, field: ConfigField) -> ComposeResult:
+        """Langflow data path with file picker."""
+        yield Label(field.label)
+        yield Static(field.helper_text, classes="helper-text")
+        current_value = getattr(self.env_manager.config, field.name, field.default)
+        input_widget = Input(
+            placeholder=field.placeholder,
+            value=current_value,
+            id=f"input-{field.name}",
+        )
+        yield input_widget
+        yield Horizontal(
+            Button("Pick…", id="pick-langflow-data-btn"),
+            id="langflow-data-path-actions",
+            classes="controls-row",
+        )
+        self.inputs[field.name] = input_widget
+        yield Static(" ")
+
     def _render_langflow_superuser_password(self, field: ConfigField) -> ComposeResult:
         """Langflow password with generate checkbox and eye toggle."""
         with Horizontal():
@@ -456,6 +476,8 @@ def on_button_pressed(self, event: Button.Pressed) -> None:
             self.action_pick_documents_path()
         elif event.button.id == "pick-opensearch-data-btn":
             self.action_pick_opensearch_data_path()
+        elif event.button.id == "pick-langflow-data-btn":
+            self.action_pick_langflow_data_path()
         elif event.button.id and event.button.id.startswith("toggle-"):
             # Generic toggle for password/secret field visibility
             field_name = event.button.id.removeprefix("toggle-")
@@ -665,6 +687,58 @@ def _set_path(result) -> None:
             self._opensearch_data_pick_callback = _set_path  # type: ignore[attr-defined]
             self.app.push_screen(picker)
 
+    def action_pick_langflow_data_path(self) -> None:
+        """Open textual-fspicker to select Langflow data directory."""
+        try:
+            import importlib
+
+            fsp = importlib.import_module("textual_fspicker")
+        except Exception:
+            self.notify("textual-fspicker not available", severity="warning")
+            return
+
+        input_widget = self.inputs.get("langflow_data_path")
+        start = Path.home()
+        if input_widget and input_widget.value:
+            path_str = input_widget.value.strip()
+            if path_str:
+                candidate = Path(path_str).expanduser()
+                if candidate.exists():
+                    start = candidate
+                elif candidate.parent.exists():
+                    start = candidate.parent
+
+        PickerClass = getattr(fsp, "SelectDirectory", None) or getattr(
+            fsp, "FileOpen", None
+        )
+        if PickerClass is None:
+            self.notify(
+                "No compatible picker found in textual-fspicker", severity="warning"
+            )
+            return
+        try:
+            picker = PickerClass(location=start)
+        except Exception:
+            try:
+                picker = PickerClass(start)
+            except Exception:
+                self.notify("Could not initialize textual-fspicker", severity="warning")
+                return
+
+        def _set_path(result) -> None:
+            if not result:
+                return
+            path_str = str(result)
+            if input_widget is None:
+                return
+            input_widget.value = path_str
+
+        try:
+            self.app.push_screen(picker, _set_path)  # type: ignore[arg-type]
+        except TypeError:
+            self._langflow_data_pick_callback = _set_path  # type: ignore[attr-defined]
+            self.app.push_screen(picker)
+
     def on_screen_dismissed(self, event) -> None:  # type: ignore[override]
         try:
             # textual-fspicker screens should dismiss with a result; hand to callback if present
@@ -684,6 +758,15 @@ def on_screen_dismissed(self, event) -> None:  # type: ignore[override]
                     delattr(self, "_opensearch_data_pick_callback")
                 except Exception:
                     pass
+
+            # Handle Langflow data path picker callback
+            cb = getattr(self, "_langflow_data_pick_callback", None)
+            if cb is not None:
+                cb(getattr(event, "result", None))
+                try:
+                    delattr(self, "_langflow_data_pick_callback")
+                except Exception:
+                    pass
         except Exception:
             pass
 
diff --git a/tests/conftest.py b/tests/conftest.py
index ffe80467e..eced102f5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -51,6 +51,15 @@ async def onboard_system():
         except Exception as e:
             print(f"[DEBUG] Could not clean OpenSearch data directory: {e}")
 
+    # Clean up Langflow data directory to ensure a fresh Langflow DB for tests
+    langflow_data_path = Path(os.getenv("LANGFLOW_DATA_PATH", "./langflow-data"))
+    if langflow_data_path.exists():
+        try:
+            shutil.rmtree(langflow_data_path)
+            print(f"[DEBUG] Cleaned up Langflow data directory: {langflow_data_path}")
+        except Exception as e:
+            print(f"[DEBUG] Could not clean Langflow data directory: {e}")
+
     # Initialize clients
     await clients.initialize()
 

From 44830d3ab9b433032b8f904b9211a2ca9273b6f3 Mon Sep 17 00:00:00 2001
From: Mike Pawlowski <mpawlow@ca.ibm.com>
Date: Thu, 19 Mar 2026 07:42:59 -0700
Subject: [PATCH 4/7] fix: persist Langflow database across container restarts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Issue

- #1127

Summary

- Fixed Langflow data directory persistence and bind-mount ownership across container restarts

Container & Entrypoint

- Added docker-entrypoint-langflow.sh that runs as root, chowns /app/langflow-data to uid=1000, then drops privileges and execs the main process — mirrors the pattern used by official database images
- Updated Dockerfile.langflow to switch to USER root, copy the entrypoint script, set it as ENTRYPOINT, and simplified the RUN layer
- Added # syntax=docker/dockerfile:1.4 directive to Dockerfile.langflow

Repository & Git Ignore

- Removed langflow-data/.gitkeep and replaced the selective .gitignore pattern (/langflow-data/* + !.gitkeep) with a blanket langflow-data/ ignore entry

Makefile

- Added ensure-langflow-data target that creates the langflow-data bind-mount directory on the host before Docker starts (prevents Docker from creating it as root)
- Added ensure-langflow-data as a prerequisite to dev, dev-cpu, dev-local, dev-local-cpu, dev-local-build-lf, dev-local-build-lf-cpu, dev-branch, dev-branch-cpu, restart-dev, test-ci, and test-ci-local
- Updated factory-reset to fully remove the langflow-data directory (rm -rf langflow-data) instead of only clearing its contents
---
 .gitignore                    |  5 ++---
 Dockerfile.langflow           | 17 ++++++++++++-----
 Makefile                      | 30 +++++++++++++++++-------------
 docker-entrypoint-langflow.sh | 13 +++++++++++++
 langflow-data/.gitkeep        |  0
 5 files changed, 44 insertions(+), 21 deletions(-)
 create mode 100644 docker-entrypoint-langflow.sh
 delete mode 100644 langflow-data/.gitkeep

diff --git a/.gitignore b/.gitignore
index 02d024e00..8ee2c4059 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,9 +32,8 @@ wheels/
 
 # OpenSearch data directory
 opensearch-data/
-# Langflow data directory (ignore contents, keep directory via .gitkeep)
-/langflow-data/*
-!/langflow-data/.gitkeep
+# Langflow data directory
+langflow-data/
 
 node_modules
 
diff --git a/Dockerfile.langflow b/Dockerfile.langflow
index 73793121b..abb651c74 100644
--- a/Dockerfile.langflow
+++ b/Dockerfile.langflow
@@ -1,13 +1,20 @@
+# syntax=docker/dockerfile:1.4
 FROM langflowai/langflow:1.8.0
 
+# Switch to root so the entrypoint can fix data directory ownership before dropping privileges.
+# The base image runs as uid=1000; we restore that in the entrypoint script.
+USER root
+
 # (+) Install uv
-# (+) Pre-create the Langflow data directory with correct ownership.
-#     - This ensures named Docker volumes are initialised with uid=1000 so
-#       the non-root container user can write to the mounted path.
-RUN set -ex \
-    && pip install uv \
+# (+) Pre-create the Langflow data directory.
+#     - For named Docker volumes, this seeds the volume with the correct path on first mount.
+#     - For bind mounts, the entrypoint chowns the directory at startup.
+RUN pip install uv \
     && mkdir -p /app/langflow-data
 
+COPY --chmod=755 docker-entrypoint-langflow.sh /docker-entrypoint-langflow.sh
+
 EXPOSE 7860
 
+ENTRYPOINT ["/docker-entrypoint-langflow.sh"]
 CMD ["langflow", "run", "--host", "0.0.0.0", "--port", "7860"]
diff --git a/Makefile b/Makefile
index 5fb3d948c..df179205c 100644
--- a/Makefile
+++ b/Makefile
@@ -79,7 +79,8 @@ endef
        test test-unit test-integration test-ci test-ci-local test-sdk test-os-jwt lint \
        backend frontend docling docling-stop install-be install-fe build-be build-fe build-os build-lf logs-be logs-fe logs-lf logs-os \
        shell-be shell-lf shell-os restart status health db-reset clear-os-data flow-upload setup factory-reset \
-       dev-branch build-langflow-dev stop-dev clean-dev logs-dev logs-lf-dev shell-lf-dev restart-dev status-dev
+       dev-branch build-langflow-dev stop-dev clean-dev logs-dev logs-lf-dev shell-lf-dev restart-dev status-dev \
+       ensure-langflow-data
 
 all: help
 
@@ -319,7 +320,10 @@ help_utils: ## Show utility commands
 # DEVELOPMENT ENVIRONMENTS
 ######################
 
-dev: ## Start full stack with GPU support
+ensure-langflow-data: ## Create the langflow-data directory if it does not exist
+	@mkdir -p langflow-data
+
+dev: ensure-langflow-data ## Start full stack with GPU support
 	@echo "$(YELLOW)Starting OpenRAG with GPU support...$(NC)"
 	$(COMPOSE_CMD) -f docker-compose.yml -f docker-compose.gpu.yml up -d
 	@echo "$(PURPLE)Services started!$(NC)"
@@ -329,7 +333,7 @@ dev: ## Start full stack with GPU support
 	@echo "   $(CYAN)OpenSearch:$(NC) http://localhost:9200"
 	@echo "   $(CYAN)Dashboards:$(NC) http://localhost:5601"
 
-dev-cpu: ## Start full stack with CPU only
+dev-cpu: ensure-langflow-data ## Start full stack with CPU only
 	@echo "$(YELLOW)Starting OpenRAG with CPU only...$(NC)"
 	$(COMPOSE_CMD) up -d
 	@echo "$(PURPLE)Services started!$(NC)"
@@ -339,7 +343,7 @@ dev-cpu: ## Start full stack with CPU only
 	@echo "   $(CYAN)OpenSearch:$(NC) http://localhost:9200"
 	@echo "   $(CYAN)Dashboards:$(NC) http://localhost:5601"
 
-dev-local: ## Start infrastructure for local development
+dev-local: ensure-langflow-data ## Start infrastructure for local development
 	@echo "$(YELLOW)Starting infrastructure only (for local development)...$(NC)"
 	$(COMPOSE_CMD) -f docker-compose.yml -f docker-compose.gpu.yml up -d opensearch openrag-backend dashboards langflow
 	@echo "$(PURPLE)Infrastructure started!$(NC)"
@@ -350,7 +354,7 @@ dev-local: ## Start infrastructure for local development
 	@echo ""
 	@echo "$(YELLOW)Now run 'make backend' and 'make frontend' in separate terminals$(NC)"
 
-dev-local-cpu: ## Start infrastructure for local development, with CPU only
+dev-local-cpu: ensure-langflow-data ## Start infrastructure for local development, with CPU only
 	@echo "$(YELLOW)Starting infrastructure only (for local development)...$(NC)"
 	$(COMPOSE_CMD) up -d opensearch openrag-backend dashboards langflow
 	@echo "$(PURPLE)Infrastructure started!$(NC)"
@@ -361,7 +365,7 @@ dev-local-cpu: ## Start infrastructure for local development, with CPU only
 	@echo ""
 	@echo "$(YELLOW)Now run 'make backend' and 'make frontend' in separate terminals$(NC)"
 
-dev-local-build-lf: ## Start infrastructure for local development, building only Langflow image
+dev-local-build-lf: ensure-langflow-data ## Start infrastructure for local development, building only Langflow image
 	@echo "$(YELLOW)Building Langflow image...$(NC)"
 	$(COMPOSE_CMD) -f docker-compose.yml -f docker-compose.gpu.yml build langflow
 	@echo "$(YELLOW)Starting infrastructure only (for local development)...$(NC)"
@@ -374,7 +378,7 @@ dev-local-build-lf: ## Start infrastructure for local development, building only
 	@echo ""
 	@echo "$(YELLOW)Now run 'make backend' and 'make frontend' in separate terminals$(NC)"
 
-dev-local-build-lf-cpu: ## Start infrastructure for local development, building only Langflow image with CPU only
+dev-local-build-lf-cpu: ensure-langflow-data ## Start infrastructure for local development, building only Langflow image with CPU only
 	@echo "$(YELLOW)Building Langflow image (CPU)...$(NC)"
 	$(COMPOSE_CMD) build langflow
 	@echo "$(YELLOW)Starting infrastructure only (for local development)...$(NC)"
@@ -393,7 +397,7 @@ dev-local-build-lf-cpu: ## Start infrastructure for local development, building
 # Usage: make dev-branch BRANCH=test-openai-responses
 #        make dev-branch BRANCH=feature-x REPO=https://github.com/myorg/langflow.git
 
-dev-branch: ## Build & run full stack with custom Langflow branch
+dev-branch: ensure-langflow-data ## Build & run full stack with custom Langflow branch
 	@echo "$(YELLOW)Building Langflow from branch: $(BRANCH)$(NC)"
 	@echo "   $(CYAN)Repository:$(NC) $(REPO)"
 	@echo ""
@@ -409,7 +413,7 @@ dev-branch: ## Build & run full stack with custom Langflow branch
 	@echo "   $(CYAN)OpenSearch:$(NC)            http://localhost:9200"
 	@echo "   $(CYAN)Dashboards:$(NC)            http://localhost:5601"
 
-dev-branch-cpu: ## Build & run full stack with custom Langflow branch and CPU only mode
+dev-branch-cpu: ensure-langflow-data ## Build & run full stack with custom Langflow branch and CPU only mode
 	@echo "$(YELLOW)Building Langflow from branch: $(BRANCH)$(NC)"
 	@echo "   $(CYAN)Repository:$(NC) $(REPO)"
 	@echo ""
@@ -436,7 +440,7 @@ stop-dev: ## Stop dev environment containers
 	$(COMPOSE_CMD) -f docker-compose.dev.yml down
 	@echo "$(PURPLE)Dev environment stopped.$(NC)"
 
-restart-dev: ## Restart dev environment
+restart-dev: ensure-langflow-data ## Restart dev environment
 	@echo "$(YELLOW)Restarting dev environment with branch: $(BRANCH)$(NC)"
 	$(COMPOSE_CMD) -f docker-compose.dev.yml down
 	GIT_BRANCH=$(BRANCH) GIT_REPO=$(REPO) $(COMPOSE_CMD) -f docker-compose.dev.yml up -d
@@ -528,7 +532,7 @@ factory-reset: ## Complete reset (stop, remove volumes, clear data, remove image
 	fi; \
 	if [ -d "langflow-data" ]; then \
 		echo "Removing langflow-data..."; \
-		rm -rf langflow-data/* 2>/dev/null || true; \
+		rm -rf langflow-data; \
 		echo "$(PURPLE)langflow-data removed$(NC)"; \
 	fi; \
 	if [ -d "config" ]; then \
@@ -677,7 +681,7 @@ test-integration: ## Run integration tests (requires infrastructure)
 	@echo "$(YELLOW)Make sure to run 'make dev-local' first!$(NC)"
 	uv run pytest tests/integration/core/ -v
 
-test-ci: ## Start infra, run integration + SDK tests, tear down (uses DockerHub images)
+test-ci: ensure-langflow-data ## Start infra, run integration + SDK tests, tear down (uses DockerHub images)
 	@set -e; \
 	echo "$(YELLOW)Installing test dependencies...$(NC)"; \
 	uv sync --group dev; \
@@ -806,7 +810,7 @@ test-ci: ## Start infra, run integration + SDK tests, tear down (uses DockerHub
 	$(COMPOSE_CMD) down -v 2>/dev/null || true; \
 	exit $$TEST_RESULT
 
-test-ci-local: ## Same as test-ci but builds all images locally
+test-ci-local: ensure-langflow-data ## Same as test-ci but builds all images locally
 	@set -e; \
 	echo "$(YELLOW)Installing test dependencies...$(NC)"; \
 	uv sync --group dev; \
diff --git a/docker-entrypoint-langflow.sh b/docker-entrypoint-langflow.sh
new file mode 100644
index 000000000..550ddcd53
--- /dev/null
+++ b/docker-entrypoint-langflow.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+set -e
+
+# Fix ownership of the Langflow data directory so the container user (uid=1000) can write to it.
+# When the directory is bind-mounted from a host with a different UID (e.g. CI runners at uid=1001),
+# the container user cannot create files. Running chown here as root — before dropping privileges —
+# mirrors the pattern used by official database images (OpenSearch, PostgreSQL, Redis).
+chown -R 1000:1000 /app/langflow-data
+
+# Drop from root to uid=1000 and exec the main process.
+# Python is used for privilege drop — it is guaranteed to be present in the Langflow image
+# and requires no additional packages (unlike gosu or su-exec).
+exec python3 -c 'import os, sys; os.setgid(1000); os.setuid(1000); os.execvp(sys.argv[1], sys.argv[1:])' "$@"
diff --git a/langflow-data/.gitkeep b/langflow-data/.gitkeep
deleted file mode 100644
index e69de29bb..000000000

From a1c1e0283c2b5441ab9721bbee416bb9c7595ff5 Mon Sep 17 00:00:00 2001
From: Mike Pawlowski <mpawlow@ca.ibm.com>
Date: Thu, 19 Mar 2026 07:59:54 -0700
Subject: [PATCH 5/7] fix: persist Langflow database across container restarts

Issue

- #1127

Summary

- Removed Langflow data directory cleanup from test setup

Test Infrastructure

- Removed the block in conftest.py that deleted the Langflow data directory (LANGFLOW_DATA_PATH) before tests ran; this cleanup is no longer appropriate now that the Langflow database is persisted across container restarts.
---
 tests/conftest.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index eced102f5..ffe80467e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -51,15 +51,6 @@ async def onboard_system():
         except Exception as e:
             print(f"[DEBUG] Could not clean OpenSearch data directory: {e}")
 
-    # Clean up Langflow data directory to ensure a fresh Langflow DB for tests
-    langflow_data_path = Path(os.getenv("LANGFLOW_DATA_PATH", "./langflow-data"))
-    if langflow_data_path.exists():
-        try:
-            shutil.rmtree(langflow_data_path)
-            print(f"[DEBUG] Cleaned up Langflow data directory: {langflow_data_path}")
-        except Exception as e:
-            print(f"[DEBUG] Could not clean Langflow data directory: {e}")
-
     # Initialize clients
     await clients.initialize()
 

From 8566c05539f22e19c66a9e73e278d3ebaf2ccf68 Mon Sep 17 00:00:00 2001
From: Mike Pawlowski <mpawlow@ca.ibm.com>
Date: Thu, 19 Mar 2026 11:56:13 -0700
Subject: [PATCH 6/7] fix: persist Langflow database across container restarts

Issue

- #1127

Summary

- Persisted Langflow database and flow configurations across container restarts by replacing the blind upsert startup mechanism with a create-only flow seeding approach.

Docker / Container Changes

- Removed the custom docker-entrypoint-langflow.sh entrypoint script, which ran chown as root before dropping to uid=1000; the base Langflow image already runs as uid=1000 and owns /app, so starting as root to fix ownership was unnecessary.
- Simplified Dockerfile.langflow by removing the USER root switch, entrypoint script copy, and the ENTRYPOINT override; the image now runs its default langflow run command directly.
- Updated docker-compose.yml to resolve LANGFLOW_DATABASE_URL from the environment with a SQLite fallback (${LANGFLOW_DATABASE_URL:-sqlite:////app/langflow-data/langflow.db}), allowing operators to substitute a PostgreSQL URL via the environment without editing docker-compose.yml.
- Added LANGFLOW_DATABASE_URL to .env.example with documentation on overriding for production PostgreSQL deployments.

Kubernetes / Helm Changes

- Replaced the LANGFLOW_LOAD_FLOWS_PATH env var with LANGFLOW_CONFIG_DIR in the Langflow Helm deployment template to align with the new startup flow seeding approach.

Backend: Flow Seeding & Reset Detection

- Updated FlowsService.ensure_flows_exist() to return a set[str] of flow type names that were newly created during the current startup, rather than returning None.
- Added handling for unexpected non-404 HTTP status codes when checking whether a flow exists; logs a warning and skips creation to avoid overwriting existing data.
- Updated startup_tasks() in src/main.py to capture the set of newly created flows from ensure_flows_exist().
- Filtered out newly seeded flows from the check_flows_reset() result so that freshly created flows (which match their JSON definition by design) are not incorrectly flagged as having been externally reset.
- Upgraded the log level for ensure_flows_exist() failures from warning to error to better surface critical startup failures.
---
 .env.example                                      |  5 +++++
 Dockerfile.langflow                               | 13 ++-----------
 docker-compose.yml                                |  2 +-
 docker-entrypoint-langflow.sh                     | 13 -------------
 .../openrag/templates/langflow/deployment.yaml    |  4 ++--
 src/main.py                                       | 12 ++++++++++--
 src/services/flows_service.py                     | 15 ++++++++++++++-
 7 files changed, 34 insertions(+), 30 deletions(-)
 delete mode 100644 docker-entrypoint-langflow.sh

diff --git a/.env.example b/.env.example
index a2a90fa59..2cb3df3fc 100644
--- a/.env.example
+++ b/.env.example
@@ -79,6 +79,11 @@ OPENSEARCH_DATA_PATH=./opensearch-data
 # Default: ./langflow-data
 LANGFLOW_DATA_PATH=./langflow-data
 
+# Langflow database URL. Defaults to SQLite stored in LANGFLOW_DATA_PATH.
+# Override with a PostgreSQL URL for production deployments, e.g.:
+#   LANGFLOW_DATABASE_URL=postgresql://user:pass@host:5432/langflow
+LANGFLOW_DATABASE_URL=sqlite:////app/langflow-data/langflow.db
+
 # OpenSearch Connection
 OPENSEARCH_HOST=opensearch
 OPENSEARCH_PORT=9200
diff --git a/Dockerfile.langflow b/Dockerfile.langflow
index abb651c74..122c8e5ea 100644
--- a/Dockerfile.langflow
+++ b/Dockerfile.langflow
@@ -1,20 +1,11 @@
 # syntax=docker/dockerfile:1.4
 FROM langflowai/langflow:1.8.0
 
-# Switch to root so the entrypoint can fix data directory ownership before dropping privileges.
-# The base image runs as uid=1000; we restore that in the entrypoint script.
-USER root
-
-# (+) Install uv
-# (+) Pre-create the Langflow data directory.
-#     - For named Docker volumes, this seeds the volume with the correct path on first mount.
-#     - For bind mounts, the entrypoint chowns the directory at startup.
+# Install uv and pre-create the Langflow data directory.
+# The base image already runs as uid=1000 and owns /app, so no root or chown needed.
 RUN pip install uv \
     && mkdir -p /app/langflow-data
 
-COPY --chmod=755 docker-entrypoint-langflow.sh /docker-entrypoint-langflow.sh
-
 EXPOSE 7860
 
-ENTRYPOINT ["/docker-entrypoint-langflow.sh"]
 CMD ["langflow", "run", "--host", "0.0.0.0", "--port", "7860"]
diff --git a/docker-compose.yml b/docker-compose.yml
index 6a2a07f16..f0fd620a5 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -147,7 +147,7 @@ services:
       - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID}
       - OLLAMA_BASE_URL=${OLLAMA_ENDPOINT}
       - LANGFLOW_CONFIG_DIR=/app/langflow-data
-      - LANGFLOW_DATABASE_URL=sqlite:////app/langflow-data/langflow.db
+      - LANGFLOW_DATABASE_URL=${LANGFLOW_DATABASE_URL:-sqlite:////app/langflow-data/langflow.db}
       - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
       - JWT=None
       - OWNER=None
diff --git a/docker-entrypoint-langflow.sh b/docker-entrypoint-langflow.sh
deleted file mode 100644
index 550ddcd53..000000000
--- a/docker-entrypoint-langflow.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/sh
-set -e
-
-# Fix ownership of the Langflow data directory so the container user (uid=1000) can write to it.
-# When the directory is bind-mounted from a host with a different UID (e.g. CI runners at uid=1001),
-# the container user cannot create files. Running chown here as root — before dropping privileges —
-# mirrors the pattern used by official database images (OpenSearch, PostgreSQL, Redis).
-chown -R 1000:1000 /app/langflow-data
-
-# Drop from root to uid=1000 and exec the main process.
-# Python is used for privilege drop — it is guaranteed to be present in the Langflow image
-# and requires no additional packages (unlike gosu or su-exec).
-exec python3 -c 'import os, sys; os.setgid(1000); os.setuid(1000); os.execvp(sys.argv[1], sys.argv[1:])' "$@"
diff --git a/kubernetes/helm/openrag/templates/langflow/deployment.yaml b/kubernetes/helm/openrag/templates/langflow/deployment.yaml
index aa57b8762..3c25bfa68 100644
--- a/kubernetes/helm/openrag/templates/langflow/deployment.yaml
+++ b/kubernetes/helm/openrag/templates/langflow/deployment.yaml
@@ -123,10 +123,10 @@ spec:
 
           env:
             # Langflow core settings
-            - name: LANGFLOW_LOAD_FLOWS_PATH
-              value: {{ .Values.langflow.persistence.mountPath }}/{{ .Values.langflow.persistence.flowsSubPath }}
             - name: LANGFLOW_DATABASE_URL
               value: "sqlite:///{{ .Values.langflow.persistence.mountPath }}/{{ .Values.langflow.persistence.dbSubPath }}"
+            - name: LANGFLOW_CONFIG_DIR
+              value: {{ .Values.langflow.persistence.mountPath }}
             - name: LANGFLOW_DEACTIVATE_TRACING
               value: {{ .Values.langflow.deactivateTracing | quote }}
             - name: LANGFLOW_LOG_LEVEL
diff --git a/src/main.py b/src/main.py
index 8eae9b293..8f59edf56 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1203,11 +1203,16 @@ async def startup_tasks(services):
     # Ensure all configured flows exist in Langflow (create-only, never overwrites).
     # This replaces LANGFLOW_LOAD_FLOWS_PATH, which performed a blind upsert on
     # every container start and discarded any user edits made in the Langflow UI.
+    newly_created: set[str] = set()
     try:
         flows_service = services["flows_service"]
-        await flows_service.ensure_flows_exist()
+        newly_created = await flows_service.ensure_flows_exist()
     except Exception as e:
-        logger.warning("Failed to ensure Langflow flows exist at startup", error=str(e))
+        logger.error(
+            "Failed to ensure Langflow flows exist at startup — "
+            "flows may be missing until the next restart",
+            error=str(e),
+        )
 
     # Check if flows were reset and reapply settings if config is edited
     try:
@@ -1216,6 +1221,9 @@ async def startup_tasks(services):
             logger.info("Checking if Langflow flows were reset")
             flows_service = services["flows_service"]
             reset_flows = await flows_service.check_flows_reset()
+            # Exclude flows that were just seeded — they match the JSON by design,
+            # not because they were externally reset.
+            reset_flows = [f for f in reset_flows if f not in newly_created]
 
             if reset_flows:
                 logger.info(
diff --git a/src/services/flows_service.py b/src/services/flows_service.py
index 137af1080..f06d7b948 100644
--- a/src/services/flows_service.py
+++ b/src/services/flows_service.py
@@ -799,7 +799,7 @@ async def _compare_flow_with_file(self, flow_id: str):
             logger.error(f"Error comparing flow {flow_id} with file: {str(e)}")
             return False
 
-    async def ensure_flows_exist(self):
+    async def ensure_flows_exist(self) -> set[str]:
         """
         Ensure all configured flows exist in Langflow.
 
@@ -810,6 +810,8 @@ async def ensure_flows_exist(self):
 
         This replaces the LANGFLOW_LOAD_FLOWS_PATH mechanism, which performed a
         blind upsert on every container start and discarded user edits.
+
+        Returns the set of flow type names that were actually created.
         """
         flow_configs = [
             ("nudges", NUDGES_FLOW_ID),
@@ -817,6 +819,7 @@ async def ensure_flows_exist(self):
             ("ingest", LANGFLOW_INGEST_FLOW_ID),
             ("url_ingest", LANGFLOW_URL_INGEST_FLOW_ID),
         ]
+        created_flow_types: set[str] = set()
 
         for flow_type, flow_id in flow_configs:
             if not flow_id:
@@ -832,6 +835,13 @@ async def ensure_flows_exist(self):
                     )
                     continue
 
+                if response.status_code != 404:
+                    logger.warning(
+                        f"Unexpected status checking {flow_type} flow (ID: {flow_id}): "
+                        f"HTTP {response.status_code} — skipping creation to avoid overwriting existing data"
+                    )
+                    continue
+
                 flow_path = self._find_flow_file_by_id(flow_id)
                 if not flow_path:
                     logger.warning(
@@ -849,6 +859,7 @@ async def ensure_flows_exist(self):
                     logger.info(
                         f"Created {flow_type} flow (ID: {flow_id}) from {os.path.basename(flow_path)}"
                     )
+                    created_flow_types.add(flow_type)
                 else:
                     logger.warning(
                         f"Failed to create {flow_type} flow (ID: {flow_id}): "
@@ -860,6 +871,8 @@ async def ensure_flows_exist(self):
                     f"Error ensuring {flow_type} flow (ID: {flow_id}) exists: {e}"
                 )
 
+        return created_flow_types
+
     async def check_flows_reset(self):
         """
         Check if any flows have been reset by comparing with JSON files.

From 7a06eb5817f147241bc43af0e885b0b6e69906d2 Mon Sep 17 00:00:00 2001
From: Mike Pawlowski <mpawlow@ca.ibm.com>
Date: Thu, 19 Mar 2026 18:50:06 -0700
Subject: [PATCH 7/7] fix: persist Langflow database across container restarts

Issue

- #1127

Summary

- Fixed Langflow data directory permissions for CI and E2E test environments

CI Test Pipeline (Makefile)

- Added chmod 777 langflow-data before the test run in both test-ci and test-ci-local targets to ensure the langflow-data directory is world-writable prior to container startup

E2E Test Setup (scripts/setup-e2e.sh)

- Added pre-creation of the langflow-data directory with world-writable permissions (777) before infrastructure starts, ensuring the Langflow container (UID 1000) and the CI runner (UID 1001) can both access it regardless of Docker's :U flag behavior
---
 Makefile             | 2 ++
 scripts/setup-e2e.sh | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/Makefile b/Makefile
index df179205c..3a24ebdbb 100644
--- a/Makefile
+++ b/Makefile
@@ -682,6 +682,7 @@ test-integration: ## Run integration tests (requires infrastructure)
 	uv run pytest tests/integration/core/ -v
 
 test-ci: ensure-langflow-data ## Start infra, run integration + SDK tests, tear down (uses DockerHub images)
+	@chmod 777 langflow-data
 	@set -e; \
 	echo "$(YELLOW)Installing test dependencies...$(NC)"; \
 	uv sync --group dev; \
@@ -811,6 +812,7 @@ test-ci: ensure-langflow-data ## Start infra, run integration + SDK tests, tear
 	exit $$TEST_RESULT
 
 test-ci-local: ensure-langflow-data ## Same as test-ci but builds all images locally
+	@chmod 777 langflow-data
 	@set -e; \
 	echo "$(YELLOW)Installing test dependencies...$(NC)"; \
 	uv sync --group dev; \
diff --git a/scripts/setup-e2e.sh b/scripts/setup-e2e.sh
index 92e182588..9cb62ef8c 100755
--- a/scripts/setup-e2e.sh
+++ b/scripts/setup-e2e.sh
@@ -49,6 +49,11 @@ echo "Starting E2E Setup using $E2E_ENV..."
 echo "Cleaning up..."
 make factory-reset FORCE=true ENV_FILE=$E2E_ENV
 
+# Pre-create langflow-data as world-writable so the Langflow container (UID 1000)
+# and the runner (UID 1001) can both access it, regardless of Docker's :U flag behavior.
+mkdir -p langflow-data
+chmod 777 langflow-data
+
 # Start infrastructure using make (this will use the new .env)
 echo "Starting infrastructure..."
 make dev-local-cpu ENV_FILE=$E2E_ENV