diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml index 55f9a9310..b10da3c5c 100644 --- a/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml +++ b/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml @@ -22,13 +22,14 @@ spec: secretKeyRef: name: llama-stack-ip-secret key: key - # Same vars as docker-compose / server-mode YAML (${env.FAISS_VECTOR_STORE_ID} in byok_rag). - name: FAISS_VECTOR_STORE_ID valueFrom: secretKeyRef: name: faiss-vector-store-secret key: id optional: true + - name: KV_RAG_PATH + value: "/app-root/src/.llama/storage/rag/kv_store.db" image: ${LIGHTSPEED_STACK_IMAGE} ports: - containerPort: 8080 diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-prow.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-prow.yaml new file mode 100644 index 000000000..757933c3d --- /dev/null +++ b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-prow.yaml @@ -0,0 +1,205 @@ +# Llama Stack pod for Prow: uses pre-built image with enrichment + RAG restore. +# +# Requires: ConfigMap llama-stack-config (run.yaml), ConfigMap rag-data (kv_store.db.gz), +# ConfigMap lightspeed-stack-config (lightspeed-stack.yaml). +# Requires: Image built as ${LLAMA_STACK_IMAGE} (set by pipeline.sh). +# +apiVersion: v1 +kind: Pod +metadata: + name: llama-stack-service + labels: + pod: llama-stack-service +spec: + securityContext: + seccompProfile: + type: RuntimeDefault + initContainers: + - name: setup-rag-data + image: busybox:latest + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + command: + - /bin/sh + - -c + - | + set -e + mkdir -p /data/src/.llama/storage/rag /data/src/.llama/storage/files /data/.e2e-rag-seed + if [ ! 
-f /rag-data/kv_store.db.gz ]; then + echo "FATAL: missing /rag-data/kv_store.db.gz" + ls -la /rag-data || true + exit 1 + fi + gunzip -c /rag-data/kv_store.db.gz > /data/.e2e-rag-seed/kv_store.db + cp -f /data/.e2e-rag-seed/kv_store.db /data/src/.llama/storage/rag/kv_store.db + chmod -R 777 /data/src /data/.e2e-rag-seed + echo "RAG data extracted successfully" + volumeMounts: + - name: rag-storage + mountPath: /data + - name: rag-data + mountPath: /rag-data + - name: materialize-run-yaml + image: busybox:latest + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + command: + - /bin/sh + - -c + - | + set -e + cp /cm/run.yaml /work/run.yaml + chmod 664 /work/run.yaml + volumeMounts: + - name: config-cm + mountPath: /cm + readOnly: true + - name: rag-storage + mountPath: /work + containers: + - name: llama-stack-container + image: ${LLAMA_STACK_IMAGE} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + runAsUser: 1001 + seccompProfile: + type: RuntimeDefault + workingDir: /opt/app-root + env: + - name: PYTHONPATH + value: "/opt/app-root/src" + - name: HOME + value: "/opt/app-root/src" + - name: KV_STORE_PATH + value: "/opt/app-root/src/.llama/storage/kv_store.db" + - name: KV_RAG_PATH + value: "/opt/app-root/src/.llama/storage/rag/kv_store.db" + - name: SQL_STORE_PATH + value: "/opt/app-root/src/.llama/storage/sql_store.db" + - name: KSVC_URL + valueFrom: + secretKeyRef: + name: api-url-secret + key: key + - name: VLLM_API_KEY + valueFrom: + secretKeyRef: + name: vllm-api-key-secret + key: key + - name: INFERENCE_MODEL + value: "meta-llama/Llama-3.1-8B-Instruct" + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: openai-api-key-secret + key: key + optional: true + - name: E2E_OPENAI_MODEL + value: "gpt-4o-mini" + - name: LLAMA_STACK_CONFIG + value: 
"/opt/app-root/src/.llama/storage/run.yaml" + - name: FAISS_VECTOR_STORE_ID + valueFrom: + secretKeyRef: + name: faiss-vector-store-secret + key: id + - name: E2E_LLAMA_HOSTNAME + valueFrom: + secretKeyRef: + name: llama-stack-ip-secret + key: key + command: + - /bin/bash + - -c + - | + set -e + RAG_SEED="/opt/app-root/src/.llama/storage/.e2e-rag-seed/kv_store.db" + RAG_CM_GZ="/opt/app-root/rag-data-cm/kv_store.db.gz" + RAG_WORK="${KV_RAG_PATH:-/opt/app-root/src/.llama/storage/rag/kv_store.db}" + restore_rag_seed() { + mkdir -p "$(dirname "$RAG_WORK")" + if [[ -f "$RAG_CM_GZ" ]]; then + RAG_WORK="$RAG_WORK" RAG_CM_GZ="$RAG_CM_GZ" python3 -c 'import gzip, os, shutil, sys; r, g = os.environ["RAG_WORK"], os.environ["RAG_CM_GZ"]; t = r + ".tmp"; i = gzip.open(g, "rb"); o = open(t, "wb"); shutil.copyfileobj(i, o); i.close(); o.close(); sz = os.path.getsize(t); (sz >= 1048576) or (print("FATAL: RAG from ConfigMap too small:", sz, file=sys.stderr) or sys.exit(1)); os.replace(t, r); os.chmod(r, 0o664)' || exit 1 + elif [[ -f "$RAG_SEED" ]]; then + cp -f "$RAG_SEED" "$RAG_WORK" + chmod 664 "$RAG_WORK" 2>/dev/null || true + fi + } + restore_rag_seed + INPUT_CONFIG="${LLAMA_STACK_CONFIG:-/opt/app-root/run.yaml}" + ENRICHED_CONFIG="/opt/app-root/run.yaml" + LIGHTSPEED_CONFIG="${LIGHTSPEED_CONFIG:-/opt/app-root/lightspeed-stack.yaml}" + ENV_FILE="/opt/app-root/.env" + if [[ -f "$LIGHTSPEED_CONFIG" ]]; then + echo "Enriching llama-stack config..." + ENRICHMENT_FAILED=0 + python3 /opt/app-root/src/llama_stack_configuration.py \ + -c "$LIGHTSPEED_CONFIG" \ + -i "$INPUT_CONFIG" \ + -o "$ENRICHED_CONFIG" \ + -e "$ENV_FILE" 2>&1 || ENRICHMENT_FAILED=1 + if [[ -f "$ENV_FILE" ]]; then + set -a && . 
"$ENV_FILE" && set +a + fi + if [[ -f "$ENRICHED_CONFIG" ]] && [[ "$ENRICHMENT_FAILED" -eq 0 ]]; then + echo "Using enriched config: $ENRICHED_CONFIG" + restore_rag_seed + exec llama stack run "$ENRICHED_CONFIG" + fi + fi + echo "Using original config: $INPUT_CONFIG" + restore_rag_seed + exec llama stack run "$INPUT_CONFIG" + ports: + - containerPort: 8321 + readinessProbe: + httpGet: + path: /v1/health + port: 8321 + initialDelaySeconds: 20 + periodSeconds: 5 + failureThreshold: 36 + livenessProbe: + httpGet: + path: /v1/health + port: 8321 + initialDelaySeconds: 120 + periodSeconds: 20 + failureThreshold: 3 + volumeMounts: + - name: rag-storage + mountPath: /opt/app-root/src/.llama/storage + - name: lightspeed-config + mountPath: /opt/app-root/lightspeed-stack.yaml + subPath: lightspeed-stack.yaml + readOnly: true + - name: rag-data + mountPath: /opt/app-root/rag-data-cm + readOnly: true + volumes: + - name: rag-storage + emptyDir: {} + - name: config-cm + configMap: + name: llama-stack-config + - name: lightspeed-config + configMap: + name: lightspeed-stack-config + - name: rag-data + configMap: + name: rag-data diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml deleted file mode 100644 index de22831f6..000000000 --- a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml +++ /dev/null @@ -1,62 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: llama-stack-service -spec: - imagePullSecrets: - - name: quay-lightspeed-pull-secret - initContainers: - - name: setup-rag-data - image: busybox:latest - command: - - /bin/sh - - -c - - | - mkdir -p /data/storage/rag - gunzip -c /rag-data/kv_store.db.gz > /data/storage/rag/kv_store.db - echo "RAG data extracted successfully" - ls -la /data/storage/rag/ - volumeMounts: - - name: app-root - mountPath: /data - - name: rag-data - mountPath: /rag-data - containers: - - name: llama-stack-container - command: ["llama", "stack", "run", 
"/opt/app-root/run.yaml"] - env: - - name: KSVC_URL - valueFrom: - secretKeyRef: - name: api-url-secret - key: key - - name: VLLM_API_KEY - valueFrom: - secretKeyRef: - name: vllm-api-key-secret - key: key - - name: INFERENCE_MODEL - value: "meta-llama/Llama-3.1-8B-Instruct" - - name: FAISS_VECTOR_STORE_ID - valueFrom: - secretKeyRef: - name: faiss-vector-store-secret - key: id - image: ${LLAMA_STACK_IMAGE} - ports: - - containerPort: 8321 - volumeMounts: - - name: app-root - mountPath: /opt/app-root/src/.llama - - name: config - mountPath: /opt/app-root/run.yaml - subPath: run.yaml - volumes: - - name: app-root - emptyDir: {} - - name: config - configMap: - name: llama-stack-config - - name: rag-data - configMap: - name: rag-data diff --git a/tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml b/tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml index e9b619726..d57226cc1 100644 --- a/tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml +++ b/tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml @@ -2,7 +2,6 @@ apiVersion: datasciencecluster.opendatahub.io/v1 kind: DataScienceCluster metadata: name: default-dsc - namespace: e2e-rhoai-dsc spec: serviceMesh: managementState: Managed diff --git a/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-cpu.yaml b/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-cpu.yaml index 4c3f5e7bd..990dc2df3 100644 --- a/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-cpu.yaml +++ b/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-cpu.yaml @@ -24,7 +24,7 @@ spec: - --port - "8080" - --max-model-len - - "2048" + - "32768" image: quay.io/rh-ee-cpompeia/vllm-cpu:latest name: kserve-container env: diff --git a/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml b/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml index b7597991c..e925890d2 100644 --- a/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml +++ b/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml @@ -24,7 +24,7 @@ spec: - --port - 
"8080" - --max-model-len - - "2048" + - "32768" - --gpu-memory-utilization - "0.9" image: ${VLLM_IMAGE} diff --git a/tests/e2e-prow/rhoai/pipeline-services.sh b/tests/e2e-prow/rhoai/pipeline-services.sh index cd33ab9d5..1db04b6ea 100755 --- a/tests/e2e-prow/rhoai/pipeline-services.sh +++ b/tests/e2e-prow/rhoai/pipeline-services.sh @@ -1,27 +1,30 @@ #!/bin/bash BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +NAMESPACE="${NAMESPACE:-e2e-rhoai-dsc}" -# Deploy llama-stack -envsubst < "$BASE_DIR/manifests/lightspeed/llama-stack.yaml" | oc apply -f - +# Create llama-stack-ip-secret before deploying the pod (it references the secret as an env var) +export E2E_LLAMA_HOSTNAME="llama-stack-service-svc.${NAMESPACE}.svc.cluster.local" +oc create secret generic llama-stack-ip-secret \ + --from-literal=key="$E2E_LLAMA_HOSTNAME" \ + -n "$NAMESPACE" 2>/dev/null || echo "Secret llama-stack-ip-secret exists" + +# Deploy llama-stack (substitute only LLAMA_STACK_IMAGE, leave other ${} intact) +envsubst '${LLAMA_STACK_IMAGE}' < "$BASE_DIR/manifests/lightspeed/llama-stack-prow.yaml" | oc apply -n "$NAMESPACE" -f - oc wait pod/llama-stack-service \ - -n e2e-rhoai-dsc --for=condition=Ready --timeout=600s + -n "$NAMESPACE" --for=condition=Ready --timeout=600s -# Get url address of llama-stack pod -oc label pod llama-stack-service pod=llama-stack-service -n e2e-rhoai-dsc +# Expose llama-stack service +oc label pod llama-stack-service pod=llama-stack-service -n "$NAMESPACE" oc expose pod llama-stack-service \ --name=llama-stack-service-svc \ --port=8321 \ --type=ClusterIP \ - -n e2e-rhoai-dsc - -export E2E_LLAMA_HOSTNAME="llama-stack-service-svc.e2e-rhoai-dsc.svc.cluster.local" - -oc create secret generic llama-stack-ip-secret \ - --from-literal=key="$E2E_LLAMA_HOSTNAME" \ - -n e2e-rhoai-dsc || echo "Secret exists" + -n "$NAMESPACE" -# Deploy lightspeed-stack -oc apply -f "$BASE_DIR/manifests/lightspeed/lightspeed-stack.yaml" +# Deploy lightspeed-stack (substitute only 
LIGHTSPEED_STACK_IMAGE, leave other ${} intact) +LIGHTSPEED_STACK_IMAGE="${LIGHTSPEED_STACK_IMAGE:-quay.io/lightspeed-core/lightspeed-stack:dev-latest}" +export LIGHTSPEED_STACK_IMAGE +envsubst '${LIGHTSPEED_STACK_IMAGE}' < "$BASE_DIR/manifests/lightspeed/lightspeed-stack.yaml" | oc apply -n "$NAMESPACE" -f - diff --git a/tests/e2e-prow/rhoai/pipeline.sh b/tests/e2e-prow/rhoai/pipeline.sh index 73585cb82..39f6fcdba 100755 --- a/tests/e2e-prow/rhoai/pipeline.sh +++ b/tests/e2e-prow/rhoai/pipeline.sh @@ -9,13 +9,14 @@ export RUNNING_PROW=true # 1. GLOBAL CONFIG #======================================== NAMESPACE="e2e-rhoai-dsc" +export NAMESPACE MODEL_NAME="meta-llama/Llama-3.1-8B-Instruct" PIPELINE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -# RHOAI llama-stack image -LLAMA_STACK_IMAGE="${LLAMA_STACK_IMAGE:-quay.io/rhoai/odh-llama-stack-core-rhel9:rhoai-3.3}" -echo "Using llama-stack image: $LLAMA_STACK_IMAGE" -export LLAMA_STACK_IMAGE +# RHOAI llama-stack image (unused when building from source via llama-stack-openai.yaml) +# LLAMA_STACK_IMAGE="${LLAMA_STACK_IMAGE:-quay.io/rhoai/odh-llama-stack-core-rhel9:rhoai-3.4-ea.2}" +# echo "Using llama-stack image: $LLAMA_STACK_IMAGE" +# export LLAMA_STACK_IMAGE #======================================== # 2. ENVIRONMENT SETUP @@ -38,15 +39,21 @@ oc version oc whoami #======================================== -# 3. CREATE NAMESPACE & SECRETS +# 3. 
BOOTSTRAP OPERATORS & DSC (before namespace — DSC operator may delete it) #======================================== -echo "===== Creating namespace & secrets =====" -oc get ns "$NAMESPACE" >/dev/null 2>&1 || oc create namespace "$NAMESPACE" - -# Create NFD and NVIDIA namespaces +echo "===== Bootstrapping operators =====" +# Create NFD and NVIDIA namespaces (needed by operator subscriptions) oc apply -f "$PIPELINE_DIR/manifests/namespaces/nfd.yaml" oc apply -f "$PIPELINE_DIR/manifests/namespaces/nvidia-operator.yaml" +# Install operators and apply DataScienceCluster (this may delete/recreate namespaces) +"$PIPELINE_DIR/scripts/bootstrap.sh" "$PIPELINE_DIR" + +#======================================== +# 4. CREATE NAMESPACE & SECRETS (after DSC settles) +#======================================== +echo "===== Creating namespace & secrets =====" +oc get ns "$NAMESPACE" >/dev/null 2>&1 || oc create namespace "$NAMESPACE" create_secret() { local name=$1; shift @@ -56,6 +63,22 @@ create_secret() { create_secret hf-token-secret --from-literal=token="$HUGGING_FACE_HUB_TOKEN" create_secret vllm-api-key-secret --from-literal=key="$VLLM_API_KEY" +create_secret openai-api-key-secret --from-literal=key="" + +# MCP token secrets for lightspeed-stack +REPO_ROOT="$(cd "$PIPELINE_DIR/../../.." 
&& pwd)" +if [ -f "$REPO_ROOT/tests/e2e/secrets/mcp-token" ]; then + oc create secret generic mcp-file-auth-token -n "$NAMESPACE" \ + --from-file=token="$REPO_ROOT/tests/e2e/secrets/mcp-token" \ + --dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f - + echo "✅ mcp-file-auth-token secret applied" +fi +if [ -f "$REPO_ROOT/tests/e2e/secrets/invalid-mcp-token" ]; then + oc create secret generic mcp-invalid-file-auth-token -n "$NAMESPACE" \ + --from-file=token="$REPO_ROOT/tests/e2e/secrets/invalid-mcp-token" \ + --dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f - + echo "✅ mcp-invalid-file-auth-token secret applied" +fi # Create Quay pull secret for llama-stack images echo "Creating Quay pull secret..." @@ -70,7 +93,7 @@ oc secrets link default quay-lightspeed-pull-secret --for=pull -n "$NAMESPACE" 2 #======================================== -# 4. CONFIGMAPS +# 5. CONFIGMAPS #======================================== echo "===== Setting up configmaps =====" @@ -79,14 +102,17 @@ curl -sL -o tool_chat_template_llama3.1_json.jinja \ || { echo "❌ Failed to download jinja template"; exit 1; } oc create configmap vllm-chat-template -n "$NAMESPACE" \ - --from-file=tool_chat_template_llama3.1_json.jinja --dry-run=client -o yaml | oc apply -f - + --from-file=tool_chat_template_llama3.1_json.jinja --dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f - #======================================== -# 5. DEPLOY vLLM +# 6. DEPLOY vLLM (GPU setup + deploy, bootstrap already done) #======================================== echo "===== Deploying vLLM =====" -./pipeline-vllm.sh +"$PIPELINE_DIR/scripts/gpu-setup.sh" "$PIPELINE_DIR" +source "$PIPELINE_DIR/scripts/fetch-vllm-image.sh" +"$PIPELINE_DIR/scripts/deploy-vllm.sh" "$PIPELINE_DIR" +"$PIPELINE_DIR/scripts/get-vllm-pod-info.sh" oc get pods -n "$NAMESPACE" @@ -162,18 +188,18 @@ REPO_ROOT="$(cd "$PIPELINE_DIR/../../.." && pwd)" echo "Creating mock server ConfigMaps..." 
oc create configmap mock-jwks-script -n "$NAMESPACE" \ --from-file=server.py="$REPO_ROOT/tests/e2e/mock_jwks_server/server.py" \ - --dry-run=client -o yaml | oc apply -f - + --dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f - oc create configmap mock-mcp-script -n "$NAMESPACE" \ --from-file=server.py="$REPO_ROOT/tests/e2e/mock_mcp_server/server.py" \ - --dry-run=client -o yaml | oc apply -f - + --dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f - # Deploy mock server pods and services echo "Deploying mock-jwks..." -oc apply -f "$PIPELINE_DIR/manifests/lightspeed/mock-jwks.yaml" +oc apply -n "$NAMESPACE" -f "$PIPELINE_DIR/manifests/lightspeed/mock-jwks.yaml" echo "Deploying mock-mcp..." -oc apply -f "$PIPELINE_DIR/manifests/lightspeed/mock-mcp.yaml" +oc apply -n "$NAMESPACE" -f "$PIPELINE_DIR/manifests/lightspeed/mock-mcp.yaml" # Wait for mock servers to be ready echo "Waiting for mock servers to be ready..." @@ -189,7 +215,39 @@ oc wait pod/mock-jwks pod/mock-mcp \ echo "✅ Mock servers deployed" #======================================== -# 8. DEPLOY LIGHTSPEED STACK AND LLAMA STACK +# 8. 
BUILD LLAMA STACK IMAGE +#======================================== +echo "===== Building llama-stack image =====" +LLAMA_STACK_IMAGE="image-registry.openshift-image-registry.svc:5000/${NAMESPACE}/llama-stack-e2e:latest" +export LLAMA_STACK_IMAGE + +# Create BuildConfig (idempotent) +oc new-build --name=llama-stack-e2e \ + --binary \ + --strategy=docker \ + --image="registry.access.redhat.com/ubi9/ubi-minimal" \ + --to="llama-stack-e2e:latest" \ + -n "$NAMESPACE" 2>/dev/null || echo "BuildConfig llama-stack-e2e already exists" + +# Patch BuildConfig to use test.containerfile instead of Dockerfile +oc patch bc llama-stack-e2e -n "$NAMESPACE" --type=json \ + -p '[{"op":"replace","path":"/spec/strategy/dockerStrategy/dockerfilePath","value":"test.containerfile"}]' 2>/dev/null || true + +# Build from repo root +oc start-build llama-stack-e2e \ + --from-dir="$REPO_ROOT" \ + --follow \ + -n "$NAMESPACE" || { echo "❌ llama-stack image build failed"; exit 1; } + +echo "✅ llama-stack image built: $LLAMA_STACK_IMAGE" + +# Allow default SA to pull from the internal registry +oc policy add-role-to-user system:image-puller \ + system:serviceaccount:${NAMESPACE}:default \ + -n "$NAMESPACE" 2>/dev/null || true + +#======================================== +# 9. DEPLOY LIGHTSPEED STACK AND LLAMA STACK #======================================== echo "===== Deploying Services =====" @@ -281,6 +339,15 @@ oc describe pod llama-stack-service -n "$NAMESPACE" || true #======================================== # 9. EXPOSE SERVICE & START PORT-FORWARD #======================================== +# Export PID file paths so e2e-ops.sh can find and kill stale port-forwards +# during test-triggered pod restarts (matches pipeline-konflux.sh). 
+export E2E_LSC_PORT_FORWARD_PID_FILE="${E2E_LSC_PORT_FORWARD_PID_FILE:-/tmp/e2e-lightspeed-port-forward.pid}" +export E2E_LLAMA_PORT_FORWARD_PID_FILE="${E2E_LLAMA_PORT_FORWARD_PID_FILE:-/tmp/e2e-llama-port-forward.pid}" +export E2E_JWKS_PORT_FORWARD_PID_FILE="${E2E_JWKS_PORT_FORWARD_PID_FILE:-/tmp/e2e-jwks-port-forward.pid}" +rm -f "$E2E_LSC_PORT_FORWARD_PID_FILE" +rm -f "$E2E_LLAMA_PORT_FORWARD_PID_FILE" +rm -f "$E2E_JWKS_PORT_FORWARD_PID_FILE" + oc label pod lightspeed-stack-service pod=lightspeed-stack-service -n $NAMESPACE oc expose pod lightspeed-stack-service \ @@ -289,20 +356,36 @@ oc expose pod lightspeed-stack-service \ --type=ClusterIP \ -n $NAMESPACE -# Kill any existing processes on ports 8080 and 8000 -echo "Checking for existing processes on ports 8080 and 8000..." -lsof -ti:8080 | xargs kill -9 2>/dev/null || true -lsof -ti:8000 | xargs kill -9 2>/dev/null || true +# Kill any existing processes on ports 8080, 8000, and 8321 (lsof may be missing in minimal images) +echo "Checking for existing processes on ports 8080, 8000, and 8321..." +if command -v lsof >/dev/null 2>&1; then + lsof -ti:8080 | xargs kill -9 2>/dev/null || true + lsof -ti:8000 | xargs kill -9 2>/dev/null || true + lsof -ti:8321 | xargs kill -9 2>/dev/null || true +elif command -v fuser >/dev/null 2>&1; then + fuser -k 8080/tcp 2>/dev/null || true + fuser -k 8000/tcp 2>/dev/null || true + fuser -k 8321/tcp 2>/dev/null || true +fi # Start port-forward for lightspeed-stack echo "Starting port-forward for lightspeed-stack..." oc port-forward svc/lightspeed-stack-service-svc 8080:8080 -n $NAMESPACE & PF_LCS_PID=$! +echo "$PF_LCS_PID" >"$E2E_LSC_PORT_FORWARD_PID_FILE" # Start port-forward for mock-jwks (needed for RBAC tests to get tokens) echo "Starting port-forward for mock-jwks..." oc port-forward svc/mock-jwks 8000:8000 -n $NAMESPACE & PF_JWKS_PID=$! 
+echo "$PF_JWKS_PID" >"$E2E_JWKS_PORT_FORWARD_PID_FILE" + +# Behave steps that call Llama Stack directly (MCP toolgroups, shields, disrupt/restore) +# need localhost:8321. Without this forward those tests hit "Connection refused". +echo "Starting port-forward for llama-stack..." +oc port-forward svc/llama-stack-service-svc 8321:8321 -n $NAMESPACE & +PF_LLAMA_PID=$! +echo "$PF_LLAMA_PID" >"$E2E_LLAMA_PORT_FORWARD_PID_FILE" # Wait for port-forward to be usable (app may not be listening immediately; port-forward can drop) echo "Waiting for port-forward to lightspeed-stack to be ready..." @@ -313,8 +396,18 @@ for i in $(seq 1 36); do fi if [ $i -eq 36 ]; then echo "❌ Port-forward to lightspeed-stack never became ready (3 min)" + echo "" + echo "DEBUG: lightspeed-stack-service logs:" + oc logs lightspeed-stack-service -n "$NAMESPACE" --tail=100 || true + echo "" + echo "DEBUG: llama-stack-service logs:" + oc logs llama-stack-service -n "$NAMESPACE" --tail=100 || true + echo "" + echo "DEBUG: Pod status:" + oc get pods -n "$NAMESPACE" -o wide || true kill $PF_LCS_PID 2>/dev/null || true kill $PF_JWKS_PID 2>/dev/null || true + kill $PF_LLAMA_PID 2>/dev/null || true exit 1 fi # If port-forward process died, restart it (e.g. "connection refused" / "lost connection to pod") @@ -322,14 +415,42 @@ for i in $(seq 1 36); do echo "Port-forward died, restarting (attempt $i)..." oc port-forward svc/lightspeed-stack-service-svc 8080:8080 -n $NAMESPACE & PF_LCS_PID=$! + echo "$PF_LCS_PID" >"$E2E_LSC_PORT_FORWARD_PID_FILE" + fi + sleep 5 +done + +# Wait for Llama Stack port-forward to be usable +echo "Waiting for Llama Stack port-forward (localhost:8321 /v1/health)..." 
+for i in $(seq 1 36); do + if curl -sf http://localhost:8321/v1/health > /dev/null 2>&1; then + echo "✅ Llama Stack port-forward ready after $(( i * 5 ))s" + break + fi + if [ $i -eq 36 ]; then + echo "❌ Port-forward to llama-stack never became healthy (3 min)" + oc logs llama-stack-service -n "$NAMESPACE" --tail=100 || true + kill $PF_LCS_PID 2>/dev/null || true + kill $PF_JWKS_PID 2>/dev/null || true + kill $PF_LLAMA_PID 2>/dev/null || true + exit 1 + fi + if ! kill -0 $PF_LLAMA_PID 2>/dev/null; then + echo "Llama port-forward died, restarting (attempt $i)..." + oc port-forward svc/llama-stack-service-svc 8321:8321 -n $NAMESPACE & + PF_LLAMA_PID=$! + echo "$PF_LLAMA_PID" >"$E2E_LLAMA_PORT_FORWARD_PID_FILE" fi sleep 5 done export E2E_LSC_HOSTNAME="localhost" export E2E_JWKS_HOSTNAME="localhost" +export E2E_DEFAULT_MODEL_OVERRIDE="$MODEL_NAME" +export E2E_DEFAULT_PROVIDER_OVERRIDE="vllm" echo "LCS accessible at: http://$E2E_LSC_HOSTNAME:8080" echo "Mock JWKS accessible at: http://$E2E_JWKS_HOSTNAME:8000" +echo "Llama Stack accessible at: http://localhost:8321" @@ -352,8 +473,11 @@ TEST_EXIT_CODE=$(cat "$E2E_EXIT_CODE_FILE" 2>/dev/null || echo 1) # Kill first so wait doesn't block (if a port-forward is still running, wait would hang) kill $PF_LCS_PID 2>/dev/null || true kill $PF_JWKS_PID 2>/dev/null || true +kill $PF_LLAMA_PID 2>/dev/null || true wait $PF_LCS_PID 2>/dev/null || true wait $PF_JWKS_PID 2>/dev/null || true +wait $PF_LLAMA_PID 2>/dev/null || true +rm -f "$E2E_LSC_PORT_FORWARD_PID_FILE" "$E2E_LLAMA_PORT_FORWARD_PID_FILE" "$E2E_JWKS_PORT_FORWARD_PID_FILE" set -e trap 'echo "❌ Pipeline failed at line $LINENO"; exit 1' ERR diff --git a/tests/e2e-prow/rhoai/scripts/bootstrap.sh b/tests/e2e-prow/rhoai/scripts/bootstrap.sh index 1718b70e5..ae8444ca8 100755 --- a/tests/e2e-prow/rhoai/scripts/bootstrap.sh +++ b/tests/e2e-prow/rhoai/scripts/bootstrap.sh @@ -92,5 +92,11 @@ oc get csv -n openshift-nfd echo "--> Applying DataScienceCluster from ds-cluster.yaml..." 
 oc apply -f "$BASE_DIR/manifests/operators/ds-cluster.yaml"
+# Give the DSC operator a moment to begin reconciling before the status checks below
+sleep 15
+
+echo "--> Checking DSCInitialization and DSC status..."
+oc get dsci -A -o jsonpath='{range .items[*]}DSCI: {.metadata.name} applicationsNS: {.spec.applicationsNamespace}{"\n"}{end}' 2>/dev/null || echo "No DSCInitialization found"
+oc get dsc -A -o jsonpath='{range .items[*]}DSC: {.metadata.name} phase: {.status.phase}{"\n"}{end}' 2>/dev/null || echo "No DSC status yet"
 
 echo "All files applied successfully. The DataScienceCluster is now provisioning."
diff --git a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh
index 540e2aab2..b98eafab3 100755
--- a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh
+++ b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh
@@ -32,6 +32,7 @@ MANIFEST_DIR="$SCRIPT_DIR/../manifests/lightspeed"
 # Written by pipeline.sh when it starts LCS port-forward; e2e-ops kills this PID before rebinding 8080.
 E2E_LSC_PORT_FORWARD_PID_FILE="${E2E_LSC_PORT_FORWARD_PID_FILE:-/tmp/e2e-lightspeed-port-forward.pid}"
 E2E_LLAMA_PORT_FORWARD_PID_FILE="${E2E_LLAMA_PORT_FORWARD_PID_FILE:-/tmp/e2e-llama-port-forward.pid}"
+E2E_JWKS_PORT_FORWARD_PID_FILE="${E2E_JWKS_PORT_FORWARD_PID_FILE:-/tmp/e2e-jwks-port-forward.pid}"
 
 # ============================================================================
 # Helper functions
@@ -148,6 +149,23 @@ kill_stale_llama_forward() {
     free_local_tcp_port "$port"
 }
 
+# Kill anything likely to hold the mock-jwks local forward (localhost:8000).
+kill_stale_jwks_forward() { + local port="${1:-8000}" + local saved_pf + if [[ -f "$E2E_JWKS_PORT_FORWARD_PID_FILE" ]]; then + read -r saved_pf <"$E2E_JWKS_PORT_FORWARD_PID_FILE" 2>/dev/null || true + if [[ "$saved_pf" =~ ^[0-9]+$ ]]; then + kill -9 "$saved_pf" 2>/dev/null || true + fi + fi + pkill -9 -f "port-forward.*mock-jwks.*${port}:${port}" 2>/dev/null || true + pkill -9 -f "oc port-forward svc/mock-jwks ${port}:${port}" 2>/dev/null || true + free_local_tcp_port "$port" + sleep 1 + free_local_tcp_port "$port" +} + # After oc port-forward dies in <2s, show recent oc stderr from the log file. e2e_ops_emit_port_forward_immediate_failure_diag() { echo "[e2e-ops] /tmp/port-forward.log (tail 25):" @@ -172,21 +190,30 @@ verify_connectivity() { local max_attempts="${1:-6}" local local_port="${LOCAL_PORT:-8080}" local http_code="" - + for ((attempt=1; attempt<=max_attempts; attempt++)); do - # Check readiness endpoint - accept 200 or 401 (auth required but service is up) + # First check /readiness to see if port-forward is alive (accept 200 or 401) http_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 5 "http://localhost:$local_port/readiness" 2>/dev/null) || http_code="000" - + if [[ "$http_code" == "200" || "$http_code" == "401" ]]; then - return 0 + # Port-forward works; now verify the app is fully initialized by hitting + # a real endpoint. /v1/models requires the Llama Stack handshake to complete. + # Accept 200 (no auth) or 401 (auth enabled) — both prove the full app + # stack is up, not just the TCP socket. 
+ local models_code + models_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 "http://localhost:$local_port/v1/models" 2>/dev/null) || models_code="000" + if [[ "$models_code" == "200" || "$models_code" == "401" ]]; then + return 0 + fi + echo "[e2e-ops] /readiness=$http_code but /v1/models=$models_code (app still initializing, attempt $attempt/$max_attempts)" fi - + if [[ $attempt -lt $max_attempts ]]; then - sleep 2 + sleep 5 fi done - - echo "Connectivity check failed (HTTP: ${http_code:-unknown})" + + echo "Connectivity check failed (readiness: ${http_code:-unknown})" return 1 } @@ -235,33 +262,50 @@ wait_for_llama_stack_http_health() { cmd_restart_lightspeed() { echo "Restarting lightspeed-stack service..." - + + # LCS hangs at startup if Llama Stack is unreachable (blocks Llama handshake, + # never opens port 8080, readiness probe never passes). Ensure Llama Stack + # is healthy before recreating the LCS pod. + if ! _llama_stack_http_health_once 2>/dev/null; then + echo "⚠️ Llama Stack not healthy — restoring before LCS restart..." 
+ cmd_restart_llama_stack || echo "⚠️ Llama Stack restore failed; LCS may be slow to start" + fi + # Delete existing pod (short wait so hook stays within timeout; force if needed) timeout 20 oc delete pod lightspeed-stack-service -n "$NAMESPACE" --ignore-not-found=true --wait=true 2>/dev/null || { oc delete pod lightspeed-stack-service -n "$NAMESPACE" --ignore-not-found=true --force --grace-period=0 2>/dev/null || true sleep 2 } - # Apply manifest (expand LIGHTSPEED_STACK_IMAGE) + # Apply manifest (expand LIGHTSPEED_STACK_IMAGE only; filter prevents blanking other $VAR refs) LIGHTSPEED_STACK_IMAGE="${LIGHTSPEED_STACK_IMAGE:-quay.io/lightspeed-core/lightspeed-stack:dev-latest}" export LIGHTSPEED_STACK_IMAGE _ls_manifest="$MANIFEST_DIR/lightspeed-stack.yaml" - if command -v envsubst >/dev/null 2>&1; then - envsubst < "$_ls_manifest" | oc apply -n "$NAMESPACE" -f - - else - sed "s|\${LIGHTSPEED_STACK_IMAGE}|${LIGHTSPEED_STACK_IMAGE}|g" "$_ls_manifest" | - oc apply -n "$NAMESPACE" -f - - fi - - # Wait for pod to be ready (TCP probe passes when app listens on 8080) - wait_for_pod "lightspeed-stack-service" 40 + sed "s|\${LIGHTSPEED_STACK_IMAGE}|${LIGHTSPEED_STACK_IMAGE}|g" "$_ls_manifest" | + oc apply -n "$NAMESPACE" -f - + # Wait for pod to be ready (TCP probe passes when app listens on 8080). + # Don't let a timeout here abort the function — still attempt port-forward + # and diagnostics so later scenarios have a chance to recover. + local pod_ready=true + if ! 
wait_for_pod "lightspeed-stack-service" 40; then + pod_ready=false + echo "⚠️ Pod not ready within 120s — dumping diagnostics:" + oc describe pod lightspeed-stack-service -n "$NAMESPACE" 2>&1 | tail -30 || true + oc logs lightspeed-stack-service -n "$NAMESPACE" --tail=40 2>&1 || true + fi + # Re-label pod for service discovery oc label pod lightspeed-stack-service pod=lightspeed-stack-service -n "$NAMESPACE" --overwrite - - # Re-establish port-forward + + # Re-establish port-forwards (may succeed even if readiness was slow) cmd_restart_port_forward - + cmd_restart_jwks_port_forward || echo "⚠️ Mock JWKS port-forward failed (RBAC tests may fail)" + + if [[ "$pod_ready" == "false" ]]; then + echo "⚠️ Lightspeed restart completed but pod was slow to become ready" + return 1 + fi echo "✓ Lightspeed restart complete" } @@ -291,12 +335,9 @@ cmd_restart_llama_stack() { fi else # Prow: vLLM Llama Stack image (matches pipeline.sh / pipeline-services.sh) - if command -v envsubst >/dev/null 2>&1; then - envsubst < "$MANIFEST_DIR/llama-stack.yaml" | oc apply -n "$NAMESPACE" -f - - else - sed "s|\${LLAMA_STACK_IMAGE}|${LLAMA_STACK_IMAGE:-}|g" "$MANIFEST_DIR/llama-stack.yaml" | - oc apply -n "$NAMESPACE" -f - - fi + # Use sed instead of envsubst to avoid blanking $VAR references in embedded bash scripts + sed "s|\${LLAMA_STACK_IMAGE}|${LLAMA_STACK_IMAGE:-}|g" "$MANIFEST_DIR/llama-stack-prow.yaml" | + oc apply -n "$NAMESPACE" -f - wait_for_pod "llama-stack-service" 24 echo "Labeling pod for service..." 
 oc label pod llama-stack-service pod=llama-stack-service -n "$NAMESPACE" --overwrite
@@ -453,6 +494,66 @@ cmd_restart_llama_port_forward() {
     return 1
 }
 
+cmd_restart_jwks_port_forward() {
+    local local_port="${LOCAL_JWKS_PORT:-8000}"
+    local remote_port="${REMOTE_JWKS_PORT:-8000}"
+    local max_attempts=4
+    local pf_pid
+    local jwks_pf_log="/tmp/port-forward-jwks.log"
+
+    # Check if existing forward is still alive
+    if [[ -f "$E2E_JWKS_PORT_FORWARD_PID_FILE" ]]; then
+        local saved_pf
+        read -r saved_pf <"$E2E_JWKS_PORT_FORWARD_PID_FILE" 2>/dev/null || true
+        if [[ "$saved_pf" =~ ^[0-9]+$ ]] && kill -0 "$saved_pf" 2>/dev/null; then
+            local http_code
+            http_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 3 "http://127.0.0.1:$local_port/tokens" 2>/dev/null) || http_code="000"
+            if [[ "$http_code" != "000" ]]; then
+                echo "✓ Mock JWKS port-forward already healthy (PID: $saved_pf)"
+                return 0
+            fi
+        fi
+    fi
+
+    echo "Re-establishing mock-jwks port-forward on $local_port:$remote_port..."
+
+    for ((attempt=1; attempt<=max_attempts; attempt++)); do
+        kill_stale_jwks_forward "$local_port"
+        sleep 2
+
+        echo "JWKS port-forward attempt $attempt/$max_attempts"
+
+        : >"$jwks_pf_log"
+        nohup oc port-forward svc/mock-jwks "$local_port:$remote_port" -n "$NAMESPACE" \
+            >"$jwks_pf_log" 2>&1 &
+        pf_pid=$!
+        disown "$pf_pid" 2>/dev/null || true
+        sleep 3
+
+        if ! kill -0 "$pf_pid" 2>/dev/null; then
+            echo "JWKS port-forward process exited immediately"
+            continue
+        fi
+
+        local http_code
+        http_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 5 "http://127.0.0.1:$local_port/tokens" 2>/dev/null) || http_code="000"
+        if [[ "$http_code" != "000" ]]; then
+            echo "$pf_pid" >"$E2E_JWKS_PORT_FORWARD_PID_FILE"
+            echo "✓ Mock JWKS port-forward established (PID: $pf_pid)"
+            return 0
+        fi
+
+        if [[ $attempt -lt $max_attempts ]]; then
+            echo "JWKS forward attempt $attempt failed, retrying..."
+ kill -9 "$pf_pid" 2>/dev/null || true + sleep 2 + fi + done + + echo "Failed to establish mock-jwks port-forward on :$local_port" + return 1 +} + cmd_wait_for_pod() { local pod_name="${1:?Pod name required}" local max_attempts="${2:-24}" @@ -462,16 +563,24 @@ cmd_wait_for_pod() { cmd_update_configmap() { local configmap_name="${1:?ConfigMap name required}" local source_file="${2:?Source file required}" - + echo "Updating ConfigMap $configmap_name from $source_file..." - - # Delete existing configmap - oc delete configmap "$configmap_name" -n "$NAMESPACE" --ignore-not-found=true - - # Create new configmap from the source file - oc create configmap "$configmap_name" -n "$NAMESPACE" \ - --from-file="lightspeed-stack.yaml=$source_file" - + + if [[ ! -f "$source_file" ]]; then + echo "ERROR: source file does not exist: $source_file" >&2 + return 1 + fi + + # Use dry-run + apply to avoid the delete-then-create race. + # If delete succeeds but create fails the ConfigMap is gone and every + # subsequent attempt cascades into failure. + if ! 
oc create configmap "$configmap_name" -n "$NAMESPACE" \ + --from-file="lightspeed-stack.yaml=$source_file" \ + --dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f -; then + echo "ERROR: oc apply for ConfigMap $configmap_name failed" >&2 + return 1 + fi + echo "✓ ConfigMap $configmap_name updated successfully" } @@ -515,6 +624,9 @@ case "$COMMAND" in restart-llama-port-forward) cmd_restart_llama_port_forward ;; + restart-jwks-port-forward) + cmd_restart_jwks_port_forward + ;; restart-port-forward) cmd_restart_port_forward ;; diff --git a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py index e519217e0..ca2474578 100644 --- a/tests/e2e/features/environment.py +++ b/tests/e2e/features/environment.py @@ -17,13 +17,20 @@ from behave.runner import Context from tests.e2e.features.steps.common import ( + get_llama_stack_hostname, + get_llama_stack_port, reset_active_lightspeed_stack_config_basename, ) -from tests.e2e.features.steps.health import reset_llama_stack_disrupt_once_tracking +from tests.e2e.features.steps.health import ( + get_llama_stack_was_running, + reset_llama_stack_disrupt_once_tracking, + reset_llama_stack_was_running, +) from tests.e2e.utils.llama_stack_utils import register_shield from tests.e2e.utils.prow_utils import ( restart_pod, restore_llama_stack_pod, + run_e2e_ops, ) from tests.e2e.utils.utils import ( is_prow_environment, @@ -133,6 +140,49 @@ def before_all(context: Context) -> None: ) +def _ensure_prow_port_forward(context: Context) -> None: + """Check that the lightspeed port-forward is alive; restart it if dead. + + Probes localhost:{E2E_LSC_PORT}/readiness — if it fails, calls e2e-ops + restart-port-forward to re-establish the tunnel before the scenario runs. 
+ """ + host = os.getenv("E2E_LSC_HOSTNAME", "localhost") + port = os.getenv("E2E_LSC_PORT", "8080") + url = f"http://{host}:{port}/readiness" + try: + resp = requests.get(url, timeout=5) + if resp.status_code in (200, 401): + return + except requests.RequestException: + pass + + print("[before_scenario] Port-forward appears dead, restarting...") + try: + result = run_e2e_ops("restart-port-forward", timeout=60) + print(result.stdout, end="") + if result.returncode == 0: + print("[before_scenario] Port-forward re-established") + return + print(result.stderr, end="") + except subprocess.TimeoutExpired: + pass + + # Port-forward alone failed — the pod itself may be dead (e.g. Llama Stack + # was never restored after a disruption feature). Attempt a full restart, + # which also checks Llama health before recreating LCS. + print("[before_scenario] Port-forward failed; attempting full pod restart...") + try: + result = run_e2e_ops("restart-lightspeed", timeout=200) + print(result.stdout, end="") + if result.returncode != 0: + print(result.stderr, end="") + print("[before_scenario] Warning: full pod restart failed") + else: + print("[before_scenario] Pod restart + port-forward re-established") + except subprocess.TimeoutExpired: + print("[before_scenario] Warning: full pod restart timed out") + + def before_scenario(context: Context, scenario: Scenario) -> None: """Run before each scenario is run. @@ -157,6 +207,17 @@ def before_scenario(context: Context, scenario: Scenario) -> None: scenario.skip("Skipped in library mode (no separate llama-stack container)") return + # Skip scenarios that depend on services not deployed in Prow/OpenShift + # (e.g. mock-tls-inference, proxy sidecars only available in Docker Compose) + if is_prow_environment() and "skip-in-prow" in scenario.effective_tags: + scenario.skip("Skipped in Prow (requires Docker Compose services)") + return + + # In Prow, verify the lightspeed port-forward is alive before each scenario. 
+ # Port-forwards can silently die between scenarios (e.g. pod restart, TCP reset). + if is_prow_environment(): + _ensure_prow_port_forward(context) + context.scenario_lightspeed_override_active = False context.lightspeed_stack_skip_restart = False @@ -253,7 +314,7 @@ def _print_llama_stack_diagnostics() -> None: print("--- end diagnostics ---") -def _restore_llama_stack(context: Context) -> None: +def _restore_llama_stack() -> None: """Restore Llama Stack connection after disruption.""" if is_prow_environment(): # Recreate llama pod, then restart LCS so in-process clients reconnect (Llama IP/pod changed). @@ -306,7 +367,7 @@ def _restore_llama_stack(context: Context) -> None: "llama-stack", "curl", "-sf", - f"http://{context.hostname_llama}:{context.port_llama}/v1/health", + f"http://{get_llama_stack_hostname()}:{get_llama_stack_port()}/v1/health", ], capture_output=True, timeout=5, @@ -382,11 +443,12 @@ def after_feature(context: Context, feature: Feature) -> None: when ``context.feedback_e2e_conversation_cleanup`` is set by feedback steps, delete tracked feedback test conversations. """ - # Restore Llama Stack FIRST (before any lightspeed-stack restart) - llama_was_running = getattr(context, "llama_stack_was_running", False) - if llama_was_running: - _restore_llama_stack(context) - context.llama_stack_was_running = False + # Restore Llama Stack FIRST (before any lightspeed-stack restart). + # Read from module-level state — Behave clears custom context attributes + # between scenarios, so context.llama_stack_was_running is unreliable here. 
+ if get_llama_stack_was_running(): + _restore_llama_stack() + reset_llama_stack_was_running() if getattr(context, "feedback_e2e_conversation_cleanup", False): token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva" diff --git a/tests/e2e/features/proxy.feature b/tests/e2e/features/proxy.feature index aaab54f4e..d4d9a49f0 100644 --- a/tests/e2e/features/proxy.feature +++ b/tests/e2e/features/proxy.feature @@ -1,4 +1,4 @@ -@e2e_group_3 @skip-in-library-mode +@e2e_group_3 @skip-in-library-mode @skip-in-prow Feature: Proxy and TLS networking tests for Llama Stack providers Verify that the Lightspeed Stack works correctly when Llama Stack's diff --git a/tests/e2e/features/steps/common.py b/tests/e2e/features/steps/common.py index d90e8084e..5b7b3e715 100644 --- a/tests/e2e/features/steps/common.py +++ b/tests/e2e/features/steps/common.py @@ -21,12 +21,26 @@ # YAML across scenarios in one feature. Mutate the dict entry (no global). _active_lightspeed_stack_config_basename: dict[str, Optional[str]] = {"basename": None} +# Behave clears user attributes on ``context`` between scenarios; store +# Llama Stack endpoint info at module level so ``after_feature`` can see it. +_llama_stack_endpoint: dict[str, str] = {"hostname": "localhost", "port": "8321"} + def reset_active_lightspeed_stack_config_basename() -> None: """Reset before each feature; see ``environment.before_feature``.""" _active_lightspeed_stack_config_basename["basename"] = None +def get_llama_stack_hostname() -> str: + """Return the Llama Stack hostname surviving per-scenario context clearing.""" + return _llama_stack_endpoint["hostname"] + + +def get_llama_stack_port() -> str: + """Return the Llama Stack port surviving per-scenario context clearing.""" + return _llama_stack_endpoint["port"] + + @given("The service is started locally") def service_is_started_locally(context: Context) -> None: """Check the service status. 
@@ -46,6 +60,8 @@ def service_is_started_locally(context: Context) -> None: else: context.hostname_llama = "localhost" context.port_llama = os.getenv("E2E_LLAMA_PORT", "8321") + _llama_stack_endpoint["hostname"] = context.hostname_llama + _llama_stack_endpoint["port"] = context.port_llama @given('the Lightspeed stack configuration directory is "{directory}"') diff --git a/tests/e2e/features/steps/health.py b/tests/e2e/features/steps/health.py index b42ffe859..dd5243c5a 100644 --- a/tests/e2e/features/steps/health.py +++ b/tests/e2e/features/steps/health.py @@ -13,10 +13,25 @@ # Mutate one dict entry so we need not reassign a module-level bool (no global). _llama_stack_disrupt_once: dict[str, bool] = {"applied": False} +# Behave clears user attributes on ``context`` between scenarios; store +# ``was_running`` at module level so ``after_feature`` can still see it. +_llama_stack_was_running: dict[str, bool] = {"value": False} + + +def get_llama_stack_was_running() -> bool: + """Return whether Llama Stack was running before the disruption step.""" + return _llama_stack_was_running["value"] + + +def reset_llama_stack_was_running() -> None: + """Clear the module-level was_running flag after restoration.""" + _llama_stack_was_running["value"] = False + def reset_llama_stack_disrupt_once_tracking() -> None: """Reset before each feature; see ``environment.before_feature``.""" _llama_stack_disrupt_once["applied"] = False + _llama_stack_was_running["value"] = False @given("The llama-stack connection is disrupted") @@ -50,13 +65,18 @@ def llama_stack_connection_broken(context: Context) -> None: print("Llama Stack disruption skipped (already applied once this feature)") return - # Store original state for restoration (only on the real disruption path) + # Store original state for restoration (only on the real disruption path). + # Write to both context (backward compat) and module-level dict (survives + # Behave's per-scenario context clearing). 
context.llama_stack_was_running = False + _llama_stack_was_running["value"] = False if is_prow_environment(): from tests.e2e.utils.prow_utils import disrupt_llama_stack_pod - context.llama_stack_was_running = disrupt_llama_stack_pod() + was_running = disrupt_llama_stack_pod() + context.llama_stack_was_running = was_running + _llama_stack_was_running["value"] = was_running _llama_stack_disrupt_once["applied"] = True return @@ -71,6 +91,7 @@ def llama_stack_connection_broken(context: Context) -> None: if result.stdout.strip(): context.llama_stack_was_running = True + _llama_stack_was_running["value"] = True subprocess.run( ["docker", "stop", "llama-stack"], check=True, capture_output=True ) diff --git a/tests/e2e/features/tls.feature b/tests/e2e/features/tls.feature index 5a2d77338..a900b1c0f 100644 --- a/tests/e2e/features/tls.feature +++ b/tests/e2e/features/tls.feature @@ -1,4 +1,4 @@ -@e2e_group_1 @skip-in-library-mode +@e2e_group_1 @skip-in-library-mode @skip-in-prow Feature: TLS configuration for remote inference providers Validate that Llama Stack's NetworkConfig.tls settings are applied correctly when connecting to a remote inference provider over HTTPS. diff --git a/tests/e2e/utils/prow_utils.py b/tests/e2e/utils/prow_utils.py index 60e9a7a71..ca06727ea 100644 --- a/tests/e2e/utils/prow_utils.py +++ b/tests/e2e/utils/prow_utils.py @@ -197,7 +197,7 @@ def remove_configmap_backup(backup_key: str) -> None: def _recreate_configmap(configmap_name: str, source_file: str) -> None: - """Delete and recreate a ConfigMap from a file. + """Update a ConfigMap from a file via oc apply. Args: configmap_name: Name of the ConfigMap. 
@@ -205,6 +205,8 @@ def _recreate_configmap(configmap_name: str, source_file: str) -> None: """ result = run_e2e_ops("update-configmap", [configmap_name, source_file], timeout=60) if result.returncode != 0: + print(f"update-configmap stdout: {result.stdout}") + print(f"update-configmap stderr: {result.stderr}") raise subprocess.CalledProcessError( result.returncode, "update-configmap", result.stderr )