From 340c0052ac04af0517aad1554f4b533c49608e3d Mon Sep 17 00:00:00 2001 From: Radovan Fuchs Date: Fri, 27 Mar 2026 13:38:00 +0100 Subject: [PATCH 01/18] bump rhoai image version in prow tests --- tests/e2e-prow/rhoai/pipeline.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e-prow/rhoai/pipeline.sh b/tests/e2e-prow/rhoai/pipeline.sh index 73585cb82..0a6d2c6c0 100755 --- a/tests/e2e-prow/rhoai/pipeline.sh +++ b/tests/e2e-prow/rhoai/pipeline.sh @@ -13,7 +13,7 @@ MODEL_NAME="meta-llama/Llama-3.1-8B-Instruct" PIPELINE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # RHOAI llama-stack image -LLAMA_STACK_IMAGE="${LLAMA_STACK_IMAGE:-quay.io/rhoai/odh-llama-stack-core-rhel9:rhoai-3.3}" +LLAMA_STACK_IMAGE="${LLAMA_STACK_IMAGE:-quay.io/rhoai/odh-llama-stack-core-rhel9:rhoai-3.4-ea.2" echo "Using llama-stack image: $LLAMA_STACK_IMAGE" export LLAMA_STACK_IMAGE From a53c2e1b45db8860b34c46f0ebc3ce6584792150 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Sat, 28 Mar 2026 15:55:09 +0100 Subject: [PATCH 02/18] Fix missing closing brace in LLAMA_STACK_IMAGE parameter expansion Co-Authored-By: Claude Opus 4.6 --- tests/e2e-prow/rhoai/pipeline.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e-prow/rhoai/pipeline.sh b/tests/e2e-prow/rhoai/pipeline.sh index 0a6d2c6c0..567d07c86 100755 --- a/tests/e2e-prow/rhoai/pipeline.sh +++ b/tests/e2e-prow/rhoai/pipeline.sh @@ -13,7 +13,7 @@ MODEL_NAME="meta-llama/Llama-3.1-8B-Instruct" PIPELINE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # RHOAI llama-stack image -LLAMA_STACK_IMAGE="${LLAMA_STACK_IMAGE:-quay.io/rhoai/odh-llama-stack-core-rhel9:rhoai-3.4-ea.2" +LLAMA_STACK_IMAGE="${LLAMA_STACK_IMAGE:-quay.io/rhoai/odh-llama-stack-core-rhel9:rhoai-3.4-ea.2}" echo "Using llama-stack image: $LLAMA_STACK_IMAGE" export LLAMA_STACK_IMAGE From 9105daf5c6ed2bf86f9aeb0cf3f69bdbbe3d93e2 Mon Sep 17 00:00:00 2001 From: are-ces 
<195810094+are-ces@users.noreply.github.com> Date: Sun, 29 Mar 2026 20:01:47 +0200 Subject: [PATCH 03/18] Fix prow pipeline: in-cluster image build, RAG config, port-forward fix - Build llama-stack image in OpenShift internal registry via oc new-build/start-build - Add image-puller role for default SA to pull from internal registry - Add FAISS_VECTOR_STORE_ID and KV_RAG_PATH env vars to lightspeed-stack pod - Add inference, byok_rag, and rag sections to prow lightspeed-stack configs - Use envsubst with specific variable scoping in pipeline-services.sh - Fix free_local_tcp_port to only kill LISTEN sockets (was killing behave process) - Add MCP token secrets and empty OpenAI secret to pipeline.sh - Add rlsapi_v1_infer action to prow RBAC config - Simplify llama-stack.yaml to use pre-built image Co-Authored-By: Claude Opus 4.6 --- .../lightspeed/lightspeed-stack.yaml | 3 +- .../manifests/lightspeed/llama-stack.yaml | 78 +++++++++++++++--- tests/e2e-prow/rhoai/pipeline-services.sh | 21 ++--- tests/e2e-prow/rhoai/pipeline.sh | 80 ++++++++++++++++--- 4 files changed, 150 insertions(+), 32 deletions(-) diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml index 55f9a9310..b10da3c5c 100644 --- a/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml +++ b/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml @@ -22,13 +22,14 @@ spec: secretKeyRef: name: llama-stack-ip-secret key: key - # Same vars as docker-compose / server-mode YAML (${env.FAISS_VECTOR_STORE_ID} in byok_rag). 
- name: FAISS_VECTOR_STORE_ID valueFrom: secretKeyRef: name: faiss-vector-store-secret key: id optional: true + - name: KV_RAG_PATH + value: "/app-root/src/.llama/storage/rag/kv_store.db" image: ${LIGHTSPEED_STACK_IMAGE} ports: - containerPort: 8080 diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml index de22831f6..b04c43063 100644 --- a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml +++ b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml @@ -1,30 +1,65 @@ +# Llama Stack pod: uses pre-built image from in-cluster build. +# +# The image is built by pipeline.sh using oc new-build + test.containerfile. +# Only a small init container extracts the RAG DB into the shared volume. +# +# Requires: ConfigMap llama-stack-config (run.yaml), ConfigMap rag-data (kv_store.db.gz). +# Requires: Image built as ${LLAMA_STACK_IMAGE} (set by pipeline.sh). +# apiVersion: v1 kind: Pod metadata: name: llama-stack-service + labels: + pod: llama-stack-service spec: - imagePullSecrets: - - name: quay-lightspeed-pull-secret + securityContext: + seccompProfile: + type: RuntimeDefault initContainers: - name: setup-rag-data image: busybox:latest + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault command: - /bin/sh - -c - | - mkdir -p /data/storage/rag - gunzip -c /rag-data/kv_store.db.gz > /data/storage/rag/kv_store.db + mkdir -p /data/src/.llama/storage/rag /data/src/.llama/storage/files + chmod -R 777 /data + gunzip -c /rag-data/kv_store.db.gz > /data/src/.llama/storage/rag/kv_store.db + chmod -R 777 /data echo "RAG data extracted successfully" - ls -la /data/storage/rag/ volumeMounts: - - name: app-root + - name: rag-storage mountPath: /data - name: rag-data mountPath: /rag-data containers: - name: llama-stack-container - command: ["llama", "stack", "run", 
"/opt/app-root/run.yaml"] + image: ${LLAMA_STACK_IMAGE} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + runAsUser: 1001 + seccompProfile: + type: RuntimeDefault + workingDir: /opt/app-root env: + - name: KV_STORE_PATH + value: "/opt/app-root/src/.llama/storage/kv_store.db" + - name: KV_RAG_PATH + value: "/opt/app-root/src/.llama/storage/rag/kv_store.db" + - name: SQL_STORE_PATH + value: "/opt/app-root/src/.llama/storage/sql_store.db" - name: KSVC_URL valueFrom: secretKeyRef: @@ -37,22 +72,43 @@ spec: key: key - name: INFERENCE_MODEL value: "meta-llama/Llama-3.1-8B-Instruct" + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: openai-api-key-secret + key: key + optional: true + - name: E2E_OPENAI_MODEL + value: "gpt-4o-mini" - name: FAISS_VECTOR_STORE_ID valueFrom: secretKeyRef: name: faiss-vector-store-secret key: id - image: ${LLAMA_STACK_IMAGE} ports: - containerPort: 8321 + readinessProbe: + httpGet: + path: /v1/health + port: 8321 + initialDelaySeconds: 20 + periodSeconds: 5 + failureThreshold: 36 + livenessProbe: + httpGet: + path: /v1/health + port: 8321 + initialDelaySeconds: 120 + periodSeconds: 20 + failureThreshold: 3 volumeMounts: - - name: app-root - mountPath: /opt/app-root/src/.llama + - name: rag-storage + mountPath: /opt/app-root/src/.llama/storage/rag - name: config mountPath: /opt/app-root/run.yaml subPath: run.yaml volumes: - - name: app-root + - name: rag-storage emptyDir: {} - name: config configMap: diff --git a/tests/e2e-prow/rhoai/pipeline-services.sh b/tests/e2e-prow/rhoai/pipeline-services.sh index cd33ab9d5..73aed5671 100755 --- a/tests/e2e-prow/rhoai/pipeline-services.sh +++ b/tests/e2e-prow/rhoai/pipeline-services.sh @@ -1,27 +1,30 @@ #!/bin/bash BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +NAMESPACE="${NAMESPACE:-e2e-rhoai-dsc}" -# Deploy llama-stack -envsubst < "$BASE_DIR/manifests/lightspeed/llama-stack.yaml" | oc apply -f - +# Deploy llama-stack 
(substitute only LLAMA_STACK_IMAGE, leave other ${} intact) +envsubst '${LLAMA_STACK_IMAGE}' < "$BASE_DIR/manifests/lightspeed/llama-stack.yaml" | oc apply -n "$NAMESPACE" -f - oc wait pod/llama-stack-service \ - -n e2e-rhoai-dsc --for=condition=Ready --timeout=600s + -n "$NAMESPACE" --for=condition=Ready --timeout=600s # Get url address of llama-stack pod -oc label pod llama-stack-service pod=llama-stack-service -n e2e-rhoai-dsc +oc label pod llama-stack-service pod=llama-stack-service -n "$NAMESPACE" oc expose pod llama-stack-service \ --name=llama-stack-service-svc \ --port=8321 \ --type=ClusterIP \ - -n e2e-rhoai-dsc + -n "$NAMESPACE" -export E2E_LLAMA_HOSTNAME="llama-stack-service-svc.e2e-rhoai-dsc.svc.cluster.local" +export E2E_LLAMA_HOSTNAME="llama-stack-service-svc.${NAMESPACE}.svc.cluster.local" oc create secret generic llama-stack-ip-secret \ --from-literal=key="$E2E_LLAMA_HOSTNAME" \ - -n e2e-rhoai-dsc || echo "Secret exists" + -n "$NAMESPACE" || echo "Secret exists" -# Deploy lightspeed-stack -oc apply -f "$BASE_DIR/manifests/lightspeed/lightspeed-stack.yaml" +# Deploy lightspeed-stack (substitute only LIGHTSPEED_STACK_IMAGE, leave other ${} intact) +LIGHTSPEED_STACK_IMAGE="${LIGHTSPEED_STACK_IMAGE:-quay.io/lightspeed-core/lightspeed-stack:dev-latest}" +export LIGHTSPEED_STACK_IMAGE +envsubst '${LIGHTSPEED_STACK_IMAGE}' < "$BASE_DIR/manifests/lightspeed/lightspeed-stack.yaml" | oc apply -n "$NAMESPACE" -f - diff --git a/tests/e2e-prow/rhoai/pipeline.sh b/tests/e2e-prow/rhoai/pipeline.sh index 567d07c86..59bbe1a1b 100755 --- a/tests/e2e-prow/rhoai/pipeline.sh +++ b/tests/e2e-prow/rhoai/pipeline.sh @@ -8,14 +8,15 @@ export RUNNING_PROW=true #======================================== # 1. 
GLOBAL CONFIG #======================================== -NAMESPACE="e2e-rhoai-dsc" +NAMESPACE="${NAMESPACE:-e2e-rhoai-dsc}" +export NAMESPACE MODEL_NAME="meta-llama/Llama-3.1-8B-Instruct" PIPELINE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -# RHOAI llama-stack image -LLAMA_STACK_IMAGE="${LLAMA_STACK_IMAGE:-quay.io/rhoai/odh-llama-stack-core-rhel9:rhoai-3.4-ea.2}" -echo "Using llama-stack image: $LLAMA_STACK_IMAGE" -export LLAMA_STACK_IMAGE +# RHOAI llama-stack image (unused when building from source via llama-stack-openai.yaml) +# LLAMA_STACK_IMAGE="${LLAMA_STACK_IMAGE:-quay.io/rhoai/odh-llama-stack-core-rhel9:rhoai-3.4-ea.2}" +# echo "Using llama-stack image: $LLAMA_STACK_IMAGE" +# export LLAMA_STACK_IMAGE #======================================== # 2. ENVIRONMENT SETUP @@ -56,6 +57,22 @@ create_secret() { create_secret hf-token-secret --from-literal=token="$HUGGING_FACE_HUB_TOKEN" create_secret vllm-api-key-secret --from-literal=key="$VLLM_API_KEY" +create_secret openai-api-key-secret --from-literal=key="" + +# MCP token secrets for lightspeed-stack +REPO_ROOT="$(cd "$PIPELINE_DIR/../../.." && pwd)" +if [ -f "$REPO_ROOT/tests/e2e/secrets/mcp-token" ]; then + oc create secret generic mcp-file-auth-token -n "$NAMESPACE" \ + --from-file=token="$REPO_ROOT/tests/e2e/secrets/mcp-token" \ + --dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f - + echo "✅ mcp-file-auth-token secret applied" +fi +if [ -f "$REPO_ROOT/tests/e2e/secrets/invalid-mcp-token" ]; then + oc create secret generic mcp-invalid-file-auth-token -n "$NAMESPACE" \ + --from-file=token="$REPO_ROOT/tests/e2e/secrets/invalid-mcp-token" \ + --dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f - + echo "✅ mcp-invalid-file-auth-token secret applied" +fi # Create Quay pull secret for llama-stack images echo "Creating Quay pull secret..." 
@@ -79,7 +96,7 @@ curl -sL -o tool_chat_template_llama3.1_json.jinja \ || { echo "❌ Failed to download jinja template"; exit 1; } oc create configmap vllm-chat-template -n "$NAMESPACE" \ - --from-file=tool_chat_template_llama3.1_json.jinja --dry-run=client -o yaml | oc apply -f - + --from-file=tool_chat_template_llama3.1_json.jinja --dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f - #======================================== @@ -162,18 +179,18 @@ REPO_ROOT="$(cd "$PIPELINE_DIR/../../.." && pwd)" echo "Creating mock server ConfigMaps..." oc create configmap mock-jwks-script -n "$NAMESPACE" \ --from-file=server.py="$REPO_ROOT/tests/e2e/mock_jwks_server/server.py" \ - --dry-run=client -o yaml | oc apply -f - + --dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f - oc create configmap mock-mcp-script -n "$NAMESPACE" \ --from-file=server.py="$REPO_ROOT/tests/e2e/mock_mcp_server/server.py" \ - --dry-run=client -o yaml | oc apply -f - + --dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f - # Deploy mock server pods and services echo "Deploying mock-jwks..." -oc apply -f "$PIPELINE_DIR/manifests/lightspeed/mock-jwks.yaml" +oc apply -n "$NAMESPACE" -f "$PIPELINE_DIR/manifests/lightspeed/mock-jwks.yaml" echo "Deploying mock-mcp..." -oc apply -f "$PIPELINE_DIR/manifests/lightspeed/mock-mcp.yaml" +oc apply -n "$NAMESPACE" -f "$PIPELINE_DIR/manifests/lightspeed/mock-mcp.yaml" # Wait for mock servers to be ready echo "Waiting for mock servers to be ready..." @@ -189,7 +206,39 @@ oc wait pod/mock-jwks pod/mock-mcp \ echo "✅ Mock servers deployed" #======================================== -# 8. DEPLOY LIGHTSPEED STACK AND LLAMA STACK +# 8. 
BUILD LLAMA STACK IMAGE +#======================================== +echo "===== Building llama-stack image =====" +LLAMA_STACK_IMAGE="image-registry.openshift-image-registry.svc:5000/${NAMESPACE}/llama-stack-e2e:latest" +export LLAMA_STACK_IMAGE + +# Create BuildConfig (idempotent) +oc new-build --name=llama-stack-e2e \ + --binary \ + --strategy=docker \ + --image="registry.access.redhat.com/ubi9/ubi-minimal" \ + --to="llama-stack-e2e:latest" \ + -n "$NAMESPACE" 2>/dev/null || echo "BuildConfig llama-stack-e2e already exists" + +# Patch BuildConfig to use test.containerfile instead of Dockerfile +oc patch bc llama-stack-e2e -n "$NAMESPACE" --type=json \ + -p '[{"op":"replace","path":"/spec/strategy/dockerStrategy/dockerfilePath","value":"test.containerfile"}]' 2>/dev/null || true + +# Build from repo root +oc start-build llama-stack-e2e \ + --from-dir="$REPO_ROOT" \ + --follow \ + -n "$NAMESPACE" || { echo "❌ llama-stack image build failed"; exit 1; } + +echo "✅ llama-stack image built: $LLAMA_STACK_IMAGE" + +# Allow default SA to pull from the internal registry +oc policy add-role-to-user system:image-puller \ + system:serviceaccount:${NAMESPACE}:default \ + -n "$NAMESPACE" 2>/dev/null || true + +#======================================== +# 9. 
DEPLOY LIGHTSPEED STACK AND LLAMA STACK #======================================== echo "===== Deploying Services =====" @@ -313,6 +362,15 @@ for i in $(seq 1 36); do fi if [ $i -eq 36 ]; then echo "❌ Port-forward to lightspeed-stack never became ready (3 min)" + echo "" + echo "DEBUG: lightspeed-stack-service logs:" + oc logs lightspeed-stack-service -n "$NAMESPACE" --tail=100 || true + echo "" + echo "DEBUG: llama-stack-service logs:" + oc logs llama-stack-service -n "$NAMESPACE" --tail=100 || true + echo "" + echo "DEBUG: Pod status:" + oc get pods -n "$NAMESPACE" -o wide || true kill $PF_LCS_PID 2>/dev/null || true kill $PF_JWKS_PID 2>/dev/null || true exit 1 From dc88586dc4e6c99404a362470e017631714693f7 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Mon, 30 Mar 2026 08:54:54 +0200 Subject: [PATCH 04/18] Add namespace diagnostic logging to prow pipeline Add DEBUG NS checkpoints to trace when e2e-rhoai-dsc namespace disappears during operator bootstrapping. Co-Authored-By: Claude Opus 4.6 --- tests/e2e-prow/rhoai/pipeline.sh | 2 ++ tests/e2e-prow/rhoai/scripts/bootstrap.sh | 1 + tests/e2e-prow/rhoai/scripts/deploy-vllm.sh | 1 + tests/e2e-prow/rhoai/scripts/gpu-setup.sh | 1 + 4 files changed, 5 insertions(+) diff --git a/tests/e2e-prow/rhoai/pipeline.sh b/tests/e2e-prow/rhoai/pipeline.sh index 59bbe1a1b..13ff0bbd4 100755 --- a/tests/e2e-prow/rhoai/pipeline.sh +++ b/tests/e2e-prow/rhoai/pipeline.sh @@ -43,6 +43,7 @@ oc whoami #======================================== echo "===== Creating namespace & secrets =====" oc get ns "$NAMESPACE" >/dev/null 2>&1 || oc create namespace "$NAMESPACE" +echo "DEBUG NS: after create -> $(oc get ns $NAMESPACE -o jsonpath='{.status.phase}' 2>&1)" # Create NFD and NVIDIA namespaces oc apply -f "$PIPELINE_DIR/manifests/namespaces/nfd.yaml" @@ -102,6 +103,7 @@ oc create configmap vllm-chat-template -n "$NAMESPACE" \ #======================================== # 5. 
DEPLOY vLLM #======================================== +echo "DEBUG NS: before pipeline-vllm -> $(oc get ns $NAMESPACE -o jsonpath='{.status.phase}' 2>&1)" echo "===== Deploying vLLM =====" ./pipeline-vllm.sh oc get pods -n "$NAMESPACE" diff --git a/tests/e2e-prow/rhoai/scripts/bootstrap.sh b/tests/e2e-prow/rhoai/scripts/bootstrap.sh index 1718b70e5..8b7c063c5 100755 --- a/tests/e2e-prow/rhoai/scripts/bootstrap.sh +++ b/tests/e2e-prow/rhoai/scripts/bootstrap.sh @@ -94,3 +94,4 @@ echo "--> Applying DataScienceCluster from ds-cluster.yaml..." oc apply -f "$BASE_DIR/manifests/operators/ds-cluster.yaml" echo "All files applied successfully. The DataScienceCluster is now provisioning." +echo "DEBUG NS: after bootstrap -> $(oc get ns e2e-rhoai-dsc -o jsonpath='{.status.phase}' 2>&1)" diff --git a/tests/e2e-prow/rhoai/scripts/deploy-vllm.sh b/tests/e2e-prow/rhoai/scripts/deploy-vllm.sh index 5c3201fa5..ec3292d0b 100755 --- a/tests/e2e-prow/rhoai/scripts/deploy-vllm.sh +++ b/tests/e2e-prow/rhoai/scripts/deploy-vllm.sh @@ -67,6 +67,7 @@ echo "✅ GPU capacity available." echo "GPU nodes ready:" oc get nodes -l nvidia.com/gpu.present=true -o custom-columns=NAME:.metadata.name,GPU:.status.capacity.nvidia\\.com/gpu,INSTANCE:.metadata.labels.node\\.kubernetes\\.io/instance-type +echo "DEBUG NS: before vLLM deploy -> $(oc get ns e2e-rhoai-dsc -o jsonpath='{.status.phase}' 2>&1)" echo "Applying vLLM manifests..." 
envsubst < "$BASE_DIR/manifests/vllm/vllm-runtime-gpu.yaml" | oc apply -f - diff --git a/tests/e2e-prow/rhoai/scripts/gpu-setup.sh b/tests/e2e-prow/rhoai/scripts/gpu-setup.sh index d72d744bb..ab009a1a3 100755 --- a/tests/e2e-prow/rhoai/scripts/gpu-setup.sh +++ b/tests/e2e-prow/rhoai/scripts/gpu-setup.sh @@ -206,3 +206,4 @@ echo "" echo "ClusterPolicy Status:" oc get clusterpolicy gpu-cluster-policy -o jsonpath='{.status.state}' echo "" +echo "DEBUG NS: after gpu-setup -> $(oc get ns e2e-rhoai-dsc -o jsonpath='{.status.phase}' 2>&1)" From 8aaee739e127067ba3d95ac467623fc2cb007256 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Thu, 9 Apr 2026 13:11:14 +0200 Subject: [PATCH 05/18] Remove namespace from cluster-scoped DataScienceCluster CR Co-Authored-By: Claude Opus 4.6 --- tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml b/tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml index e9b619726..d57226cc1 100644 --- a/tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml +++ b/tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml @@ -2,7 +2,6 @@ apiVersion: datasciencecluster.opendatahub.io/v1 kind: DataScienceCluster metadata: name: default-dsc - namespace: e2e-rhoai-dsc spec: serviceMesh: managementState: Managed From 6bc89989258c0459c620a41921197b5b5ed96e87 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Thu, 9 Apr 2026 15:33:12 +0200 Subject: [PATCH 06/18] Add model/provider override env vars to prow pipeline Co-Authored-By: Claude Opus 4.6 --- tests/e2e-prow/rhoai/pipeline.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/e2e-prow/rhoai/pipeline.sh b/tests/e2e-prow/rhoai/pipeline.sh index 13ff0bbd4..b8b7a03a6 100755 --- a/tests/e2e-prow/rhoai/pipeline.sh +++ b/tests/e2e-prow/rhoai/pipeline.sh @@ -388,6 +388,8 @@ done export 
E2E_LSC_HOSTNAME="localhost" export E2E_JWKS_HOSTNAME="localhost" +export E2E_DEFAULT_MODEL_OVERRIDE="$MODEL_NAME" +export E2E_DEFAULT_PROVIDER_OVERRIDE="vllm" echo "LCS accessible at: http://$E2E_LSC_HOSTNAME:8080" echo "Mock JWKS accessible at: http://$E2E_JWKS_HOSTNAME:8000" From 1f2bebb2c2fc9e47cf60f1aeac8b41703458d7ad Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Fri, 10 Apr 2026 09:23:49 +0200 Subject: [PATCH 07/18] Fix prow pipeline: run bootstrap before namespace creation The RHOAI operator deletes the e2e-rhoai-dsc namespace during DSC reconciliation. Reorder pipeline to run operator bootstrap first, then create namespace and secrets after DSC settles. Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/e2e-prow/rhoai/pipeline.sh | 28 +++++++++++++++-------- tests/e2e-prow/rhoai/scripts/bootstrap.sh | 26 ++++++++++++++++++++- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/tests/e2e-prow/rhoai/pipeline.sh b/tests/e2e-prow/rhoai/pipeline.sh index b8b7a03a6..3c38ccf2c 100755 --- a/tests/e2e-prow/rhoai/pipeline.sh +++ b/tests/e2e-prow/rhoai/pipeline.sh @@ -39,16 +39,22 @@ oc version oc whoami #======================================== -# 3. CREATE NAMESPACE & SECRETS +# 3. 
BOOTSTRAP OPERATORS & DSC (before namespace — DSC operator may delete it) #======================================== -echo "===== Creating namespace & secrets =====" -oc get ns "$NAMESPACE" >/dev/null 2>&1 || oc create namespace "$NAMESPACE" -echo "DEBUG NS: after create -> $(oc get ns $NAMESPACE -o jsonpath='{.status.phase}' 2>&1)" - -# Create NFD and NVIDIA namespaces +echo "===== Bootstrapping operators =====" +# Create NFD and NVIDIA namespaces (needed by operator subscriptions) oc apply -f "$PIPELINE_DIR/manifests/namespaces/nfd.yaml" oc apply -f "$PIPELINE_DIR/manifests/namespaces/nvidia-operator.yaml" +# Install operators and apply DataScienceCluster (this may delete/recreate namespaces) +"$PIPELINE_DIR/scripts/bootstrap.sh" "$PIPELINE_DIR" + +#======================================== +# 4. CREATE NAMESPACE & SECRETS (after DSC settles) +#======================================== +echo "===== Creating namespace & secrets =====" +oc get ns "$NAMESPACE" >/dev/null 2>&1 || oc create namespace "$NAMESPACE" +echo "DEBUG NS: after create -> $(oc get ns $NAMESPACE -o jsonpath='{.status.phase}' 2>&1)" create_secret() { local name=$1; shift @@ -88,7 +94,7 @@ oc secrets link default quay-lightspeed-pull-secret --for=pull -n "$NAMESPACE" 2 #======================================== -# 4. CONFIGMAPS +# 5. CONFIGMAPS #======================================== echo "===== Setting up configmaps =====" @@ -101,11 +107,13 @@ oc create configmap vllm-chat-template -n "$NAMESPACE" \ #======================================== -# 5. DEPLOY vLLM +# 6. 
DEPLOY vLLM (GPU setup + deploy, bootstrap already done) #======================================== -echo "DEBUG NS: before pipeline-vllm -> $(oc get ns $NAMESPACE -o jsonpath='{.status.phase}' 2>&1)" echo "===== Deploying vLLM =====" -./pipeline-vllm.sh +"$PIPELINE_DIR/scripts/gpu-setup.sh" "$PIPELINE_DIR" +source "$PIPELINE_DIR/scripts/fetch-vllm-image.sh" +"$PIPELINE_DIR/scripts/deploy-vllm.sh" "$PIPELINE_DIR" +"$PIPELINE_DIR/scripts/get-vllm-pod-info.sh" oc get pods -n "$NAMESPACE" diff --git a/tests/e2e-prow/rhoai/scripts/bootstrap.sh b/tests/e2e-prow/rhoai/scripts/bootstrap.sh index 8b7c063c5..d5ff94f3d 100755 --- a/tests/e2e-prow/rhoai/scripts/bootstrap.sh +++ b/tests/e2e-prow/rhoai/scripts/bootstrap.sh @@ -20,15 +20,23 @@ wait_for_operator() { } # APPLY OPERATOR SUBSCRIPTIONS +NAMESPACE="${NAMESPACE:-e2e-rhoai-dsc}" +ns_check() { echo "DEBUG NS ($1): $(oc get ns $NAMESPACE -o jsonpath='{.status.phase}' 2>&1)"; } + +ns_check "before operatorgroups" echo "--> Applying OperatorGroups from operatorgroup.yaml..." oc apply -f "$BASE_DIR/manifests/operators/operatorgroup.yaml" +ns_check "after operatorgroups" sleep 10 +ns_check "after 10s sleep (post operatorgroups)" echo "--> Applying Operator Subscriptions from operators.yaml..." oc apply -f "$BASE_DIR/manifests/operators/operators.yaml" +ns_check "after operator subscriptions" sleep 10 +ns_check "after 10s sleep (post subscriptions)" # WAIT FOR GPU OPERATOR NAMESPACE AND OPERATORGROUP echo "--> Ensuring GPU Operator namespace and OperatorGroup are ready..." @@ -50,8 +58,11 @@ echo "--> Waiting for Operators to be installed. This can take several minutes.. 
oc wait --for=condition=established --timeout=300s crd/clusterserviceversions.operators.coreos.com wait_for_operator "operators.coreos.com/servicemeshoperator.openshift-operators" "openshift-operators" "Service Mesh Operator" +ns_check "after Service Mesh Operator ready" wait_for_operator "operators.coreos.com/serverless-operator.openshift-operators" "openshift-operators" "Serverless Operator" +ns_check "after Serverless Operator ready" wait_for_operator "operators.coreos.com/rhods-operator.openshift-operators" "openshift-operators" "RHODS Operator" +ns_check "after RHODS Operator ready" # Verify GPU operator InstallPlan was created before waiting for CSV echo " -> Verifying GPU Operator InstallPlan was created..." @@ -79,19 +90,32 @@ done echo " -> InstallPlan created successfully" wait_for_operator "operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator" "nvidia-gpu-operator" "GPU Operator" +ns_check "after GPU Operator ready" wait_for_operator "operators.coreos.com/nfd.openshift-nfd" "openshift-nfd" "NFD Operator" +ns_check "after NFD Operator ready" echo " -> Waiting for NFD CRD to be established..." oc wait --for=condition=established --timeout=300s crd/nodefeaturediscoveries.nfd.openshift.io echo "--> All operators are ready." +ns_check "after all operators ready" oc get csv -n openshift-operators oc get csv -n nvidia-gpu-operator oc get csv -n openshift-nfd +ns_check "before DSC apply" echo "--> Applying DataScienceCluster from ds-cluster.yaml..." oc apply -f "$BASE_DIR/manifests/operators/ds-cluster.yaml" +ns_check "immediately after DSC apply" +sleep 5 +ns_check "5s after DSC apply" +sleep 10 +ns_check "15s after DSC apply" + +echo "--> Checking DSCInitialization and DSC status..." 
+oc get dsci -A -o jsonpath='{range .items[*]}DSCI: {.metadata.name} applicationsNS: {.spec.applicationsNamespace}{"\n"}{end}' 2>/dev/null || echo "No DSCInitialization found" +oc get dsc -A -o jsonpath='{range .items[*]}DSC: {.metadata.name} phase: {.status.phase}{"\n"}{end}' 2>/dev/null || echo "No DSC status yet" echo "All files applied successfully. The DataScienceCluster is now provisioning." -echo "DEBUG NS: after bootstrap -> $(oc get ns e2e-rhoai-dsc -o jsonpath='{.status.phase}' 2>&1)" +ns_check "end of bootstrap" From c3174992f75cd67e75a9dde0e8e9a51e8ac28d5f Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Fri, 10 Apr 2026 12:36:31 +0200 Subject: [PATCH 08/18] Hardcode NAMESPACE to avoid Prow env override Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/e2e-prow/rhoai/pipeline.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/e2e-prow/rhoai/pipeline.sh b/tests/e2e-prow/rhoai/pipeline.sh index 3c38ccf2c..696c04277 100755 --- a/tests/e2e-prow/rhoai/pipeline.sh +++ b/tests/e2e-prow/rhoai/pipeline.sh @@ -8,7 +8,7 @@ export RUNNING_PROW=true #======================================== # 1. GLOBAL CONFIG #======================================== -NAMESPACE="${NAMESPACE:-e2e-rhoai-dsc}" +NAMESPACE="e2e-rhoai-dsc" export NAMESPACE MODEL_NAME="meta-llama/Llama-3.1-8B-Instruct" PIPELINE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -22,10 +22,10 @@ PIPELINE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # 2. 
ENVIRONMENT SETUP #======================================== echo "===== Setting up environment variables =====" -export HUGGING_FACE_HUB_TOKEN=$(cat /var/run/huggingface/hf-token-ces-lcore-test || true) -export VLLM_API_KEY=$(cat /var/run/vllm/vllm-api-key-lcore-test || true) -export QUAY_ROBOT_NAME=$(cat /var/run/quay-aipcc-name/lcore-quay-name-lcore-test || true) -export QUAY_ROBOT_PASSWORD=$(cat /var/run/quay-aipcc-password/lcore-quay-password-lcore-test || true) +# export HUGGING_FACE_HUB_TOKEN=$(cat /var/run/huggingface/hf-token-ces-lcore-test || true) +# export VLLM_API_KEY=$(cat /var/run/vllm/vllm-api-key-lcore-test || true) +# export QUAY_ROBOT_NAME=$(cat /var/run/quay-aipcc-name/lcore-quay-name-lcore-test || true) +# export QUAY_ROBOT_PASSWORD=$(cat /var/run/quay-aipcc-password/lcore-quay-password-lcore-test || true) [[ -n "$HUGGING_FACE_HUB_TOKEN" ]] && echo "✅ HUGGING_FACE_HUB_TOKEN is set" || { echo "❌ Missing HUGGING_FACE_HUB_TOKEN"; exit 1; } From 4e2a4bdffaf511c67d3ca06f3afd7578b481a2d2 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Fri, 10 Apr 2026 16:11:33 +0200 Subject: [PATCH 09/18] Re-enable secret exports in prow pipeline Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/e2e-prow/rhoai/pipeline.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/e2e-prow/rhoai/pipeline.sh b/tests/e2e-prow/rhoai/pipeline.sh index 696c04277..2f15b2c93 100755 --- a/tests/e2e-prow/rhoai/pipeline.sh +++ b/tests/e2e-prow/rhoai/pipeline.sh @@ -22,10 +22,10 @@ PIPELINE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # 2. 
ENVIRONMENT SETUP #======================================== echo "===== Setting up environment variables =====" -# export HUGGING_FACE_HUB_TOKEN=$(cat /var/run/huggingface/hf-token-ces-lcore-test || true) -# export VLLM_API_KEY=$(cat /var/run/vllm/vllm-api-key-lcore-test || true) -# export QUAY_ROBOT_NAME=$(cat /var/run/quay-aipcc-name/lcore-quay-name-lcore-test || true) -# export QUAY_ROBOT_PASSWORD=$(cat /var/run/quay-aipcc-password/lcore-quay-password-lcore-test || true) +export HUGGING_FACE_HUB_TOKEN=$(cat /var/run/huggingface/hf-token-ces-lcore-test || true) +export VLLM_API_KEY=$(cat /var/run/vllm/vllm-api-key-lcore-test || true) +export QUAY_ROBOT_NAME=$(cat /var/run/quay-aipcc-name/lcore-quay-name-lcore-test || true) +export QUAY_ROBOT_PASSWORD=$(cat /var/run/quay-aipcc-password/lcore-quay-password-lcore-test || true) [[ -n "$HUGGING_FACE_HUB_TOKEN" ]] && echo "✅ HUGGING_FACE_HUB_TOKEN is set" || { echo "❌ Missing HUGGING_FACE_HUB_TOKEN"; exit 1; } From f032f4abef364c37f7dbe1b15fe90b64cd0afff8 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Fri, 17 Apr 2026 12:39:08 +0200 Subject: [PATCH 10/18] Add enrichment and RAG restore to prow llama-stack manifest Rename llama-stack.yaml to llama-stack-prow.yaml and add: - Config enrichment via llama_stack_configuration.py - restore_rag_seed() to re-inflate RAG db after enrichment - PYTHONPATH, lightspeed-stack.yaml mount, rag-data mount - materialize-run-yaml init container - Model/provider overrides in inline_rag e2e tests Co-Authored-By: Claude Opus 4.6 (1M context) --- .../lightspeed/llama-stack-prow.yaml | 203 ++++++++++++++++++ .../manifests/lightspeed/llama-stack.yaml | 118 ---------- tests/e2e-prow/rhoai/pipeline-services.sh | 2 +- tests/e2e-prow/rhoai/scripts/e2e-ops.sh | 4 +- 4 files changed, 206 insertions(+), 121 deletions(-) create mode 100644 tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-prow.yaml delete mode 100644 
tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-prow.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-prow.yaml new file mode 100644 index 000000000..bf03f2871 --- /dev/null +++ b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-prow.yaml @@ -0,0 +1,203 @@ +# Llama Stack pod for Prow: uses pre-built image with enrichment + RAG restore. +# +# Requires: ConfigMap llama-stack-config (run.yaml), ConfigMap rag-data (kv_store.db.gz), +# ConfigMap lightspeed-stack-config (lightspeed-stack.yaml). +# Requires: Image built as ${LLAMA_STACK_IMAGE} (set by pipeline.sh). +# +apiVersion: v1 +kind: Pod +metadata: + name: llama-stack-service + labels: + pod: llama-stack-service +spec: + securityContext: + seccompProfile: + type: RuntimeDefault + initContainers: + - name: setup-rag-data + image: busybox:latest + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + command: + - /bin/sh + - -c + - | + set -e + mkdir -p /data/src/.llama/storage/rag /data/src/.llama/storage/files /data/.e2e-rag-seed + if [ ! 
-f /rag-data/kv_store.db.gz ]; then + echo "FATAL: missing /rag-data/kv_store.db.gz" + ls -la /rag-data || true + exit 1 + fi + gunzip -c /rag-data/kv_store.db.gz > /data/.e2e-rag-seed/kv_store.db + cp -f /data/.e2e-rag-seed/kv_store.db /data/src/.llama/storage/rag/kv_store.db + chmod -R 777 /data + echo "RAG data extracted successfully" + volumeMounts: + - name: rag-storage + mountPath: /data + - name: rag-data + mountPath: /rag-data + - name: materialize-run-yaml + image: busybox:latest + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + command: + - /bin/sh + - -c + - | + set -e + cp /cm/run.yaml /work/run.yaml + chmod 664 /work/run.yaml + volumeMounts: + - name: config-cm + mountPath: /cm + readOnly: true + - name: rag-storage + mountPath: /work + containers: + - name: llama-stack-container + image: ${LLAMA_STACK_IMAGE} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + runAsUser: 1001 + seccompProfile: + type: RuntimeDefault + workingDir: /opt/app-root + env: + - name: PYTHONPATH + value: "/opt/app-root/src" + - name: HOME + value: "/opt/app-root/src" + - name: KV_STORE_PATH + value: "/opt/app-root/src/.llama/storage/kv_store.db" + - name: KV_RAG_PATH + value: "/opt/app-root/src/.llama/storage/rag/kv_store.db" + - name: SQL_STORE_PATH + value: "/opt/app-root/src/.llama/storage/sql_store.db" + - name: KSVC_URL + valueFrom: + secretKeyRef: + name: api-url-secret + key: key + - name: VLLM_API_KEY + valueFrom: + secretKeyRef: + name: vllm-api-key-secret + key: key + - name: INFERENCE_MODEL + value: "meta-llama/Llama-3.1-8B-Instruct" + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: openai-api-key-secret + key: key + optional: true + - name: E2E_OPENAI_MODEL + value: "gpt-4o-mini" + - name: FAISS_VECTOR_STORE_ID + valueFrom: + secretKeyRef: + name: faiss-vector-store-secret + key: id 
+ - name: E2E_LLAMA_HOSTNAME + valueFrom: + secretKeyRef: + name: llama-stack-ip-secret + key: key + command: + - /bin/bash + - -c + - | + set -e + RAG_SEED="/opt/app-root/src/.llama/storage/.e2e-rag-seed/kv_store.db" + RAG_CM_GZ="/opt/app-root/rag-data-cm/kv_store.db.gz" + RAG_WORK="${KV_RAG_PATH:-/opt/app-root/src/.llama/storage/rag/kv_store.db}" + restore_rag_seed() { + mkdir -p "$(dirname "$RAG_WORK")" + if [[ -f "$RAG_CM_GZ" ]]; then + RAG_WORK="$RAG_WORK" RAG_CM_GZ="$RAG_CM_GZ" python3 -c 'import gzip, os, shutil, sys; r, g = os.environ["RAG_WORK"], os.environ["RAG_CM_GZ"]; t = r + ".tmp"; i = gzip.open(g, "rb"); o = open(t, "wb"); shutil.copyfileobj(i, o); i.close(); o.close(); sz = os.path.getsize(t); (sz >= 1048576) or (print("FATAL: RAG from ConfigMap too small:", sz, file=sys.stderr) or sys.exit(1)); os.replace(t, r); os.chmod(r, 0o664)' || exit 1 + elif [[ -f "$RAG_SEED" ]]; then + cp -f "$RAG_SEED" "$RAG_WORK" + chmod 664 "$RAG_WORK" 2>/dev/null || true + fi + } + restore_rag_seed + INPUT_CONFIG="${LLAMA_STACK_CONFIG:-/opt/app-root/run.yaml}" + ENRICHED_CONFIG="/opt/app-root/run.yaml" + LIGHTSPEED_CONFIG="${LIGHTSPEED_CONFIG:-/opt/app-root/lightspeed-stack.yaml}" + ENV_FILE="/opt/app-root/.env" + if [[ -f "$LIGHTSPEED_CONFIG" ]]; then + echo "Enriching llama-stack config..." + ENRICHMENT_FAILED=0 + python3 /opt/app-root/src/llama_stack_configuration.py \ + -c "$LIGHTSPEED_CONFIG" \ + -i "$INPUT_CONFIG" \ + -o "$ENRICHED_CONFIG" \ + -e "$ENV_FILE" 2>&1 || ENRICHMENT_FAILED=1 + if [[ -f "$ENV_FILE" ]]; then + set -a && . 
"$ENV_FILE" && set +a + fi + if [[ -f "$ENRICHED_CONFIG" ]] && [[ "$ENRICHMENT_FAILED" -eq 0 ]]; then + echo "Using enriched config: $ENRICHED_CONFIG" + restore_rag_seed + exec llama stack run "$ENRICHED_CONFIG" + fi + fi + echo "Using original config: $INPUT_CONFIG" + restore_rag_seed + exec llama stack run "$INPUT_CONFIG" + ports: + - containerPort: 8321 + readinessProbe: + httpGet: + path: /v1/health + port: 8321 + initialDelaySeconds: 20 + periodSeconds: 5 + failureThreshold: 36 + livenessProbe: + httpGet: + path: /v1/health + port: 8321 + initialDelaySeconds: 120 + periodSeconds: 20 + failureThreshold: 3 + volumeMounts: + - name: rag-storage + mountPath: /opt/app-root/src/.llama/storage + - name: lightspeed-config + mountPath: /opt/app-root/lightspeed-stack.yaml + subPath: lightspeed-stack.yaml + readOnly: true + - name: rag-data + mountPath: /opt/app-root/rag-data-cm + readOnly: true + volumes: + - name: rag-storage + emptyDir: {} + - name: config-cm + configMap: + name: llama-stack-config + - name: lightspeed-config + configMap: + name: lightspeed-stack-config + - name: rag-data + configMap: + name: rag-data diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml deleted file mode 100644 index b04c43063..000000000 --- a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml +++ /dev/null @@ -1,118 +0,0 @@ -# Llama Stack pod: uses pre-built image from in-cluster build. -# -# The image is built by pipeline.sh using oc new-build + test.containerfile. -# Only a small init container extracts the RAG DB into the shared volume. -# -# Requires: ConfigMap llama-stack-config (run.yaml), ConfigMap rag-data (kv_store.db.gz). -# Requires: Image built as ${LLAMA_STACK_IMAGE} (set by pipeline.sh). 
-# -apiVersion: v1 -kind: Pod -metadata: - name: llama-stack-service - labels: - pod: llama-stack-service -spec: - securityContext: - seccompProfile: - type: RuntimeDefault - initContainers: - - name: setup-rag-data - image: busybox:latest - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: ["ALL"] - runAsNonRoot: true - runAsUser: 65534 - seccompProfile: - type: RuntimeDefault - command: - - /bin/sh - - -c - - | - mkdir -p /data/src/.llama/storage/rag /data/src/.llama/storage/files - chmod -R 777 /data - gunzip -c /rag-data/kv_store.db.gz > /data/src/.llama/storage/rag/kv_store.db - chmod -R 777 /data - echo "RAG data extracted successfully" - volumeMounts: - - name: rag-storage - mountPath: /data - - name: rag-data - mountPath: /rag-data - containers: - - name: llama-stack-container - image: ${LLAMA_STACK_IMAGE} - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: ["ALL"] - runAsNonRoot: true - runAsUser: 1001 - seccompProfile: - type: RuntimeDefault - workingDir: /opt/app-root - env: - - name: KV_STORE_PATH - value: "/opt/app-root/src/.llama/storage/kv_store.db" - - name: KV_RAG_PATH - value: "/opt/app-root/src/.llama/storage/rag/kv_store.db" - - name: SQL_STORE_PATH - value: "/opt/app-root/src/.llama/storage/sql_store.db" - - name: KSVC_URL - valueFrom: - secretKeyRef: - name: api-url-secret - key: key - - name: VLLM_API_KEY - valueFrom: - secretKeyRef: - name: vllm-api-key-secret - key: key - - name: INFERENCE_MODEL - value: "meta-llama/Llama-3.1-8B-Instruct" - - name: OPENAI_API_KEY - valueFrom: - secretKeyRef: - name: openai-api-key-secret - key: key - optional: true - - name: E2E_OPENAI_MODEL - value: "gpt-4o-mini" - - name: FAISS_VECTOR_STORE_ID - valueFrom: - secretKeyRef: - name: faiss-vector-store-secret - key: id - ports: - - containerPort: 8321 - readinessProbe: - httpGet: - path: /v1/health - port: 8321 - initialDelaySeconds: 20 - periodSeconds: 5 - failureThreshold: 36 - livenessProbe: - httpGet: - 
path: /v1/health - port: 8321 - initialDelaySeconds: 120 - periodSeconds: 20 - failureThreshold: 3 - volumeMounts: - - name: rag-storage - mountPath: /opt/app-root/src/.llama/storage/rag - - name: config - mountPath: /opt/app-root/run.yaml - subPath: run.yaml - volumes: - - name: rag-storage - emptyDir: {} - - name: config - configMap: - name: llama-stack-config - - name: rag-data - configMap: - name: rag-data diff --git a/tests/e2e-prow/rhoai/pipeline-services.sh b/tests/e2e-prow/rhoai/pipeline-services.sh index 73aed5671..5f3c9a1ae 100755 --- a/tests/e2e-prow/rhoai/pipeline-services.sh +++ b/tests/e2e-prow/rhoai/pipeline-services.sh @@ -4,7 +4,7 @@ BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" NAMESPACE="${NAMESPACE:-e2e-rhoai-dsc}" # Deploy llama-stack (substitute only LLAMA_STACK_IMAGE, leave other ${} intact) -envsubst '${LLAMA_STACK_IMAGE}' < "$BASE_DIR/manifests/lightspeed/llama-stack.yaml" | oc apply -n "$NAMESPACE" -f - +envsubst '${LLAMA_STACK_IMAGE}' < "$BASE_DIR/manifests/lightspeed/llama-stack-prow.yaml" | oc apply -n "$NAMESPACE" -f - oc wait pod/llama-stack-service \ -n "$NAMESPACE" --for=condition=Ready --timeout=600s diff --git a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh index 540e2aab2..372b1f72c 100755 --- a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh +++ b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh @@ -292,9 +292,9 @@ cmd_restart_llama_stack() { else # Prow: vLLM Llama Stack image (matches pipeline.sh / pipeline-services.sh) if command -v envsubst >/dev/null 2>&1; then - envsubst < "$MANIFEST_DIR/llama-stack.yaml" | oc apply -n "$NAMESPACE" -f - + envsubst < "$MANIFEST_DIR/llama-stack-prow.yaml" | oc apply -n "$NAMESPACE" -f - else - sed "s|\${LLAMA_STACK_IMAGE}|${LLAMA_STACK_IMAGE:-}|g" "$MANIFEST_DIR/llama-stack.yaml" | + sed "s|\${LLAMA_STACK_IMAGE}|${LLAMA_STACK_IMAGE:-}|g" "$MANIFEST_DIR/llama-stack-prow.yaml" | oc apply -n "$NAMESPACE" -f - fi wait_for_pod "llama-stack-service" 24 From 
a33aff69804b7c564530484587caaea0023105ad Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:34:44 +0200 Subject: [PATCH 11/18] Fix prow e2e pipeline: secret ordering, config path, and test runner - Create llama-stack-ip-secret before deploying the pod to fix chicken-and-egg dependency where the pod requires the secret as a non-optional env var - Add LLAMA_STACK_CONFIG env var pointing to the correct emptyDir mount path where materialize-run-yaml init container places run.yaml - Use make test-e2e-local instead of test-e2e to avoid macOS-incompatible script -c flag - Remove DEBUG NS echo lines from pipeline scripts Co-Authored-By: Claude Opus 4.6 --- .../lightspeed/llama-stack-prow.yaml | 4 +++- tests/e2e-prow/rhoai/pipeline-services.sh | 14 +++++++------- tests/e2e-prow/rhoai/pipeline.sh | 1 - tests/e2e-prow/rhoai/scripts/bootstrap.sh | 19 ------------------- tests/e2e-prow/rhoai/scripts/deploy-vllm.sh | 1 - tests/e2e-prow/rhoai/scripts/gpu-setup.sh | 1 - 6 files changed, 10 insertions(+), 30 deletions(-) diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-prow.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-prow.yaml index bf03f2871..757933c3d 100644 --- a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-prow.yaml +++ b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-prow.yaml @@ -38,7 +38,7 @@ spec: fi gunzip -c /rag-data/kv_store.db.gz > /data/.e2e-rag-seed/kv_store.db cp -f /data/.e2e-rag-seed/kv_store.db /data/src/.llama/storage/rag/kv_store.db - chmod -R 777 /data + chmod -R 777 /data/src /data/.e2e-rag-seed echo "RAG data extracted successfully" volumeMounts: - name: rag-storage @@ -111,6 +111,8 @@ spec: optional: true - name: E2E_OPENAI_MODEL value: "gpt-4o-mini" + - name: LLAMA_STACK_CONFIG + value: "/opt/app-root/src/.llama/storage/run.yaml" - name: FAISS_VECTOR_STORE_ID valueFrom: secretKeyRef: diff --git a/tests/e2e-prow/rhoai/pipeline-services.sh 
b/tests/e2e-prow/rhoai/pipeline-services.sh index 5f3c9a1ae..1db04b6ea 100755 --- a/tests/e2e-prow/rhoai/pipeline-services.sh +++ b/tests/e2e-prow/rhoai/pipeline-services.sh @@ -3,13 +3,19 @@ BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" NAMESPACE="${NAMESPACE:-e2e-rhoai-dsc}" +# Create llama-stack-ip-secret before deploying the pod (it references the secret as an env var) +export E2E_LLAMA_HOSTNAME="llama-stack-service-svc.${NAMESPACE}.svc.cluster.local" +oc create secret generic llama-stack-ip-secret \ + --from-literal=key="$E2E_LLAMA_HOSTNAME" \ + -n "$NAMESPACE" 2>/dev/null || echo "Secret llama-stack-ip-secret exists" + # Deploy llama-stack (substitute only LLAMA_STACK_IMAGE, leave other ${} intact) envsubst '${LLAMA_STACK_IMAGE}' < "$BASE_DIR/manifests/lightspeed/llama-stack-prow.yaml" | oc apply -n "$NAMESPACE" -f - oc wait pod/llama-stack-service \ -n "$NAMESPACE" --for=condition=Ready --timeout=600s -# Get url address of llama-stack pod +# Expose llama-stack service oc label pod llama-stack-service pod=llama-stack-service -n "$NAMESPACE" oc expose pod llama-stack-service \ @@ -18,12 +24,6 @@ oc expose pod llama-stack-service \ --type=ClusterIP \ -n "$NAMESPACE" -export E2E_LLAMA_HOSTNAME="llama-stack-service-svc.${NAMESPACE}.svc.cluster.local" - -oc create secret generic llama-stack-ip-secret \ - --from-literal=key="$E2E_LLAMA_HOSTNAME" \ - -n "$NAMESPACE" || echo "Secret exists" - # Deploy lightspeed-stack (substitute only LIGHTSPEED_STACK_IMAGE, leave other ${} intact) LIGHTSPEED_STACK_IMAGE="${LIGHTSPEED_STACK_IMAGE:-quay.io/lightspeed-core/lightspeed-stack:dev-latest}" export LIGHTSPEED_STACK_IMAGE diff --git a/tests/e2e-prow/rhoai/pipeline.sh b/tests/e2e-prow/rhoai/pipeline.sh index 2f15b2c93..505c07815 100755 --- a/tests/e2e-prow/rhoai/pipeline.sh +++ b/tests/e2e-prow/rhoai/pipeline.sh @@ -54,7 +54,6 @@ oc apply -f "$PIPELINE_DIR/manifests/namespaces/nvidia-operator.yaml" #======================================== echo "===== Creating 
namespace & secrets =====" oc get ns "$NAMESPACE" >/dev/null 2>&1 || oc create namespace "$NAMESPACE" -echo "DEBUG NS: after create -> $(oc get ns $NAMESPACE -o jsonpath='{.status.phase}' 2>&1)" create_secret() { local name=$1; shift diff --git a/tests/e2e-prow/rhoai/scripts/bootstrap.sh b/tests/e2e-prow/rhoai/scripts/bootstrap.sh index d5ff94f3d..ae8444ca8 100755 --- a/tests/e2e-prow/rhoai/scripts/bootstrap.sh +++ b/tests/e2e-prow/rhoai/scripts/bootstrap.sh @@ -20,23 +20,15 @@ wait_for_operator() { } # APPLY OPERATOR SUBSCRIPTIONS -NAMESPACE="${NAMESPACE:-e2e-rhoai-dsc}" -ns_check() { echo "DEBUG NS ($1): $(oc get ns $NAMESPACE -o jsonpath='{.status.phase}' 2>&1)"; } - -ns_check "before operatorgroups" echo "--> Applying OperatorGroups from operatorgroup.yaml..." oc apply -f "$BASE_DIR/manifests/operators/operatorgroup.yaml" -ns_check "after operatorgroups" sleep 10 -ns_check "after 10s sleep (post operatorgroups)" echo "--> Applying Operator Subscriptions from operators.yaml..." oc apply -f "$BASE_DIR/manifests/operators/operators.yaml" -ns_check "after operator subscriptions" sleep 10 -ns_check "after 10s sleep (post subscriptions)" # WAIT FOR GPU OPERATOR NAMESPACE AND OPERATORGROUP echo "--> Ensuring GPU Operator namespace and OperatorGroup are ready..." @@ -58,11 +50,8 @@ echo "--> Waiting for Operators to be installed. This can take several minutes.. 
oc wait --for=condition=established --timeout=300s crd/clusterserviceversions.operators.coreos.com wait_for_operator "operators.coreos.com/servicemeshoperator.openshift-operators" "openshift-operators" "Service Mesh Operator" -ns_check "after Service Mesh Operator ready" wait_for_operator "operators.coreos.com/serverless-operator.openshift-operators" "openshift-operators" "Serverless Operator" -ns_check "after Serverless Operator ready" wait_for_operator "operators.coreos.com/rhods-operator.openshift-operators" "openshift-operators" "RHODS Operator" -ns_check "after RHODS Operator ready" # Verify GPU operator InstallPlan was created before waiting for CSV echo " -> Verifying GPU Operator InstallPlan was created..." @@ -90,32 +79,24 @@ done echo " -> InstallPlan created successfully" wait_for_operator "operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator" "nvidia-gpu-operator" "GPU Operator" -ns_check "after GPU Operator ready" wait_for_operator "operators.coreos.com/nfd.openshift-nfd" "openshift-nfd" "NFD Operator" -ns_check "after NFD Operator ready" echo " -> Waiting for NFD CRD to be established..." oc wait --for=condition=established --timeout=300s crd/nodefeaturediscoveries.nfd.openshift.io echo "--> All operators are ready." -ns_check "after all operators ready" oc get csv -n openshift-operators oc get csv -n nvidia-gpu-operator oc get csv -n openshift-nfd -ns_check "before DSC apply" echo "--> Applying DataScienceCluster from ds-cluster.yaml..." oc apply -f "$BASE_DIR/manifests/operators/ds-cluster.yaml" -ns_check "immediately after DSC apply" sleep 5 -ns_check "5s after DSC apply" sleep 10 -ns_check "15s after DSC apply" echo "--> Checking DSCInitialization and DSC status..." 
oc get dsci -A -o jsonpath='{range .items[*]}DSCI: {.metadata.name} applicationsNS: {.spec.applicationsNamespace}{"\n"}{end}' 2>/dev/null || echo "No DSCInitialization found" oc get dsc -A -o jsonpath='{range .items[*]}DSC: {.metadata.name} phase: {.status.phase}{"\n"}{end}' 2>/dev/null || echo "No DSC status yet" echo "All files applied successfully. The DataScienceCluster is now provisioning." -ns_check "end of bootstrap" diff --git a/tests/e2e-prow/rhoai/scripts/deploy-vllm.sh b/tests/e2e-prow/rhoai/scripts/deploy-vllm.sh index ec3292d0b..5c3201fa5 100755 --- a/tests/e2e-prow/rhoai/scripts/deploy-vllm.sh +++ b/tests/e2e-prow/rhoai/scripts/deploy-vllm.sh @@ -67,7 +67,6 @@ echo "✅ GPU capacity available." echo "GPU nodes ready:" oc get nodes -l nvidia.com/gpu.present=true -o custom-columns=NAME:.metadata.name,GPU:.status.capacity.nvidia\\.com/gpu,INSTANCE:.metadata.labels.node\\.kubernetes\\.io/instance-type -echo "DEBUG NS: before vLLM deploy -> $(oc get ns e2e-rhoai-dsc -o jsonpath='{.status.phase}' 2>&1)" echo "Applying vLLM manifests..." 
envsubst < "$BASE_DIR/manifests/vllm/vllm-runtime-gpu.yaml" | oc apply -f - diff --git a/tests/e2e-prow/rhoai/scripts/gpu-setup.sh b/tests/e2e-prow/rhoai/scripts/gpu-setup.sh index ab009a1a3..d72d744bb 100755 --- a/tests/e2e-prow/rhoai/scripts/gpu-setup.sh +++ b/tests/e2e-prow/rhoai/scripts/gpu-setup.sh @@ -206,4 +206,3 @@ echo "" echo "ClusterPolicy Status:" oc get clusterpolicy gpu-cluster-policy -o jsonpath='{.status.state}' echo "" -echo "DEBUG NS: after gpu-setup -> $(oc get ns e2e-rhoai-dsc -o jsonpath='{.status.phase}' 2>&1)" From 3422717b748c2f1ef9c20e32388a8bf286e38586 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Fri, 24 Apr 2026 16:10:12 +0200 Subject: [PATCH 12/18] Fix e2e-ops restart failures and mock-jwks port-forward - Replace unfiltered envsubst with sed in e2e-ops.sh restart commands to prevent blanking $VAR references in embedded bash scripts - Add mock-jwks port-forward management (kill/restart/health check) so RBAC and MCP tests don't fail with connection refused on :8000 - Restart mock-jwks port-forward as part of lightspeed restart - Increase vLLM max-model-len from 2048 to 32768 to avoid context length errors with RAG queries Co-Authored-By: Claude Opus 4.6 --- .../manifests/vllm/vllm-runtime-cpu.yaml | 2 +- .../manifests/vllm/vllm-runtime-gpu.yaml | 2 +- tests/e2e-prow/rhoai/scripts/e2e-ops.sh | 105 +++++++++++++++--- 3 files changed, 92 insertions(+), 17 deletions(-) diff --git a/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-cpu.yaml b/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-cpu.yaml index 4c3f5e7bd..990dc2df3 100644 --- a/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-cpu.yaml +++ b/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-cpu.yaml @@ -24,7 +24,7 @@ spec: - --port - "8080" - --max-model-len - - "2048" + - "32768" image: quay.io/rh-ee-cpompeia/vllm-cpu:latest name: kserve-container env: diff --git a/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml 
b/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml index b7597991c..e925890d2 100644 --- a/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml +++ b/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml @@ -24,7 +24,7 @@ spec: - --port - "8080" - --max-model-len - - "2048" + - "32768" - --gpu-memory-utilization - "0.9" image: ${VLLM_IMAGE} diff --git a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh index 372b1f72c..278d5f8f8 100755 --- a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh +++ b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh @@ -32,6 +32,7 @@ MANIFEST_DIR="$SCRIPT_DIR/../manifests/lightspeed" # Written by pipeline.sh when it starts LCS port-forward; e2e-ops kills this PID before rebinding 8080. E2E_LSC_PORT_FORWARD_PID_FILE="${E2E_LSC_PORT_FORWARD_PID_FILE:-/tmp/e2e-lightspeed-port-forward.pid}" E2E_LLAMA_PORT_FORWARD_PID_FILE="${E2E_LLAMA_PORT_FORWARD_PID_FILE:-/tmp/e2e-llama-port-forward.pid}" +E2E_JWKS_PORT_FORWARD_PID_FILE="${E2E_JWKS_PORT_FORWARD_PID_FILE:-/tmp/e2e-jwks-port-forward.pid}" # ============================================================================ # Helper functions @@ -148,6 +149,23 @@ kill_stale_llama_forward() { free_local_tcp_port "$port" } +# Kill anything likely to hold the mock-jwks local forward (localhost:8000). +kill_stale_jwks_forward() { + local port="${1:-8000}" + local saved_pf + if [[ -f "$E2E_JWKS_PORT_FORWARD_PID_FILE" ]]; then + read -r saved_pf <"$E2E_JWKS_PORT_FORWARD_PID_FILE" 2>/dev/null || true + if [[ "$saved_pf" =~ ^[0-9]+$ ]]; then + kill -9 "$saved_pf" 2>/dev/null || true + fi + fi + pkill -9 -f "port-forward.*mock-jwks.*${port}:${port}" 2>/dev/null || true + pkill -9 -f "oc port-forward svc/mock-jwks ${port}:${port}" 2>/dev/null || true + free_local_tcp_port "$port" + sleep 1 + free_local_tcp_port "$port" +} + # After oc port-forward dies in <2s, show recent oc stderr from the log file. 
e2e_ops_emit_port_forward_immediate_failure_diag() { echo "[e2e-ops] /tmp/port-forward.log (tail 25):" @@ -242,16 +260,12 @@ cmd_restart_lightspeed() { sleep 2 } - # Apply manifest (expand LIGHTSPEED_STACK_IMAGE) + # Apply manifest (expand LIGHTSPEED_STACK_IMAGE only; filter prevents blanking other $VAR refs) LIGHTSPEED_STACK_IMAGE="${LIGHTSPEED_STACK_IMAGE:-quay.io/lightspeed-core/lightspeed-stack:dev-latest}" export LIGHTSPEED_STACK_IMAGE _ls_manifest="$MANIFEST_DIR/lightspeed-stack.yaml" - if command -v envsubst >/dev/null 2>&1; then - envsubst < "$_ls_manifest" | oc apply -n "$NAMESPACE" -f - - else - sed "s|\${LIGHTSPEED_STACK_IMAGE}|${LIGHTSPEED_STACK_IMAGE}|g" "$_ls_manifest" | - oc apply -n "$NAMESPACE" -f - - fi + sed "s|\${LIGHTSPEED_STACK_IMAGE}|${LIGHTSPEED_STACK_IMAGE}|g" "$_ls_manifest" | + oc apply -n "$NAMESPACE" -f - # Wait for pod to be ready (TCP probe passes when app listens on 8080) wait_for_pod "lightspeed-stack-service" 40 @@ -259,9 +273,10 @@ cmd_restart_lightspeed() { # Re-label pod for service discovery oc label pod lightspeed-stack-service pod=lightspeed-stack-service -n "$NAMESPACE" --overwrite - # Re-establish port-forward + # Re-establish port-forwards cmd_restart_port_forward - + cmd_restart_jwks_port_forward || echo "⚠️ Mock JWKS port-forward failed (RBAC tests may fail)" + echo "✓ Lightspeed restart complete" } @@ -291,12 +306,9 @@ cmd_restart_llama_stack() { fi else # Prow: vLLM Llama Stack image (matches pipeline.sh / pipeline-services.sh) - if command -v envsubst >/dev/null 2>&1; then - envsubst < "$MANIFEST_DIR/llama-stack-prow.yaml" | oc apply -n "$NAMESPACE" -f - - else - sed "s|\${LLAMA_STACK_IMAGE}|${LLAMA_STACK_IMAGE:-}|g" "$MANIFEST_DIR/llama-stack-prow.yaml" | - oc apply -n "$NAMESPACE" -f - - fi + # Use sed instead of envsubst to avoid blanking $VAR references in embedded bash scripts + sed "s|\${LLAMA_STACK_IMAGE}|${LLAMA_STACK_IMAGE:-}|g" "$MANIFEST_DIR/llama-stack-prow.yaml" | + oc apply -n "$NAMESPACE" -f - 
     wait_for_pod "llama-stack-service" 24
     echo "Labeling pod for service..."
     oc label pod llama-stack-service pod=llama-stack-service -n "$NAMESPACE" --overwrite
@@ -453,6 +465,66 @@ cmd_restart_llama_port_forward() {
     return 1
 }
 
+cmd_restart_jwks_port_forward() {
+    local local_port="${LOCAL_JWKS_PORT:-8000}"
+    local remote_port="${REMOTE_JWKS_PORT:-8000}"
+    local max_attempts=4
+    local pf_pid
+    local jwks_pf_log="/tmp/port-forward-jwks.log"
+
+    # Check if existing forward is still alive
+    if [[ -f "$E2E_JWKS_PORT_FORWARD_PID_FILE" ]]; then
+        local saved_pf
+        read -r saved_pf <"$E2E_JWKS_PORT_FORWARD_PID_FILE" 2>/dev/null || true
+        if [[ "$saved_pf" =~ ^[0-9]+$ ]] && kill -0 "$saved_pf" 2>/dev/null; then
+            local http_code
+            http_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 3 "http://127.0.0.1:$local_port/tokens" 2>/dev/null) || http_code="000"
+            if [[ "$http_code" != "000" ]]; then
+                echo "✓ Mock JWKS port-forward already healthy (PID: $saved_pf)"
+                return 0
+            fi
+        fi
+    fi
+
+    echo "Re-establishing mock-jwks port-forward on $local_port:$remote_port..."
+
+    for ((attempt=1; attempt<=max_attempts; attempt++)); do
+        kill_stale_jwks_forward "$local_port"
+        sleep 2
+
+        echo "JWKS port-forward attempt $attempt/$max_attempts"
+
+        : >"$jwks_pf_log"
+        nohup oc port-forward svc/mock-jwks "$local_port:$remote_port" -n "$NAMESPACE" \
+            >"$jwks_pf_log" 2>&1 &
+        pf_pid=$!
+        disown "$pf_pid" 2>/dev/null || true
+        sleep 3
+
+        if ! kill -0 "$pf_pid" 2>/dev/null; then
+            echo "JWKS port-forward process exited immediately"
+            continue
+        fi
+
+        local http_code
+        http_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 5 "http://127.0.0.1:$local_port/tokens" 2>/dev/null) || http_code="000"
+        if [[ "$http_code" != "000" ]]; then
+            echo "$pf_pid" >"$E2E_JWKS_PORT_FORWARD_PID_FILE"
+            echo "✓ Mock JWKS port-forward established (PID: $pf_pid)"
+            return 0
+        fi
+
+        if [[ $attempt -lt $max_attempts ]]; then
+            echo "JWKS forward attempt $attempt failed, retrying..."
+ kill -9 "$pf_pid" 2>/dev/null || true + sleep 2 + fi + done + + echo "Failed to establish mock-jwks port-forward on :$local_port" + return 1 +} + cmd_wait_for_pod() { local pod_name="${1:?Pod name required}" local max_attempts="${2:-24}" @@ -515,6 +587,9 @@ case "$COMMAND" in restart-llama-port-forward) cmd_restart_llama_port_forward ;; + restart-jwks-port-forward) + cmd_restart_jwks_port_forward + ;; restart-port-forward) cmd_restart_port_forward ;; From 56c70f5d011a05c6cb856cff4c14e02b316aebd1 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Fri, 24 Apr 2026 16:32:06 +0200 Subject: [PATCH 13/18] Improve port-forward resilience for Prow E2E tests verify_connectivity now checks /v1/models returns 200 (not just /readiness) to ensure the app is fully initialized before declaring success. before_scenario in the test framework probes the port-forward before each scenario and auto-restarts it via e2e-ops if dead. Co-Authored-By: Claude Opus 4.6 --- tests/e2e-prow/rhoai/scripts/e2e-ops.sh | 23 ++++++++++------ tests/e2e/features/environment.py | 35 +++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh index 278d5f8f8..c3a3a7e05 100755 --- a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh +++ b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh @@ -190,21 +190,28 @@ verify_connectivity() { local max_attempts="${1:-6}" local local_port="${LOCAL_PORT:-8080}" local http_code="" - + for ((attempt=1; attempt<=max_attempts; attempt++)); do - # Check readiness endpoint - accept 200 or 401 (auth required but service is up) + # First check /readiness to see if port-forward is alive (accept 200 or 401) http_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 5 "http://localhost:$local_port/readiness" 2>/dev/null) || http_code="000" - + if [[ "$http_code" == "200" || "$http_code" == "401" ]]; then - return 0 + # Port-forward works; now 
verify the app is fully initialized by hitting + # a real endpoint. /v1/models requires the Llama Stack handshake to complete. + local models_code + models_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 "http://localhost:$local_port/v1/models" 2>/dev/null) || models_code="000" + if [[ "$models_code" == "200" ]]; then + return 0 + fi + echo "[e2e-ops] /readiness=$http_code but /v1/models=$models_code (app still initializing, attempt $attempt/$max_attempts)" fi - + if [[ $attempt -lt $max_attempts ]]; then - sleep 2 + sleep 5 fi done - - echo "Connectivity check failed (HTTP: ${http_code:-unknown})" + + echo "Connectivity check failed (readiness: ${http_code:-unknown})" return 1 } diff --git a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py index e519217e0..42d9bd0dd 100644 --- a/tests/e2e/features/environment.py +++ b/tests/e2e/features/environment.py @@ -24,6 +24,7 @@ from tests.e2e.utils.prow_utils import ( restart_pod, restore_llama_stack_pod, + run_e2e_ops, ) from tests.e2e.utils.utils import ( is_prow_environment, @@ -133,6 +134,35 @@ def before_all(context: Context) -> None: ) +def _ensure_prow_port_forward(context: Context) -> None: + """Check that the lightspeed port-forward is alive; restart it if dead. + + Probes localhost:{E2E_LSC_PORT}/readiness — if it fails, calls e2e-ops + restart-port-forward to re-establish the tunnel before the scenario runs. 
+ """ + host = os.getenv("E2E_LSC_HOSTNAME", "localhost") + port = os.getenv("E2E_LSC_PORT", "8080") + url = f"http://{host}:{port}/readiness" + try: + resp = requests.get(url, timeout=5) + if resp.status_code in (200, 401): + return + except requests.RequestException: + pass + + print("[before_scenario] Port-forward appears dead, restarting...") + try: + result = run_e2e_ops("restart-port-forward", timeout=60) + print(result.stdout, end="") + if result.returncode != 0: + print(result.stderr, end="") + print("[before_scenario] Warning: port-forward restart failed") + else: + print("[before_scenario] Port-forward re-established") + except subprocess.TimeoutExpired: + print("[before_scenario] Warning: port-forward restart timed out") + + def before_scenario(context: Context, scenario: Scenario) -> None: """Run before each scenario is run. @@ -157,6 +187,11 @@ def before_scenario(context: Context, scenario: Scenario) -> None: scenario.skip("Skipped in library mode (no separate llama-stack container)") return + # In Prow, verify the lightspeed port-forward is alive before each scenario. + # Port-forwards can silently die between scenarios (e.g. pod restart, TCP reset). + if is_prow_environment(): + _ensure_prow_port_forward(context) + context.scenario_lightspeed_override_active = False context.lightspeed_stack_skip_restart = False From 986c6f36e2396f5d37834c02788d2f1ad950b39c Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Fri, 24 Apr 2026 16:43:42 +0200 Subject: [PATCH 14/18] Fix update-configmap cascade failure and surface oc errors Replace fragile oc delete + oc create with oc create --dry-run | oc apply so a failed update leaves the ConfigMap intact instead of deleted. The old approach caused 156 errored scenarios: if create failed after delete succeeded, the ConfigMap was gone and every subsequent update also failed. Also print stdout/stderr from e2e-ops on failure so the actual oc error is visible in test logs. 
Co-Authored-By: Claude Opus 4.6 --- tests/e2e-prow/rhoai/scripts/e2e-ops.sh | 26 ++++++++++++++++--------- tests/e2e/utils/prow_utils.py | 4 +++- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh index c3a3a7e05..e06ceac2c 100755 --- a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh +++ b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh @@ -541,16 +541,24 @@ cmd_wait_for_pod() { cmd_update_configmap() { local configmap_name="${1:?ConfigMap name required}" local source_file="${2:?Source file required}" - + echo "Updating ConfigMap $configmap_name from $source_file..." - - # Delete existing configmap - oc delete configmap "$configmap_name" -n "$NAMESPACE" --ignore-not-found=true - - # Create new configmap from the source file - oc create configmap "$configmap_name" -n "$NAMESPACE" \ - --from-file="lightspeed-stack.yaml=$source_file" - + + if [[ ! -f "$source_file" ]]; then + echo "ERROR: source file does not exist: $source_file" >&2 + return 1 + fi + + # Use dry-run + apply to avoid the delete-then-create race. + # If delete succeeds but create fails the ConfigMap is gone and every + # subsequent attempt cascades into failure. + if ! oc create configmap "$configmap_name" -n "$NAMESPACE" \ + --from-file="lightspeed-stack.yaml=$source_file" \ + --dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f -; then + echo "ERROR: oc apply for ConfigMap $configmap_name failed" >&2 + return 1 + fi + echo "✓ ConfigMap $configmap_name updated successfully" } diff --git a/tests/e2e/utils/prow_utils.py b/tests/e2e/utils/prow_utils.py index 60e9a7a71..ca06727ea 100644 --- a/tests/e2e/utils/prow_utils.py +++ b/tests/e2e/utils/prow_utils.py @@ -197,7 +197,7 @@ def remove_configmap_backup(backup_key: str) -> None: def _recreate_configmap(configmap_name: str, source_file: str) -> None: - """Delete and recreate a ConfigMap from a file. + """Update a ConfigMap from a file via oc apply. 
Args: configmap_name: Name of the ConfigMap. @@ -205,6 +205,8 @@ def _recreate_configmap(configmap_name: str, source_file: str) -> None: """ result = run_e2e_ops("update-configmap", [configmap_name, source_file], timeout=60) if result.returncode != 0: + print(f"update-configmap stdout: {result.stdout}") + print(f"update-configmap stderr: {result.stderr}") raise subprocess.CalledProcessError( result.returncode, "update-configmap", result.stderr ) From 54c289be83180578eea318c4805b0b542a93a1fa Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Mon, 27 Apr 2026 09:09:44 +0200 Subject: [PATCH 15/18] Fix verify_connectivity for auth-enabled Prow environments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Prow, both /readiness and /v1/models return 401 when auth is enabled. The previous fix only accepted 200 from /v1/models, causing connectivity checks to always fail and port-forward to be declared dead. Accept 401 as valid — it proves the full app stack is running, not just the socket. Co-Authored-By: Claude Opus 4.6 --- tests/e2e-prow/rhoai/scripts/e2e-ops.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh index e06ceac2c..c083beefb 100755 --- a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh +++ b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh @@ -198,9 +198,11 @@ verify_connectivity() { if [[ "$http_code" == "200" || "$http_code" == "401" ]]; then # Port-forward works; now verify the app is fully initialized by hitting # a real endpoint. /v1/models requires the Llama Stack handshake to complete. + # Accept 200 (no auth) or 401 (auth enabled) — both prove the full app + # stack is up, not just the TCP socket. 
local models_code models_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 "http://localhost:$local_port/v1/models" 2>/dev/null) || models_code="000" - if [[ "$models_code" == "200" ]]; then + if [[ "$models_code" == "200" || "$models_code" == "401" ]]; then return 0 fi echo "[e2e-ops] /readiness=$http_code but /v1/models=$models_code (app still initializing, attempt $attempt/$max_attempts)" From 90da0d1b1105d77f41935d7bb346c37122e5947c Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Mon, 27 Apr 2026 15:57:09 +0200 Subject: [PATCH 16/18] Fix Llama Stack disruption cascade and pipeline port-forward coordination Llama Stack disruption tests left the pod dead after the feature because Behave clears custom context attributes between scenarios, so after_feature never saw llama_stack_was_running=True. This caused 59+ subsequent scenarios to cascade-fail with Connection refused. Three fixes: - Store was_running in module-level state (survives Behave context resets) so after_feature reliably triggers _restore_llama_stack - Add restart-lightspeed fallback in before_scenario when port-forward alone fails (recovers from dead pods, not just dead tunnels) - Align pipeline.sh with pipeline-konflux.sh: export PID file paths for e2e-ops.sh, start Llama Stack port-forward on :8321, and use lsof/fuser fallback for port cleanup in minimal images Co-Authored-By: Claude Opus 4.6 --- tests/e2e-prow/rhoai/pipeline.sh | 63 +++++++++++++++++++++++-- tests/e2e-prow/rhoai/scripts/e2e-ops.sh | 32 ++++++++++--- tests/e2e/features/environment.py | 35 ++++++++++---- tests/e2e/features/steps/health.py | 25 +++++++++- 4 files changed, 135 insertions(+), 20 deletions(-) diff --git a/tests/e2e-prow/rhoai/pipeline.sh b/tests/e2e-prow/rhoai/pipeline.sh index 505c07815..39f6fcdba 100755 --- a/tests/e2e-prow/rhoai/pipeline.sh +++ b/tests/e2e-prow/rhoai/pipeline.sh @@ -339,6 +339,15 @@ oc describe pod llama-stack-service -n "$NAMESPACE" || true 
#======================================== # 9. EXPOSE SERVICE & START PORT-FORWARD #======================================== +# Export PID file paths so e2e-ops.sh can find and kill stale port-forwards +# during test-triggered pod restarts (matches pipeline-konflux.sh). +export E2E_LSC_PORT_FORWARD_PID_FILE="${E2E_LSC_PORT_FORWARD_PID_FILE:-/tmp/e2e-lightspeed-port-forward.pid}" +export E2E_LLAMA_PORT_FORWARD_PID_FILE="${E2E_LLAMA_PORT_FORWARD_PID_FILE:-/tmp/e2e-llama-port-forward.pid}" +export E2E_JWKS_PORT_FORWARD_PID_FILE="${E2E_JWKS_PORT_FORWARD_PID_FILE:-/tmp/e2e-jwks-port-forward.pid}" +rm -f "$E2E_LSC_PORT_FORWARD_PID_FILE" +rm -f "$E2E_LLAMA_PORT_FORWARD_PID_FILE" +rm -f "$E2E_JWKS_PORT_FORWARD_PID_FILE" + oc label pod lightspeed-stack-service pod=lightspeed-stack-service -n $NAMESPACE oc expose pod lightspeed-stack-service \ @@ -347,20 +356,36 @@ oc expose pod lightspeed-stack-service \ --type=ClusterIP \ -n $NAMESPACE -# Kill any existing processes on ports 8080 and 8000 -echo "Checking for existing processes on ports 8080 and 8000..." -lsof -ti:8080 | xargs kill -9 2>/dev/null || true -lsof -ti:8000 | xargs kill -9 2>/dev/null || true +# Kill any existing processes on ports 8080, 8000, and 8321 (lsof may be missing in minimal images) +echo "Checking for existing processes on ports 8080, 8000, and 8321..." +if command -v lsof >/dev/null 2>&1; then + lsof -ti:8080 | xargs kill -9 2>/dev/null || true + lsof -ti:8000 | xargs kill -9 2>/dev/null || true + lsof -ti:8321 | xargs kill -9 2>/dev/null || true +elif command -v fuser >/dev/null 2>&1; then + fuser -k 8080/tcp 2>/dev/null || true + fuser -k 8000/tcp 2>/dev/null || true + fuser -k 8321/tcp 2>/dev/null || true +fi # Start port-forward for lightspeed-stack echo "Starting port-forward for lightspeed-stack..." oc port-forward svc/lightspeed-stack-service-svc 8080:8080 -n $NAMESPACE & PF_LCS_PID=$! 
+echo "$PF_LCS_PID" >"$E2E_LSC_PORT_FORWARD_PID_FILE" # Start port-forward for mock-jwks (needed for RBAC tests to get tokens) echo "Starting port-forward for mock-jwks..." oc port-forward svc/mock-jwks 8000:8000 -n $NAMESPACE & PF_JWKS_PID=$! +echo "$PF_JWKS_PID" >"$E2E_JWKS_PORT_FORWARD_PID_FILE" + +# Behave steps that call Llama Stack directly (MCP toolgroups, shields, disrupt/restore) +# need localhost:8321. Without this forward those tests hit "Connection refused". +echo "Starting port-forward for llama-stack..." +oc port-forward svc/llama-stack-service-svc 8321:8321 -n $NAMESPACE & +PF_LLAMA_PID=$! +echo "$PF_LLAMA_PID" >"$E2E_LLAMA_PORT_FORWARD_PID_FILE" # Wait for port-forward to be usable (app may not be listening immediately; port-forward can drop) echo "Waiting for port-forward to lightspeed-stack to be ready..." @@ -382,6 +407,7 @@ for i in $(seq 1 36); do oc get pods -n "$NAMESPACE" -o wide || true kill $PF_LCS_PID 2>/dev/null || true kill $PF_JWKS_PID 2>/dev/null || true + kill $PF_LLAMA_PID 2>/dev/null || true exit 1 fi # If port-forward process died, restart it (e.g. "connection refused" / "lost connection to pod") @@ -389,6 +415,31 @@ for i in $(seq 1 36); do echo "Port-forward died, restarting (attempt $i)..." oc port-forward svc/lightspeed-stack-service-svc 8080:8080 -n $NAMESPACE & PF_LCS_PID=$! + echo "$PF_LCS_PID" >"$E2E_LSC_PORT_FORWARD_PID_FILE" + fi + sleep 5 +done + +# Wait for Llama Stack port-forward to be usable +echo "Waiting for Llama Stack port-forward (localhost:8321 /v1/health)..." 
+for i in $(seq 1 36); do + if curl -sf http://localhost:8321/v1/health > /dev/null 2>&1; then + echo "✅ Llama Stack port-forward ready after $(( i * 5 ))s" + break + fi + if [ $i -eq 36 ]; then + echo "❌ Port-forward to llama-stack never became healthy (3 min)" + oc logs llama-stack-service -n "$NAMESPACE" --tail=100 || true + kill $PF_LCS_PID 2>/dev/null || true + kill $PF_JWKS_PID 2>/dev/null || true + kill $PF_LLAMA_PID 2>/dev/null || true + exit 1 + fi + if ! kill -0 $PF_LLAMA_PID 2>/dev/null; then + echo "Llama port-forward died, restarting (attempt $i)..." + oc port-forward svc/llama-stack-service-svc 8321:8321 -n $NAMESPACE & + PF_LLAMA_PID=$! + echo "$PF_LLAMA_PID" >"$E2E_LLAMA_PORT_FORWARD_PID_FILE" fi sleep 5 done @@ -399,6 +450,7 @@ export E2E_DEFAULT_MODEL_OVERRIDE="$MODEL_NAME" export E2E_DEFAULT_PROVIDER_OVERRIDE="vllm" echo "LCS accessible at: http://$E2E_LSC_HOSTNAME:8080" echo "Mock JWKS accessible at: http://$E2E_JWKS_HOSTNAME:8000" +echo "Llama Stack accessible at: http://localhost:8321" @@ -421,8 +473,11 @@ TEST_EXIT_CODE=$(cat "$E2E_EXIT_CODE_FILE" 2>/dev/null || echo 1) # Kill first so wait doesn't block (if a port-forward is still running, wait would hang) kill $PF_LCS_PID 2>/dev/null || true kill $PF_JWKS_PID 2>/dev/null || true +kill $PF_LLAMA_PID 2>/dev/null || true wait $PF_LCS_PID 2>/dev/null || true wait $PF_JWKS_PID 2>/dev/null || true +wait $PF_LLAMA_PID 2>/dev/null || true +rm -f "$E2E_LSC_PORT_FORWARD_PID_FILE" "$E2E_LLAMA_PORT_FORWARD_PID_FILE" "$E2E_JWKS_PORT_FORWARD_PID_FILE" set -e trap 'echo "❌ Pipeline failed at line $LINENO"; exit 1' ERR diff --git a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh index c083beefb..b98eafab3 100755 --- a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh +++ b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh @@ -262,7 +262,15 @@ wait_for_llama_stack_http_health() { cmd_restart_lightspeed() { echo "Restarting lightspeed-stack service..." 
- + + # LCS hangs at startup if Llama Stack is unreachable (blocks Llama handshake, + # never opens port 8080, readiness probe never passes). Ensure Llama Stack + # is healthy before recreating the LCS pod. + if ! _llama_stack_http_health_once 2>/dev/null; then + echo "⚠️ Llama Stack not healthy — restoring before LCS restart..." + cmd_restart_llama_stack || echo "⚠️ Llama Stack restore failed; LCS may be slow to start" + fi + # Delete existing pod (short wait so hook stays within timeout; force if needed) timeout 20 oc delete pod lightspeed-stack-service -n "$NAMESPACE" --ignore-not-found=true --wait=true 2>/dev/null || { oc delete pod lightspeed-stack-service -n "$NAMESPACE" --ignore-not-found=true --force --grace-period=0 2>/dev/null || true @@ -276,16 +284,28 @@ cmd_restart_lightspeed() { sed "s|\${LIGHTSPEED_STACK_IMAGE}|${LIGHTSPEED_STACK_IMAGE}|g" "$_ls_manifest" | oc apply -n "$NAMESPACE" -f - - # Wait for pod to be ready (TCP probe passes when app listens on 8080) - wait_for_pod "lightspeed-stack-service" 40 - + # Wait for pod to be ready (TCP probe passes when app listens on 8080). + # Don't let a timeout here abort the function — still attempt port-forward + # and diagnostics so later scenarios have a chance to recover. + local pod_ready=true + if ! 
wait_for_pod "lightspeed-stack-service" 40; then + pod_ready=false + echo "⚠️ Pod not ready within 120s — dumping diagnostics:" + oc describe pod lightspeed-stack-service -n "$NAMESPACE" 2>&1 | tail -30 || true + oc logs lightspeed-stack-service -n "$NAMESPACE" --tail=40 2>&1 || true + fi + # Re-label pod for service discovery oc label pod lightspeed-stack-service pod=lightspeed-stack-service -n "$NAMESPACE" --overwrite - - # Re-establish port-forwards + + # Re-establish port-forwards (may succeed even if readiness was slow) cmd_restart_port_forward cmd_restart_jwks_port_forward || echo "⚠️ Mock JWKS port-forward failed (RBAC tests may fail)" + if [[ "$pod_ready" == "false" ]]; then + echo "⚠️ Lightspeed restart completed but pod was slow to become ready" + return 1 + fi echo "✓ Lightspeed restart complete" } diff --git a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py index 42d9bd0dd..b0f98bed5 100644 --- a/tests/e2e/features/environment.py +++ b/tests/e2e/features/environment.py @@ -19,7 +19,11 @@ from tests.e2e.features.steps.common import ( reset_active_lightspeed_stack_config_basename, ) -from tests.e2e.features.steps.health import reset_llama_stack_disrupt_once_tracking +from tests.e2e.features.steps.health import ( + get_llama_stack_was_running, + reset_llama_stack_disrupt_once_tracking, + reset_llama_stack_was_running, +) from tests.e2e.utils.llama_stack_utils import register_shield from tests.e2e.utils.prow_utils import ( restart_pod, @@ -154,13 +158,27 @@ def _ensure_prow_port_forward(context: Context) -> None: try: result = run_e2e_ops("restart-port-forward", timeout=60) print(result.stdout, end="") + if result.returncode == 0: + print("[before_scenario] Port-forward re-established") + return + print(result.stderr, end="") + except subprocess.TimeoutExpired: + pass + + # Port-forward alone failed — the pod itself may be dead (e.g. Llama Stack + # was never restored after a disruption feature). 
Attempt a full restart, + # which also checks Llama health before recreating LCS. + print("[before_scenario] Port-forward failed; attempting full pod restart...") + try: + result = run_e2e_ops("restart-lightspeed", timeout=200) + print(result.stdout, end="") if result.returncode != 0: print(result.stderr, end="") - print("[before_scenario] Warning: port-forward restart failed") + print("[before_scenario] Warning: full pod restart failed") else: - print("[before_scenario] Port-forward re-established") + print("[before_scenario] Pod restart + port-forward re-established") except subprocess.TimeoutExpired: - print("[before_scenario] Warning: port-forward restart timed out") + print("[before_scenario] Warning: full pod restart timed out") def before_scenario(context: Context, scenario: Scenario) -> None: @@ -417,11 +435,12 @@ def after_feature(context: Context, feature: Feature) -> None: when ``context.feedback_e2e_conversation_cleanup`` is set by feedback steps, delete tracked feedback test conversations. """ - # Restore Llama Stack FIRST (before any lightspeed-stack restart) - llama_was_running = getattr(context, "llama_stack_was_running", False) - if llama_was_running: + # Restore Llama Stack FIRST (before any lightspeed-stack restart). + # Read from module-level state — Behave clears custom context attributes + # between scenarios, so context.llama_stack_was_running is unreliable here. 
+ if get_llama_stack_was_running(): _restore_llama_stack(context) - context.llama_stack_was_running = False + reset_llama_stack_was_running() if getattr(context, "feedback_e2e_conversation_cleanup", False): token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva" diff --git a/tests/e2e/features/steps/health.py b/tests/e2e/features/steps/health.py index b42ffe859..dd5243c5a 100644 --- a/tests/e2e/features/steps/health.py +++ b/tests/e2e/features/steps/health.py @@ -13,10 +13,25 @@ # Mutate one dict entry so we need not reassign a module-level bool (no global). _llama_stack_disrupt_once: dict[str, bool] = {"applied": False} +# Behave clears user attributes on ``context`` between scenarios; store +# ``was_running`` at module level so ``after_feature`` can still see it. +_llama_stack_was_running: dict[str, bool] = {"value": False} + + +def get_llama_stack_was_running() -> bool: + """Return whether Llama Stack was running before the disruption step.""" + return _llama_stack_was_running["value"] + + +def reset_llama_stack_was_running() -> None: + """Clear the module-level was_running flag after restoration.""" + _llama_stack_was_running["value"] = False + def reset_llama_stack_disrupt_once_tracking() -> None: """Reset before each feature; see ``environment.before_feature``.""" _llama_stack_disrupt_once["applied"] = False + _llama_stack_was_running["value"] = False @given("The llama-stack connection is disrupted") @@ -50,13 +65,18 @@ def llama_stack_connection_broken(context: Context) -> None: print("Llama Stack disruption skipped (already applied once this feature)") return - # Store original state for restoration (only on the real disruption path) + # Store original state for restoration (only on the real disruption path). + # Write to both context (backward compat) and module-level dict (survives + # Behave's per-scenario context clearing). 
context.llama_stack_was_running = False + _llama_stack_was_running["value"] = False if is_prow_environment(): from tests.e2e.utils.prow_utils import disrupt_llama_stack_pod - context.llama_stack_was_running = disrupt_llama_stack_pod() + was_running = disrupt_llama_stack_pod() + context.llama_stack_was_running = was_running + _llama_stack_was_running["value"] = was_running _llama_stack_disrupt_once["applied"] = True return @@ -71,6 +91,7 @@ def llama_stack_connection_broken(context: Context) -> None: if result.stdout.strip(): context.llama_stack_was_running = True + _llama_stack_was_running["value"] = True subprocess.run( ["docker", "stop", "llama-stack"], check=True, capture_output=True ) From b033f92b59fa3569c41aede619fc891706e6dd37 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Tue, 28 Apr 2026 09:33:46 +0200 Subject: [PATCH 17/18] Skip TLS and proxy e2e tests in Prow (no Docker Compose services) TLS and proxy features depend on mock-tls-inference and proxy sidecars that are only deployed via Docker Compose, not in the OpenShift cluster. Every TLS scenario burned 200s waiting for a provider that never exists, consuming ~63 min of the 4h Prow timeout for guaranteed failures. Co-Authored-By: Claude Opus 4.6 --- tests/e2e/features/environment.py | 6 ++++++ tests/e2e/features/proxy.feature | 2 +- tests/e2e/features/tls.feature | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py index b0f98bed5..77b8bd1f7 100644 --- a/tests/e2e/features/environment.py +++ b/tests/e2e/features/environment.py @@ -205,6 +205,12 @@ def before_scenario(context: Context, scenario: Scenario) -> None: scenario.skip("Skipped in library mode (no separate llama-stack container)") return + # Skip scenarios that depend on services not deployed in Prow/OpenShift + # (e.g. 
mock-tls-inference, proxy sidecars only available in Docker Compose) + if is_prow_environment() and "skip-in-prow" in scenario.effective_tags: + scenario.skip("Skipped in Prow (requires Docker Compose services)") + return + # In Prow, verify the lightspeed port-forward is alive before each scenario. # Port-forwards can silently die between scenarios (e.g. pod restart, TCP reset). if is_prow_environment(): diff --git a/tests/e2e/features/proxy.feature b/tests/e2e/features/proxy.feature index aaab54f4e..d4d9a49f0 100644 --- a/tests/e2e/features/proxy.feature +++ b/tests/e2e/features/proxy.feature @@ -1,4 +1,4 @@ -@e2e_group_3 @skip-in-library-mode +@e2e_group_3 @skip-in-library-mode @skip-in-prow Feature: Proxy and TLS networking tests for Llama Stack providers Verify that the Lightspeed Stack works correctly when Llama Stack's diff --git a/tests/e2e/features/tls.feature b/tests/e2e/features/tls.feature index 5a2d77338..a900b1c0f 100644 --- a/tests/e2e/features/tls.feature +++ b/tests/e2e/features/tls.feature @@ -1,4 +1,4 @@ -@e2e_group_1 @skip-in-library-mode +@e2e_group_1 @skip-in-library-mode @skip-in-prow Feature: TLS configuration for remote inference providers Validate that Llama Stack's NetworkConfig.tls settings are applied correctly when connecting to a remote inference provider over HTTPS. From aa6ab2e0abbe2a6722e8b559ad8e1ca27a55e2e8 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Tue, 28 Apr 2026 15:51:00 +0200 Subject: [PATCH 18/18] Fix after_feature AttributeError on hostname_llama Behave clears custom context attributes between scenarios, so hostname_llama/port_llama are gone by the time after_feature runs. Store them in module-level state (same pattern as llama_stack_was_running). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/e2e/features/environment.py | 8 +++++--- tests/e2e/features/steps/common.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py index 77b8bd1f7..ca2474578 100644 --- a/tests/e2e/features/environment.py +++ b/tests/e2e/features/environment.py @@ -17,6 +17,8 @@ from behave.runner import Context from tests.e2e.features.steps.common import ( + get_llama_stack_hostname, + get_llama_stack_port, reset_active_lightspeed_stack_config_basename, ) from tests.e2e.features.steps.health import ( @@ -312,7 +314,7 @@ def _print_llama_stack_diagnostics() -> None: print("--- end diagnostics ---") -def _restore_llama_stack(context: Context) -> None: +def _restore_llama_stack() -> None: """Restore Llama Stack connection after disruption.""" if is_prow_environment(): # Recreate llama pod, then restart LCS so in-process clients reconnect (Llama IP/pod changed). @@ -365,7 +367,7 @@ def _restore_llama_stack(context: Context) -> None: "llama-stack", "curl", "-sf", - f"http://{context.hostname_llama}:{context.port_llama}/v1/health", + f"http://{get_llama_stack_hostname()}:{get_llama_stack_port()}/v1/health", ], capture_output=True, timeout=5, @@ -445,7 +447,7 @@ def after_feature(context: Context, feature: Feature) -> None: # Read from module-level state — Behave clears custom context attributes # between scenarios, so context.llama_stack_was_running is unreliable here. if get_llama_stack_was_running(): - _restore_llama_stack(context) + _restore_llama_stack() reset_llama_stack_was_running() if getattr(context, "feedback_e2e_conversation_cleanup", False): diff --git a/tests/e2e/features/steps/common.py b/tests/e2e/features/steps/common.py index d90e8084e..5b7b3e715 100644 --- a/tests/e2e/features/steps/common.py +++ b/tests/e2e/features/steps/common.py @@ -21,12 +21,26 @@ # YAML across scenarios in one feature. 
Mutate the dict entry (no global). _active_lightspeed_stack_config_basename: dict[str, Optional[str]] = {"basename": None} +# Behave clears user attributes on ``context`` between scenarios; store +# Llama Stack endpoint info at module level so ``after_feature`` can see it. +_llama_stack_endpoint: dict[str, str] = {"hostname": "localhost", "port": "8321"} + def reset_active_lightspeed_stack_config_basename() -> None: """Reset before each feature; see ``environment.before_feature``.""" _active_lightspeed_stack_config_basename["basename"] = None +def get_llama_stack_hostname() -> str: + """Return the Llama Stack hostname surviving per-scenario context clearing.""" + return _llama_stack_endpoint["hostname"] + + +def get_llama_stack_port() -> str: + """Return the Llama Stack port surviving per-scenario context clearing.""" + return _llama_stack_endpoint["port"] + + @given("The service is started locally") def service_is_started_locally(context: Context) -> None: """Check the service status. @@ -46,6 +60,8 @@ def service_is_started_locally(context: Context) -> None: else: context.hostname_llama = "localhost" context.port_llama = os.getenv("E2E_LLAMA_PORT", "8321") + _llama_stack_endpoint["hostname"] = context.hostname_llama + _llama_stack_endpoint["port"] = context.port_llama @given('the Lightspeed stack configuration directory is "{directory}"')