-
Notifications
You must be signed in to change notification settings - Fork 82
LCORE-1497: Fix RHOAI Prow e2e pipeline failures #1613
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
340c005
a53c2e1
9105daf
dc88586
8aaee73
6bc8998
1f2bebb
c317499
4e2a4bd
f032f4a
a33aff6
3422717
56c70f5
986c6f3
54c289b
90da0d1
b033f92
aa6ab2e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
# Llama Stack pod for Prow: uses pre-built image with enrichment + RAG restore.
#
# Requires: ConfigMap llama-stack-config (run.yaml), ConfigMap rag-data (kv_store.db.gz),
#           ConfigMap lightspeed-stack-config (lightspeed-stack.yaml).
# Requires: Image built as ${LLAMA_STACK_IMAGE} (set by pipeline.sh).
#
# Volume layout note: the shared emptyDir "rag-storage" is mounted at /data in
# the init containers and at /opt/app-root/src/.llama/storage in the main
# container. Init containers must therefore write paths relative to the volume
# ROOT (/data/rag, /data/files, /data/.e2e-rag-seed) so the main container sees
# them at /opt/app-root/src/.llama/storage/{rag,files,.e2e-rag-seed}.
apiVersion: v1
kind: Pod
metadata:
  name: llama-stack-service
  labels:
    pod: llama-stack-service
spec:
  securityContext:
    seccompProfile:
      type: RuntimeDefault
  initContainers:
    # Extracts the gzipped RAG kv store from the rag-data ConfigMap into the
    # shared volume, keeping an uncompressed seed copy for in-container restores.
    - name: setup-rag-data
      image: busybox:latest
      securityContext:
        allowPrivilegeEscalation: false
        capabilities:
          drop: ["ALL"]
        runAsNonRoot: true
        runAsUser: 65534
        seccompProfile:
          type: RuntimeDefault
      command:
        - /bin/sh
        - -c
        - |
          set -e
          # Write relative to the volume root (/data), NOT /data/src/.llama/storage:
          # this volume is remounted at /opt/app-root/src/.llama/storage in the
          # main container, so /data/rag/kv_store.db becomes the path KV_RAG_PATH
          # points to. Writing under /data/src/... would land at
          # .../storage/src/.llama/storage/... and never be read.
          mkdir -p /data/rag /data/files /data/.e2e-rag-seed
          if [ ! -f /rag-data/kv_store.db.gz ]; then
            echo "FATAL: missing /rag-data/kv_store.db.gz"
            ls -la /rag-data || true
            exit 1
          fi
          gunzip -c /rag-data/kv_store.db.gz > /data/.e2e-rag-seed/kv_store.db
          cp -f /data/.e2e-rag-seed/kv_store.db /data/rag/kv_store.db
          # World-writable so the main container (uid 1001) can replace the db.
          chmod -R 777 /data/rag /data/files /data/.e2e-rag-seed
          echo "RAG data extracted successfully"
      volumeMounts:
        - name: rag-storage
          mountPath: /data
        - name: rag-data
          mountPath: /rag-data
    # Copies run.yaml out of the read-only ConfigMap into the writable shared
    # volume (/work == volume root), where the main container finds it at
    # /opt/app-root/src/.llama/storage/run.yaml (LLAMA_STACK_CONFIG).
    - name: materialize-run-yaml
      image: busybox:latest
      securityContext:
        allowPrivilegeEscalation: false
        capabilities:
          drop: ["ALL"]
        runAsNonRoot: true
        runAsUser: 65534
        seccompProfile:
          type: RuntimeDefault
      command:
        - /bin/sh
        - -c
        - |
          set -e
          cp /cm/run.yaml /work/run.yaml
          chmod 664 /work/run.yaml
      volumeMounts:
        - name: config-cm
          mountPath: /cm
          readOnly: true
        - name: rag-storage
          mountPath: /work
  containers:
    - name: llama-stack-container
      image: ${LLAMA_STACK_IMAGE}
      securityContext:
        allowPrivilegeEscalation: false
        capabilities:
          drop: ["ALL"]
        runAsNonRoot: true
        runAsUser: 1001
        seccompProfile:
          type: RuntimeDefault
      workingDir: /opt/app-root
      env:
        - name: PYTHONPATH
          value: "/opt/app-root/src"
        - name: HOME
          value: "/opt/app-root/src"
        - name: KV_STORE_PATH
          value: "/opt/app-root/src/.llama/storage/kv_store.db"
        - name: KV_RAG_PATH
          value: "/opt/app-root/src/.llama/storage/rag/kv_store.db"
        - name: SQL_STORE_PATH
          value: "/opt/app-root/src/.llama/storage/sql_store.db"
        - name: KSVC_URL
          valueFrom:
            secretKeyRef:
              name: api-url-secret
              key: key
        - name: VLLM_API_KEY
          valueFrom:
            secretKeyRef:
              name: vllm-api-key-secret
              key: key
        - name: INFERENCE_MODEL
          value: "meta-llama/Llama-3.1-8B-Instruct"
        - name: OPENAI_API_KEY
          valueFrom:
            secretKeyRef:
              name: openai-api-key-secret
              key: key
              optional: true
        - name: E2E_OPENAI_MODEL
          value: "gpt-4o-mini"
        - name: LLAMA_STACK_CONFIG
          value: "/opt/app-root/src/.llama/storage/run.yaml"
        - name: FAISS_VECTOR_STORE_ID
          valueFrom:
            secretKeyRef:
              name: faiss-vector-store-secret
              key: id
        - name: E2E_LLAMA_HOSTNAME
          valueFrom:
            secretKeyRef:
              name: llama-stack-ip-secret
              key: key
      command:
        - /bin/bash
        - -c
        - |
          set -e
          # RAG restore sources, in priority order:
          #   1. gzipped ConfigMap copy (RAG_CM_GZ) — authoritative
          #   2. uncompressed seed left by the init container (RAG_SEED)
          RAG_SEED="/opt/app-root/src/.llama/storage/.e2e-rag-seed/kv_store.db"
          RAG_CM_GZ="/opt/app-root/rag-data-cm/kv_store.db.gz"
          RAG_WORK="${KV_RAG_PATH:-/opt/app-root/src/.llama/storage/rag/kv_store.db}"
          # Re-materialize the RAG kv store; called again right before exec in
          # case config enrichment clobbered it.
          restore_rag_seed() {
            mkdir -p "$(dirname "$RAG_WORK")"
            if [[ -f "$RAG_CM_GZ" ]]; then
              RAG_WORK="$RAG_WORK" RAG_CM_GZ="$RAG_CM_GZ" python3 -c 'import gzip, os, shutil, sys; r, g = os.environ["RAG_WORK"], os.environ["RAG_CM_GZ"]; t = r + ".tmp"; i = gzip.open(g, "rb"); o = open(t, "wb"); shutil.copyfileobj(i, o); i.close(); o.close(); sz = os.path.getsize(t); (sz >= 1048576) or (print("FATAL: RAG from ConfigMap too small:", sz, file=sys.stderr) or sys.exit(1)); os.replace(t, r); os.chmod(r, 0o664)' || exit 1
            elif [[ -f "$RAG_SEED" ]]; then
              cp -f "$RAG_SEED" "$RAG_WORK"
              chmod 664 "$RAG_WORK" 2>/dev/null || true
            fi
          }
          restore_rag_seed
          INPUT_CONFIG="${LLAMA_STACK_CONFIG:-/opt/app-root/run.yaml}"
          ENRICHED_CONFIG="/opt/app-root/run.yaml"
          LIGHTSPEED_CONFIG="${LIGHTSPEED_CONFIG:-/opt/app-root/lightspeed-stack.yaml}"
          ENV_FILE="/opt/app-root/.env"
          # Optional enrichment step: merge lightspeed settings into run.yaml.
          # On any failure we fall through to the original config rather than
          # crash-looping the pod.
          if [[ -f "$LIGHTSPEED_CONFIG" ]]; then
            echo "Enriching llama-stack config..."
            ENRICHMENT_FAILED=0
            python3 /opt/app-root/src/llama_stack_configuration.py \
              -c "$LIGHTSPEED_CONFIG" \
              -i "$INPUT_CONFIG" \
              -o "$ENRICHED_CONFIG" \
              -e "$ENV_FILE" 2>&1 || ENRICHMENT_FAILED=1
            if [[ -f "$ENV_FILE" ]]; then
              set -a && . "$ENV_FILE" && set +a
            fi
            if [[ -f "$ENRICHED_CONFIG" ]] && [[ "$ENRICHMENT_FAILED" -eq 0 ]]; then
              echo "Using enriched config: $ENRICHED_CONFIG"
              restore_rag_seed
              exec llama stack run "$ENRICHED_CONFIG"
            fi
          fi
          echo "Using original config: $INPUT_CONFIG"
          restore_rag_seed
          exec llama stack run "$INPUT_CONFIG"
      ports:
        - containerPort: 8321
      readinessProbe:
        httpGet:
          path: /v1/health
          port: 8321
        initialDelaySeconds: 20
        periodSeconds: 5
        failureThreshold: 36
      livenessProbe:
        httpGet:
          path: /v1/health
          port: 8321
        initialDelaySeconds: 120
        periodSeconds: 20
        failureThreshold: 3
      volumeMounts:
        - name: rag-storage
          mountPath: /opt/app-root/src/.llama/storage
        - name: lightspeed-config
          mountPath: /opt/app-root/lightspeed-stack.yaml
          subPath: lightspeed-stack.yaml
          readOnly: true
        - name: rag-data
          mountPath: /opt/app-root/rag-data-cm
          readOnly: true
  volumes:
    - name: rag-storage
      emptyDir: {}
    - name: config-cm
      configMap:
        name: llama-stack-config
    - name: lightspeed-config
      configMap:
        name: lightspeed-stack-config
    - name: rag-data
      configMap:
        name: rag-data
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,27 +1,30 @@ | ||
#!/bin/bash
# Deploys the llama-stack and lightspeed-stack pods for the Prow e2e pipeline.
#
# Required env:  LLAMA_STACK_IMAGE       - pre-built llama-stack image ref
# Optional env:  NAMESPACE               - target namespace (default e2e-rhoai-dsc)
#                LIGHTSPEED_STACK_IMAGE  - lightspeed image (default dev-latest)
#
# Fail fast: abort on any command failure, unset variable, or pipeline error,
# so a broken `oc apply`/`oc wait` stops the pipeline instead of being ignored.
set -euo pipefail

BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
NAMESPACE="${NAMESPACE:-e2e-rhoai-dsc}"
: "${LLAMA_STACK_IMAGE:?LLAMA_STACK_IMAGE must be set}"

# Create llama-stack-ip-secret before deploying the pod (it references the
# secret as an env var). Idempotent: tolerate the secret already existing.
export E2E_LLAMA_HOSTNAME="llama-stack-service-svc.${NAMESPACE}.svc.cluster.local"
oc create secret generic llama-stack-ip-secret \
  --from-literal=key="$E2E_LLAMA_HOSTNAME" \
  -n "$NAMESPACE" 2>/dev/null || echo "Secret llama-stack-ip-secret exists"

# Deploy llama-stack (substitute only LLAMA_STACK_IMAGE, leave other ${} intact
# for in-pod expansion).
envsubst '${LLAMA_STACK_IMAGE}' < "$BASE_DIR/manifests/lightspeed/llama-stack-prow.yaml" | oc apply -n "$NAMESPACE" -f -

oc wait pod/llama-stack-service \
  -n "$NAMESPACE" --for=condition=Ready --timeout=600s

# Expose llama-stack service
oc label pod llama-stack-service pod=llama-stack-service -n "$NAMESPACE"

oc expose pod llama-stack-service \
  --name=llama-stack-service-svc \
  --port=8321 \
  --type=ClusterIP \
  -n "$NAMESPACE"

# Deploy lightspeed-stack (substitute only LIGHTSPEED_STACK_IMAGE, leave other
# ${} intact).
LIGHTSPEED_STACK_IMAGE="${LIGHTSPEED_STACK_IMAGE:-quay.io/lightspeed-core/lightspeed-stack:dev-latest}"
export LIGHTSPEED_STACK_IMAGE
envsubst '${LIGHTSPEED_STACK_IMAGE}' < "$BASE_DIR/manifests/lightspeed/lightspeed-stack.yaml" | oc apply -n "$NAMESPACE" -f -
Comment on lines
+4
to
+30
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Make this script fail fast on deployment errors. This file still runs without `set -euo pipefail`, so a failed `oc` command does not abort the deployment.

Suggested fix:

    #!/bin/bash
    +set -euo pipefail
    BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    NAMESPACE="${NAMESPACE:-e2e-rhoai-dsc}"
    +: "${LLAMA_STACK_IMAGE:?LLAMA_STACK_IMAGE must be set}"

🤖 Prompt for AI Agents
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seed the shared volume at its mounted root, not under
/data/src/.llama/storage.rag-storageis mounted as/datain this init container but as/opt/app-root/src/.llama/storagein the main container. Writing to/data/src/.llama/storage/...therefore lands at/opt/app-root/src/.llama/storage/src/.llama/storage/..., so the init container never prepopulates the path thatKV_RAG_PATHpoints to.Suggested fix
📝 Committable suggestion
🤖 Prompt for AI Agents