diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index cf36111..64fe536 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -108,13 +108,80 @@ jobs:
shell: bash
run: ./chart-dependencies/ci-deps.sh
+ - name: Debug cluster state before chart testing
+ if: steps.list-changed.outputs.changed == 'true'
+ run: |
+ echo "=== DEBUG: Cluster state before chart-testing ==="
+ kubectl cluster-info
+ kubectl get nodes -o wide
+ kubectl get namespaces
+ kubectl get pods --all-namespaces
+ kubectl get services --all-namespaces
+ kubectl get crd | grep -E "(gateway|inference)" || echo "No gateway/inference CRDs found"
+ echo ""
+
+ echo "=== DEBUG: Checking Istio installation ==="
+ kubectl get pods -n istio-system || echo "No istio-system namespace"
+ kubectl get svc -n istio-system || echo "No services in istio-system"
+ echo ""
+
+ echo "=== DEBUG: Checking ingress controller ==="
+ kubectl get pods -A | grep ingress || echo "No ingress pods found"
+ kubectl get svc -A | grep ingress || echo "No ingress services found"
+ echo ""
+
- name: Run chart-testing (install)
if: steps.list-changed.outputs.changed == 'true'
env:
TARGET_BRANCH: ${{ github.event.pull_request.base.ref || 'main^' }}
run: |
+ echo "=== DEBUG: Starting chart-testing install ==="
+ echo "Target branch: $TARGET_BRANCH"
+ echo "Chart-testing config:"
+ cat ct-install.yaml
+ echo ""
+
+ echo "=== DEBUG: Running ct install with maximum verbosity ==="
+ set -x
ct install \
--debug \
--config ct-install.yaml \
--upgrade \
- --target-branch "$TARGET_BRANCH"
+ --target-branch "$TARGET_BRANCH" || {
+ echo ""
+ echo "=== DEBUG: Chart-testing failed, checking helm releases ==="
+ helm list --all-namespaces || true
+ echo ""
+ echo "=== DEBUG: Checking for any test namespaces ==="
+ kubectl get namespaces | grep -E "(test|chart)" || echo "No test namespaces found"
+ echo ""
+ echo "=== DEBUG: Checking for any failed pods ==="
+ kubectl get pods --all-namespaces --field-selector=status.phase=Failed || echo "No failed pods found"
+ echo ""
+ echo "=== DEBUG: Recent events ==="
+ kubectl get events --all-namespaces --sort-by='.lastTimestamp' | tail -30
+ exit 1
+ }
+
+ - name: Debug cluster state after chart testing failure
+ if: failure() && steps.list-changed.outputs.changed == 'true'
+ run: |
+ echo "=== DEBUG: Cluster state after failure ==="
+ kubectl get pods --all-namespaces -o wide
+ kubectl get services --all-namespaces
+ kubectl get events --all-namespaces --sort-by='.lastTimestamp' | tail -20
+ echo ""
+
+ echo "=== DEBUG: Chart-related pods and services ==="
+ kubectl get pods -A | grep -E "(llm-d|test)" || echo "No chart-related pods found"
+ kubectl get svc -A | grep -E "(llm-d|test)" || echo "No chart-related services found"
+ echo ""
+
+ echo "=== DEBUG: Pod logs for failed pods ==="
+          for pod in $(kubectl get pods -A --field-selector=status.phase=Failed -o jsonpath='{range .items[*]}{.metadata.namespace}{"/"}{.metadata.name}{"\n"}{end}'); do
+            namespace=$(echo "$pod" | cut -d'/' -f1)
+            name=$(echo "$pod" | cut -d'/' -f2)
+            echo "--- Logs for $namespace/$name ---"
+            kubectl logs -n "$namespace" "$name" --previous || kubectl logs -n "$namespace" "$name" || echo "No logs available"
+            echo ""
+          done
diff --git a/_typos.toml b/_typos.toml
new file mode 100644
index 0000000..2e85b91
--- /dev/null
+++ b/_typos.toml
@@ -0,0 +1,6 @@
+# Configuration for typos spell checker
+# Ignore base64-encoded data URIs (such as the inline SVG chart icon in
+# charts/llm-d/Chart.yaml) rather than globally allow-listing short
+# letter sequences like "OT"/"Ba" that also occur in real words.
+[default]
+extend-ignore-re = ["data:image/[A-Za-z0-9.+-]+;base64,[A-Za-z0-9+/=]+"]
diff --git a/charts/llm-d/Chart.yaml b/charts/llm-d/Chart.yaml
index 4dd36ac..93fc9e8 100644
--- a/charts/llm-d/Chart.yaml
+++ b/charts/llm-d/Chart.yaml
@@ -1,10 +1,13 @@
+---
 apiVersion: v2
 name: llm-d
 type: application
-version: 1.0.18
+version: 1.0.19
 appVersion: "0.1"
 icon: data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9Im5vIj8+CjwhLS0gQ3JlYXRlZCB3aXRoIElua3NjYXBlIChodHRwOi8vd3d3Lmlua3NjYXBlLm9yZy8pIC0tPgoKPHN2ZwogICB3aWR0aD0iODBtbSIKICAgaGVpZ2h0PSI4MG1tIgogICB2aWV3Qm94PSIwIDAgODAuMDAwMDA0IDgwLjAwMDAwMSIKICAgdmVyc2lvbj0iMS4xIgogICBpZD0ic3ZnMSIKICAgeG1sOnNwYWNlPSJwcmVzZXJ2ZSIKICAgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIgogICB4bWxuczpzdmc9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48ZGVmcwogICAgIGlkPSJkZWZzMSIgLz48cGF0aAogICAgIHN0eWxlPSJmaWxsOiM0ZDRkNGQ7ZmlsbC1vcGFjaXR5OjE7c3Ryb2tlOiM0ZDRkNGQ7c3Ryb2tlLXdpZHRoOjIuMzQyOTk7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lIgogICAgIGQ9Im0gNTEuNjI5Nyw0My4wNzY3IGMgLTAuODI1NCwwIC0xLjY1MDgsMC4yMTI4IC0yLjM4ODEsMC42Mzg0IGwgLTEwLjcyNjksNi4xOTI2IGMgLTEuNDc2MywwLjg1MjIgLTIuMzg3MywyLjQzNDUgLTIuMzg3Myw0LjEzNTQgdiAxMi4zODQ3IGMgMCwxLjcwNDEgMC45MTI4LDMuMjg1NCAyLjM4ODUsNC4xMzU4IGwgMTAuNzI1Nyw2LjE5MTggYyAxLjQ3NDcsMC44NTEzIDMuMzAxNSwwLjg1MTMgNC43NzYyLDAgTCA2NC43NDQ3LDcwLjU2MzIgQyA2Ni4yMjEsNjkuNzExIDY3LjEzMiw2OC4xMjg4IDY3LjEzMiw2Ni40Mjc4IFYgNTQuMDQzMSBjIDAsLTEuNzAzNiAtMC45MTIzLC0zLjI4NDggLTIuMzg3MywtNC4xMzU0IGwgLThlLTQsLTRlLTQgLTEwLjcyNjEsLTYuMTkyMiBjIC0wLjczNzQsLTAuNDI1NiAtMS41NjI3LC0wLjYzODQgLTIuMzg4MSwtMC42Mzg0IHogbSAwLDMuNzM5NyBjIDAuMTc3NCwwIDAuMzU0NiwwLjA0NyAwLjUxNjcsMC4xNDA2IGwgMTAuNzI3Niw2LjE5MjUgNGUtNCw0ZS00IGMgMC4zMTkzLDAuMTg0IDAuNTE0MywwLjUyMDMgMC41MTQzLDAuODkzMiB2IDEyLjM4NDcgYyAwLDAuMzcyMSAtMC4xOTI3LDAuNzA3MyAtMC41MTU1LDAuODkzNiBsIC0xMC43MjY4LDYuMTkyMiBjIC0wLjMyNDMsMC4xODcyIC0wLjcwOTEsMC4xODcyIC0xLjAzMzQsMCBsIC0xMC43MjcyLC02LjE5MjYgLThlLTQsLTRlLTQgQyA0MC4wNjU3LDY3LjEzNjcgMzkuODcwNyw2Ni44MDA3IDM5Ljg3MDcsNjYuNDI3OCBWIDU0LjA0MzEgYyAwLC0wLjM3MiAwLjE5MjcsLTAuNzA3NyAwLjUxNTUsLTAuODk0IEwgNTEuMTEzLDQ2Ljk1NyBjIDAuMTYyMSwtMC4wOTQgMC4zMzkzLC0wLjE0MDYgMC41MTY3LC0wLjE0MDYgeiIKICAgICBpZD0icGF0aDEyMiIgLz48cGF0aAogICAgIGlkPSJwYXRoMTI0IgogICAgIHN0eWxlPSJmaWxsOiM0ZDRkNGQ7ZmlsbC1vcGFjaXR5OjE7c3Ryb2tlOiM0ZDRkNGQ7c3Ryb2tlLXdpZHRoOjIuMzQyOTk7c3Ryb2tlLWxpbmVjYXA6cm91bmQ7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lIgogICAgIGQ9Im0gNjMuMzg5MDE4LDM0LjgxOTk1OCB2IDIyLjM0NDE3NSBhIDEuODcxNTQzLDEuODcxNTQzIDAgMCAwIDEuODcxNTQxLDEuODcxNTQxIDEuODcxNTQzLDEuODcxNTQzIDAgMCAwIDEuODcxNTQxLC0xLjg3MTU0MSBWIDMyLjY1ODY0NyBaIiAvPjxwYXRoCiAgICAgc3R5bGU9ImZpbGw6IzdmMzE3ZjtmaWxsLW9wYWNpdHk6MTtzdHJva2U6IzdmMzE3ZjtzdHJva2Utd2lkdGg6Mi4yNDM7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lO3N0cm9rZS1vcGFjaXR5OjEiCiAgICAgZD0ibSAzNi43MzQyLDI4LjIzNDggYyAwLjQwOTcsMC43MTY1IDEuMDA0MiwxLjMyNzMgMS43Mzk4LDEuNzU2MSBsIDEwLjcwMSw2LjIzNzIgYyAxLjQ3MjcsMC44NTg0IDMuMjk4NCwwLjg2MzcgNC43NzUsMC4wMTkgbCAxMC43NTA2LC02LjE0ODUgYyAxLjQ3OTMsLTAuODQ2IDIuMzk4NywtMi40MjM0IDIuNDA0NCwtNC4xMjY3IGwgMC4wNSwtMTIuMzg0NCBjIDAuMDEsLTEuNzAyOSAtMC45LC0zLjI4ODYgLTIuMzcxMiwtNC4xNDYxIEwgNTQuMDgzMiwzLjIwNCBDIDUyLjYxMDUsMi4zNDU1IDUwLjc4NDcsMi4zNDAyIDQ5LjMwODIsMy4xODUgTCAzOC41NTc1LDkuMzMzNSBjIC0xLjQ3ODksMC44NDU4IC0yLjM5ODQsMi40MjI3IC0yLjQwNDYsNC4xMjU0IGwgMTBlLTUsOGUtNCAtMC4wNSwxMi4zODUgYyAwLDAuODUxNSAwLjIyMTYsMS42NzM1IDAuNjMxNCwyLjM5IHogbSAzLjI0NjMsLTEuODU2NiBjIC0wLjA4OCwtMC4xNTQgLTAuMTM1MywtMC4zMzExIC0wLjEzNDUsLTAuNTE4MyBsIDAuMDUsLTEyLjM4NjYgMmUtNCwtNmUtNCBjIDAsLTAuMzY4NCAwLjE5NjMsLTAuNzA0NyAwLjUyLC0wLjg4OTkgTCA1MS4xNjY5LDYuNDM0MyBjIDAuMzIyOSwtMC4xODQ3IDAuNzA5NywtMC4xODM4IDEuMDMxNiwwIGwgMTAuNzAwNiw2LjIzNzQgYyAwLjMyMzUsMC4xODg1IDAuNTE0NSwwLjUyMjYgMC41MTMsMC44OTcgbCAtMC4wNSwxMi4zODYyIHYgOWUtNCBjIDAsMC4zNjg0IC0wLjE5NiwwLjcwNDUgLTAuNTE5NywwLjg4OTYgbCAtMTAuNzUwNiw2LjE0ODUgYyAtMC4zMjMsMC4xODQ3IC0wLjcxMDEsMC4xODQgLTEuMDMyLDAgTCA0MC4zNTkyLDI2Ljc1NjcgYyAtMC4xNjE3LC0wLjA5NCAtMC4yOTA1LC0wLjIyNDggLTAuMzc4NSwtMC4zNzg4IHoiCiAgICAgaWQ9InBhdGgxMjYiIC8+PHBhdGgKICAgICBpZD0icGF0aDEyOSIKICAgICBzdHlsZT0iZmlsbDojN2YzMTdmO2ZpbGwtb3BhY2l0eToxO3N0cm9rZTojN2YzMTdmO3N0cm9rZS13aWR0aDoyLjI0MztzdHJva2UtbGluZWNhcDpyb3VuZDtzdHJva2UtbWl0ZXJsaW1pdDoxMDtzdHJva2UtZGFzaGFycmF5Om5vbmU7c3Ryb2tlLW9wYWNpdHk6MSIKICAgICBkPSJNIDIzLjcyODgzNSwyMi4xMjYxODUgNDMuMTI0OTI0LDExLjAzMzIyIEEgMS44NzE1NDMsMS44NzE1NDMgMCAwIDAgNDMuODIwMzkxLDguNDc5NDY2NiAxLjg3MTU0MywxLjg3MTU0MyAwIDAgMCA0MS4yNjY2MzcsNy43ODM5OTk4IEwgMTkuOTk0NDAxLDE5Ljk0OTk2NyBaIiAvPjxwYXRoCiAgICAgc3R5bGU9ImZpbGw6IzdmMzE3ZjtmaWxsLW9wYWNpdHk6MTtzdHJva2U6IzdmMzE3ZjtzdHJva2Utd2lkdGg6Mi4yNDM7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lO3N0cm9rZS1vcGFjaXR5OjEiCiAgICAgZD0ibSAzMS40NzY2LDQ4LjQ1MDQgYyAwLjQxNDUsLTAuNzEzOCAwLjY0NSwtMS41MzQ0IDAuNjQ3MiwtMi4zODU4IGwgMC4wMzIsLTEyLjM4NiBjIDAsLTEuNzA0NiAtMC45MDY0LC0zLjI4NyAtMi4zNzczLC00LjE0MTIgTCAxOS4wNjg4LDIzLjMxOCBjIC0xLjQ3MzcsLTAuODU1OCAtMy4yOTk1LC0wLjg2MDUgLTQuNzc2LC0wLjAxMSBMIDMuNTUyMSwyOS40NzI3IGMgLTEuNDc2OCwwLjg0NzggLTIuMzk0MiwyLjQyNzUgLTIuMzk4Niw0LjEzMDQgbCAtMC4wMzIsMTIuMzg1NyBjIDAsMS43MDQ3IDAuOTA2MywzLjI4NzEgMi4zNzcyLDQuMTQxMiBsIDEwLjcwOTgsNi4yMTk1IGMgMS40NzMyLDAuODU1NSAzLjI5ODcsMC44NjA2IDQuNzc1LDAuMDEyIGwgNmUtNCwtNGUtNCAxMC43NDEyLC02LjE2NTggYyAwLjczODUsLTAuNDIzOSAxLjMzNjksLTEuMDMwOCAxLjc1MTUsLTEuNzQ0NSB6IG0gLTMuMjM0LC0xLjg3ODEgYyAtMC4wODksMC4xNTM0IC0wLjIxODYsMC4yODMxIC0wLjM4MSwwLjM3NjMgbCAtMTAuNzQyMyw2LjE2NyAtNmUtNCwyZS00IGMgLTAuMzE5NCwwLjE4MzYgLTAuNzA4MiwwLjE4MzQgLTEuMDMwNywwIEwgNS4zNzgyLDQ2Ljg5NjQgQyA1LjA1NjUsNDYuNzA5NiA0Ljg2MzMsNDYuMzc0NSA0Ljg2NDMsNDYuMDAxOSBsIDAuMDMyLC0xMi4zODU4IGMgMCwtMC4zNzQ0IDAuMTk0MiwtMC43MDcyIDAuNTE4OSwtMC44OTM2IGwgMTAuNzQyMiwtNi4xNjY3IDZlLTQsLTRlLTQgYyAwLjMxOTQsLTAuMTgzNyAwLjcwNzgsLTAuMTgzNyAxLjAzMDMsMCBsIDEwLjcwOTgsNi4yMTk0IGMgMC4zMjE3LDAuMTg2OSAwLjUxNTIsMC41MjIxIDAuNTE0MiwwLjg5NDggbCAtMC4wMzIsMTIuMzg1NiBjIC00ZS00LDAuMTg3MiAtMC4wNDksMC4zNjQxIC0wLjEzNzksMC41MTc0IHoiCiAgICAgaWQ9InBhdGgxMzkiIC8+PHBhdGgKICAgICBpZD0icGF0aDE0MSIKICAgICBzdHlsZT0iZmlsbDojN2YzMTdmO2ZpbGwtb3BhY2l0eToxO3N0cm9rZTojN2YzMTdmO3N0cm9rZS13aWR0aDoyLjI0MztzdHJva2UtbGluZWNhcDpyb3VuZDtzdHJva2UtbWl0ZXJsaW1pdDoxMDtzdHJva2UtZGFzaGFycmF5Om5vbmU7c3Ryb2tlLW9wYWNpdHk6MSIKICAgICBkPSJNIDMyLjcxMTI5OSw2Mi43NjU3NDYgMTMuMzg4OTY5LDUxLjU0NDc5OCBhIDEuODcxNTQzLDEuODcxNTQzIDAgMCAwIC0yLjU1ODI5NSwwLjY3ODU2OCAxLjg3MTU0MywxLjg3MTU0MyAwIDAgMCAwLjY3ODU2OSwyLjU1ODI5NiBsIDIxLjE5MTM0NCwxMi4zMDYzMyBaIiAvPjwvc3ZnPgo=
-description: llm-d is a Kubernetes-native high-performance distributed LLM inference framework
+description: >-
+  llm-d is a Kubernetes-native high-performance distributed LLM inference
+  framework
keywords:
- vllm
- llm-d
diff --git a/charts/llm-d/README.md b/charts/llm-d/README.md
index 19ec6cf..1374564 100644
--- a/charts/llm-d/README.md
+++ b/charts/llm-d/README.md
@@ -1,7 +1,7 @@
# llm-d Helm Chart
-
+

llm-d is a Kubernetes-native high-performance distributed LLM inference framework
@@ -271,15 +271,27 @@ Kubernetes: `>= 1.30.0-0`
| modelservice.tolerations | Node tolerations for server scheduling to nodes with taints
Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ | list | `[]` |
| modelservice.topologySpreadConstraints | Topology Spread Constraints for pod assignment
Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#pod-topology-spread-constraints | list | `[]` |
| modelservice.vllm | vLLM container options | object | See below |
+| modelservice.vllm.extraArgs | Additional command line arguments for vLLM | list | `[]` |
+| modelservice.vllm.extraEnvVars | Additional environment variables for vLLM containers | list | `[]` |
| modelservice.vllm.image | vLLM image used in ModelService CR presets | object | See below |
| modelservice.vllm.image.imagePullPolicy | Specify a imagePullPolicy | string | `"IfNotPresent"` |
| modelservice.vllm.image.pullSecrets | Optionally specify an array of imagePullSecrets (evaluated as templates) | list | `[]` |
| modelservice.vllm.image.registry | llm-d image registry | string | `"ghcr.io"` |
| modelservice.vllm.image.repository | llm-d image repository | string | `"llm-d/llm-d"` |
| modelservice.vllm.image.tag | llm-d image tag | string | `"0.0.8"` |
+| modelservice.vllm.loadFormat | Load format for vLLM model loading
When set to "runai_streamer", enables Run:AI Model Streamer for loading models from object storage
Options: "", "runai_streamer", "runai_streamer_sharded" | string | `""` |
| modelservice.vllm.logLevel | Log level to run VLLM with
VLLM supports standard python log-levels, see: https://docs.python.org/3/library/logging.html#logging-levels
Options: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL" | string | `"INFO"` |
| modelservice.vllm.metrics | Enable metrics gathering via podMonitor / ServiceMonitor | object | `{"enabled":true}` |
| modelservice.vllm.metrics.enabled | Enable metrics scraping from prefill & decode services | bool | `true` |
+| modelservice.vllm.runaiStreamer | RunAI Model Streamer configuration options
These options are used when loadFormat is set to "runai_streamer" or "runai_streamer_sharded" | object | See below |
+| modelservice.vllm.runaiStreamer.chunkBytesize | Controls the maximum size of memory each OS thread reads from the file at once
Positive integer in bytes
Default: 2,097,152 (2 MiB) for file system, 8,388,608 (8 MiB) for object store | string | `""` |
+| modelservice.vllm.runaiStreamer.concurrency | Controls the level of concurrency and number of OS threads reading tensors
Positive integer | int | `16` |
+| modelservice.vllm.runaiStreamer.memoryLimit | Controls the size of the CPU Memory buffer to which tensors are read
Integer: -1 (UNLIMITED), 0 (MIN), or positive integer in bytes | int | `-1` |
+| modelservice.vllm.runaiStreamer.pattern | Custom naming pattern for sharded model files
Used with runai_streamer_sharded load format
Example: "custom-model-rank-{rank}-part-{part}.safetensors" | string | `""` |
+| modelservice.vllm.runaiStreamer.s3 | S3/Object store configuration | object | See below |
+| modelservice.vllm.runaiStreamer.s3.caBundlePath | Path to a certificate bundle to use for HTTPS certificate validation | string | `""` |
+| modelservice.vllm.runaiStreamer.s3.endpointUrl | Override url endpoint for reading from S3 compatible object store
Mandatory for S3-compatible stores like GCS, Minio | string | `""` |
+| modelservice.vllm.runaiStreamer.s3.useVirtualAddressing | Controls parsing the url endpoint for reading from object store
Boolean: true enables virtual addressing, false uses path-style | bool | `true` |
| nameOverride | String to partially override common.names.fullname | string | `""` |
| redis | Bitnami/Redis chart configuration | object | Use sane defaults for minimal Redis deployment |
| sampleApplication | Sample application deploying a p-d pair of specific model | object | See below |
diff --git a/charts/llm-d/ci/default-values.yaml b/charts/llm-d/ci/default-values.yaml
index 5ed6fac..35f9785 100644
--- a/charts/llm-d/ci/default-values.yaml
+++ b/charts/llm-d/ci/default-values.yaml
@@ -1,5 +1,5 @@
test:
- enabled: true
+ enabled: false
sampleApplication:
enabled: false
@@ -10,6 +10,7 @@ redis:
enabled: false
modelservice:
+ enabled: false
metrics:
enabled: false
epp:
@@ -22,3 +23,6 @@ modelservice:
tolerations: []
decode:
tolerations: []
+
+gateway:
+ enabled: false
diff --git a/charts/llm-d/ci/runai-streamer-values.yaml b/charts/llm-d/ci/runai-streamer-values.yaml
new file mode 100644
index 0000000..160ce09
--- /dev/null
+++ b/charts/llm-d/ci/runai-streamer-values.yaml
@@ -0,0 +1,51 @@
+test:
+ enabled: false
+
+sampleApplication:
+ enabled: false
+
+modelservice:
+ enabled: false
+ metrics:
+ enabled: false
+ vllm:
+ # Test loadFormat configuration
+ loadFormat: "runai_streamer"
+ # Test runai_streamer specific configurations
+ runaiStreamer:
+ concurrency: 32
+ chunkBytesize: "4194304" # 4 MiB
+ memoryLimit: 1073741824 # 1 GiB
+ pattern: "custom-model-rank-{rank}-part-{part}.safetensors"
+ s3:
+ endpointUrl: "https://test-s3.example.com"
+ caBundlePath: "/etc/ssl/certs/ca-bundle.crt"
+ useVirtualAddressing: false
+ # Test extra args and env vars
+ extraArgs:
+ - "--custom-arg1"
+ - "value1"
+ - "--custom-arg2"
+ extraEnvVars:
+ - name: TEST_ENV_VAR
+ value: "test-value"
+ - name: ANOTHER_TEST_VAR
+ value: "another-value"
+ epp:
+ defaultEnvVarsOverride:
+ - name: PD_ENABLED
+ value: 'false'
+ - name: ENABLE_KVCACHE_AWARE_SCORER
+ value: "false"
+ prefill:
+ tolerations: []
+ decode:
+ tolerations: []
+
+gateway:
+ enabled: false
+
+redis:
+ master:
+ persistence:
+ enabled: false
diff --git a/charts/llm-d/templates/modelservice/_helpers.tpl b/charts/llm-d/templates/modelservice/_helpers.tpl
index cbaf0bf..638cffa 100644
--- a/charts/llm-d/templates/modelservice/_helpers.tpl
+++ b/charts/llm-d/templates/modelservice/_helpers.tpl
@@ -110,3 +110,51 @@ Return the proper Docker Image Registry Secret Names
value: {{ $v }}
{{- end }}
{{- end }}
+
+{{/*
+Return the RunAI Streamer environment variables when loadFormat is runai_streamer
+*/}}
+{{- define "modelservice.runaiStreamer.envVars" -}}
+{{- if or (eq .Values.modelservice.vllm.loadFormat "runai_streamer") (eq .Values.modelservice.vllm.loadFormat "runai_streamer_sharded") }}
+- name: RUNAI_STREAMER_CONCURRENCY
+ value: {{ .Values.modelservice.vllm.runaiStreamer.concurrency | quote }}
+{{- if .Values.modelservice.vllm.runaiStreamer.chunkBytesize }}
+- name: RUNAI_STREAMER_CHUNK_BYTESIZE
+ value: {{ .Values.modelservice.vllm.runaiStreamer.chunkBytesize | quote }}
+{{- end }}
+- name: RUNAI_STREAMER_MEMORY_LIMIT
+ value: {{ .Values.modelservice.vllm.runaiStreamer.memoryLimit | quote }}
+{{- if .Values.modelservice.vllm.runaiStreamer.s3.endpointUrl }}
+- name: AWS_ENDPOINT_URL
+ value: {{ .Values.modelservice.vllm.runaiStreamer.s3.endpointUrl | quote }}
+{{- end }}
+{{- if .Values.modelservice.vllm.runaiStreamer.s3.caBundlePath }}
+- name: AWS_CA_BUNDLE
+ value: {{ .Values.modelservice.vllm.runaiStreamer.s3.caBundlePath | quote }}
+{{- end }}
+- name: RUNAI_STREAMER_S3_USE_VIRTUAL_ADDRESSING
+ value: {{ .Values.modelservice.vllm.runaiStreamer.s3.useVirtualAddressing | ternary "1" "0" }}
+{{- end }}
+{{- end }}
+
+{{/*
+Return the RunAI Streamer extra config args for model-loader-extra-config
+*/}}
+{{- define "modelservice.runaiStreamer.extraConfigArgs" -}}
+{{- if or (eq .Values.modelservice.vllm.loadFormat "runai_streamer") (eq .Values.modelservice.vllm.loadFormat "runai_streamer_sharded") }}
+{{- $config := dict }}
+{{- if .Values.modelservice.vllm.runaiStreamer.concurrency }}
+ {{- $_ := set $config "concurrency" .Values.modelservice.vllm.runaiStreamer.concurrency }}
+{{- end }}
+{{- if .Values.modelservice.vllm.runaiStreamer.memoryLimit }}
+ {{- $_ := set $config "memory_limit" .Values.modelservice.vllm.runaiStreamer.memoryLimit }}
+{{- end }}
+{{- if .Values.modelservice.vllm.runaiStreamer.pattern }}
+ {{- $_ := set $config "pattern" .Values.modelservice.vllm.runaiStreamer.pattern }}
+{{- end }}
+{{- if $config }}
+- "--model-loader-extra-config"
+- {{ $config | toJson | quote }}
+{{- end }}
+{{- end }}
+{{- end }}
diff --git a/charts/llm-d/templates/modelservice/presets/basic-gpu-preset.yaml b/charts/llm-d/templates/modelservice/presets/basic-gpu-preset.yaml
index 1a3480b..5e57870 100644
--- a/charts/llm-d/templates/modelservice/presets/basic-gpu-preset.yaml
+++ b/charts/llm-d/templates/modelservice/presets/basic-gpu-preset.yaml
@@ -70,10 +70,18 @@ data:
command:
- vllm
- serve
- - {{ `{{ default (print "/models/" .ModelPath) .HFModelName }}` }}
+ - {{ `{{ if and (ne .LoadFormat "") (eq .LoadFormat "runai_streamer") }}{{ .ModelArtifactURI }}{{ else }}{{ default (print "/models/" .ModelPath) .HFModelName }}{{ end }}` }}
args:
- "--port"
- "8001"
+ {{- if .Values.modelservice.vllm.loadFormat }}
+ - "--load-format"
+ - {{ .Values.modelservice.vllm.loadFormat | quote }}
+ {{- end }}
+ {{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 16 }}
+ {{- range .Values.modelservice.vllm.extraArgs }}
+ - {{ . | quote }}
+ {{- end }}
env:
- name: HOME
value: /home
@@ -87,6 +95,10 @@ data:
- name: HF_HUB_CACHE
value: /models
{{ `{{- end }}` }}
+ {{- include "modelservice.runaiStreamer.envVars" . | nindent 16 }}
+ {{- range .Values.modelservice.vllm.extraEnvVars }}
+ {{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 16 }}
+ {{- end }}
volumeMounts:
- name: home
mountPath: /home
@@ -149,10 +161,18 @@ data:
command:
- vllm
- serve
- - {{ `{{ default (print "/models/" .ModelPath) .HFModelName }}` }}
+ - {{ `{{ if and (ne .LoadFormat "") (eq .LoadFormat "runai_streamer") }}{{ .ModelArtifactURI }}{{ else }}{{ default (print "/models/" .ModelPath) .HFModelName }}{{ end }}` }}
args:
- "--port"
- "8000"
+ {{- if .Values.modelservice.vllm.loadFormat }}
+ - "--load-format"
+ - {{ .Values.modelservice.vllm.loadFormat | quote }}
+ {{- end }}
+ {{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 16 }}
+ {{- range .Values.modelservice.vllm.extraArgs }}
+ - {{ . | quote }}
+ {{- end }}
env:
- name: HOME
value: /home
@@ -166,6 +186,10 @@ data:
- name: HF_HUB_CACHE
value: /models
{{ `{{- end }}` }}
+ {{- include "modelservice.runaiStreamer.envVars" . | nindent 16 }}
+ {{- range .Values.modelservice.vllm.extraEnvVars }}
+ {{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 16 }}
+ {{- end }}
volumeMounts:
- name: home
mountPath: /home
diff --git a/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-and-redis-lookup-preset.yaml b/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-and-redis-lookup-preset.yaml
index e6a2074..6150970 100644
--- a/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-and-redis-lookup-preset.yaml
+++ b/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-and-redis-lookup-preset.yaml
@@ -71,12 +71,20 @@ data:
command:
- vllm
- serve
- - {{ `{{ default (print "/models/" .ModelPath) .HFModelName }}` }}
+ - {{ `{{ if and (ne .LoadFormat "") (eq .LoadFormat "runai_streamer") }}{{ .ModelArtifactURI }}{{ else }}{{ default (print "/models/" .ModelPath) .HFModelName }}{{ end }}` }}
args:
- "--port"
- "8001"
- "--kv-transfer-config"
- '{"kv_connector":"MultiConnector","kv_role":"kv_both","kv_connector_extra_config":{"connectors":[{"kv_connector":"NixlConnector","kv_role":"kv_both"},{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}]}}'
+ {{- if .Values.modelservice.vllm.loadFormat }}
+ - "--load-format"
+ - {{ .Values.modelservice.vllm.loadFormat | quote }}
+ {{- end }}
+ {{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 16 }}
+ {{- range .Values.modelservice.vllm.extraArgs }}
+ - {{ . | quote }}
+ {{- end }}
env:
- name: HOME
value: /home
@@ -119,6 +127,10 @@ data:
- name: HF_HUB_CACHE
value: /models
{{ `{{- end }}` }}
+ {{- include "modelservice.runaiStreamer.envVars" . | nindent 16 }}
+ {{- range .Values.modelservice.vllm.extraEnvVars }}
+ {{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 16 }}
+ {{- end }}
volumeMounts:
- name: home
mountPath: /home
@@ -186,12 +198,20 @@ data:
command:
- vllm
- serve
- - {{ `{{ default (print "/models/" .ModelPath) .HFModelName }}` }}
+ - {{ `{{ if and (ne .LoadFormat "") (eq .LoadFormat "runai_streamer") }}{{ .ModelArtifactURI }}{{ else }}{{ default (print "/models/" .ModelPath) .HFModelName }}{{ end }}` }}
args:
- "--port"
- "8000"
- "--kv-transfer-config"
- '{"kv_connector":"MultiConnector","kv_role":"kv_both","kv_connector_extra_config":{"connectors":[{"kv_connector":"NixlConnector","kv_role":"kv_both"},{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}]}}'
+ {{- if .Values.modelservice.vllm.loadFormat }}
+ - "--load-format"
+ - {{ .Values.modelservice.vllm.loadFormat | quote }}
+ {{- end }}
+ {{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 16 }}
+ {{- range .Values.modelservice.vllm.extraArgs }}
+ - {{ . | quote }}
+ {{- end }}
env:
- name: HOME
value: /home
@@ -234,6 +254,10 @@ data:
- name: HF_HUB_CACHE
value: /models
{{ `{{- end }}` }}
+ {{- include "modelservice.runaiStreamer.envVars" . | nindent 16 }}
+ {{- range .Values.modelservice.vllm.extraEnvVars }}
+ {{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 16 }}
+ {{- end }}
volumeMounts:
- name: home
mountPath: /home
diff --git a/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-preset.yaml b/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-preset.yaml
index e84b680..e4a5f32 100644
--- a/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-preset.yaml
+++ b/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-preset.yaml
@@ -71,12 +71,20 @@ data:
command:
- vllm
- serve
- - {{ `{{ default (print "/models/" .ModelPath) .HFModelName }}` }}
+ - {{ `{{ if and (ne .LoadFormat "") (eq .LoadFormat "runai_streamer") }}{{ .ModelArtifactURI }}{{ else }}{{ default (print "/models/" .ModelPath) .HFModelName }}{{ end }}` }}
args:
- "--port"
- "8001"
- "--kv-transfer-config"
- '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
+ {{- if .Values.modelservice.vllm.loadFormat }}
+ - "--load-format"
+ - {{ .Values.modelservice.vllm.loadFormat | quote }}
+ {{- end }}
+ {{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 16 }}
+ {{- range .Values.modelservice.vllm.extraArgs }}
+ - {{ . | quote }}
+ {{- end }}
env:
- name: HOME
value: /home
@@ -107,6 +115,10 @@ data:
- name: HF_HUB_CACHE
value: /models
{{ `{{- end }}` }}
+ {{- include "modelservice.runaiStreamer.envVars" . | nindent 16 }}
+ {{- range .Values.modelservice.vllm.extraEnvVars }}
+ {{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 16 }}
+ {{- end }}
volumeMounts:
- name: home
mountPath: /home
@@ -172,12 +184,20 @@ data:
command:
- vllm
- serve
- - {{ `{{ default (print "/models/" .ModelPath) .HFModelName }}` }}
+ - {{ `{{ if and (ne .LoadFormat "") (eq .LoadFormat "runai_streamer") }}{{ .ModelArtifactURI }}{{ else }}{{ default (print "/models/" .ModelPath) .HFModelName }}{{ end }}` }}
args:
- "--port"
- "8000"
- "--kv-transfer-config"
- '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
+ {{- if .Values.modelservice.vllm.loadFormat }}
+ - "--load-format"
+ - {{ .Values.modelservice.vllm.loadFormat | quote }}
+ {{- end }}
+ {{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 16 }}
+ {{- range .Values.modelservice.vllm.extraArgs }}
+ - {{ . | quote }}
+ {{- end }}
env:
- name: HOME
value: /home
@@ -208,6 +228,10 @@ data:
- name: HF_HUB_CACHE
value: /models
{{ `{{- end }}` }}
+ {{- include "modelservice.runaiStreamer.envVars" . | nindent 16 }}
+ {{- range .Values.modelservice.vllm.extraEnvVars }}
+ {{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 16 }}
+ {{- end }}
volumeMounts:
- name: home
mountPath: /home
diff --git a/charts/llm-d/templates/sample-application/_helpers.tpl b/charts/llm-d/templates/sample-application/_helpers.tpl
index a170a1a..ea00930 100644
--- a/charts/llm-d/templates/sample-application/_helpers.tpl
+++ b/charts/llm-d/templates/sample-application/_helpers.tpl
@@ -35,8 +35,10 @@ Define the type of the modelArtifactURI
pvc
{{- else if hasPrefix "hf://" .Values.sampleApplication.model.modelArtifactURI -}}
hf
+ {{- else if eq .Values.modelservice.vllm.loadFormat "runai_streamer" -}}
+ objectstorage
{{- else }}
- {{- fail "Values.sampleApplication.model.modelArtifactURI supports hf:// and pvc://" }}
+ {{- fail "Values.sampleApplication.model.modelArtifactURI supports hf:// and pvc://. For other protocols (like s3://), set modelservice.vllm.loadFormat to 'runai_streamer'" }}
{{- end }}
{{- end }}
diff --git a/charts/llm-d/templates/sample-application/modelservice.yaml b/charts/llm-d/templates/sample-application/modelservice.yaml
index 6ba5c22..ec96a22 100644
--- a/charts/llm-d/templates/sample-application/modelservice.yaml
+++ b/charts/llm-d/templates/sample-application/modelservice.yaml
@@ -27,6 +27,14 @@ spec:
args:
- "--served-model-name"
- {{ include "sampleApplication.servedModelNames" .}}
+ {{- if .Values.modelservice.vllm.loadFormat }}
+ - "--load-format"
+ - {{ .Values.modelservice.vllm.loadFormat | quote }}
+ {{- end }}
+ {{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 6 }}
+ {{- range .Values.modelservice.vllm.extraArgs }}
+ - {{ . | quote }}
+ {{- end }}
{{- range .Values.sampleApplication.decode.extraArgs }}
- {{ include "common.tplvalues.render" ( dict "value" . "context" $) | quote }}
{{- end }}
@@ -39,6 +47,10 @@ spec:
name: {{ .Values.sampleApplication.model.auth.hfToken.name }}
key: {{ .Values.sampleApplication.model.auth.hfToken.key }}
{{- end }}
+ {{- include "modelservice.runaiStreamer.envVars" . | nindent 6 }}
+ {{- range .Values.modelservice.vllm.extraEnvVars }}
+ {{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 6 }}
+ {{- end }}
prefill:
replicas: {{ .Values.sampleApplication.prefill.replicas }}
containers:
@@ -46,6 +58,14 @@ spec:
args:
- "--served-model-name"
- {{ include "sampleApplication.servedModelNames" .}}
+ {{- if .Values.modelservice.vllm.loadFormat }}
+ - "--load-format"
+ - {{ .Values.modelservice.vllm.loadFormat | quote }}
+ {{- end }}
+ {{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 6 }}
+ {{- range .Values.modelservice.vllm.extraArgs }}
+ - {{ . | quote }}
+ {{- end }}
{{- range .Values.sampleApplication.prefill.extraArgs }}
- {{ include "common.tplvalues.render" ( dict "value" . "context" $) | quote }}
{{- end }}
@@ -58,6 +78,10 @@ spec:
name: {{ .Values.sampleApplication.model.auth.hfToken.name }}
key: {{ .Values.sampleApplication.model.auth.hfToken.key }}
{{- end }}
+ {{- include "modelservice.runaiStreamer.envVars" . | nindent 6 }}
+ {{- range .Values.modelservice.vllm.extraEnvVars }}
+ {{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 6 }}
+ {{- end }}
endpointPicker:
containers:
- name: epp
diff --git a/charts/llm-d/templates/tests/test-connection.yaml b/charts/llm-d/templates/tests/test-connection.yaml
index 1babd1d..1260f80 100644
--- a/charts/llm-d/templates/tests/test-connection.yaml
+++ b/charts/llm-d/templates/tests/test-connection.yaml
@@ -35,16 +35,87 @@ spec:
command: ["/bin/sh", "-c"]
args:
- |
+ set -x # Enable debug mode
+ echo "=== DEBUG: Starting chart test debug ==="
+ echo "Release name: {{ .Release.Name }}"
+ echo "Release namespace: {{ .Release.Namespace }}"
+ echo "Gateway fullname: {{ include "gateway.fullname" . }}"
+ echo "Target service: {{ include "gateway.fullname" . }}-istio.{{ .Release.Namespace }}.svc.cluster.local"
+ echo ""
+
+ echo "=== DEBUG: Checking cluster state ==="
+ echo "Current namespace:"
+ cat /var/run/secrets/kubernetes.io/serviceaccount/namespace || echo "Failed to read namespace"
+ echo ""
+
+ echo "Available services in namespace {{ .Release.Namespace }}:"
+ nslookup -type=SRV _http._tcp.{{ .Release.Namespace }}.svc.cluster.local || echo "No SRV records found"
+ echo ""
+
+ echo "=== DEBUG: Attempting direct service lookups ==="
+ echo "Trying short name lookup:"
+ nslookup {{ include "gateway.fullname" . }}-istio || echo "Short name lookup failed"
+ echo ""
+
+ echo "Trying namespace-qualified lookup:"
+ nslookup {{ include "gateway.fullname" . }}-istio.{{ .Release.Namespace }} || echo "Namespace-qualified lookup failed"
+ echo ""
+
+ echo "Trying full FQDN lookup:"
+ nslookup {{ include "gateway.fullname" . }}-istio.{{ .Release.Namespace }}.svc.cluster.local || echo "FQDN lookup failed"
+ echo ""
+
+ echo -e "\e[32m🥷 Waiting for gateway service to be ready\e[0m"
+ echo ""
+ # Wait for gateway service to exist with timeout
+ timeout=300 # 5 minutes
+ elapsed=0
+ while ! nslookup {{ include "gateway.fullname" . }}-istio.{{ .Release.Namespace }}.svc.cluster.local >/dev/null 2>&1; do
+ echo "Gateway service not found (${elapsed}s/${timeout}s), retrying in 5s..."
+ sleep 5
+ elapsed=$((elapsed + 5))
+ if [ $elapsed -ge $timeout ]; then
+ echo "ERROR: Gateway service not found after ${timeout}s"
+ echo "=== DEBUG: Final service check ==="
+ nslookup {{ include "gateway.fullname" . }}-istio.{{ .Release.Namespace }}.svc.cluster.local || true
+ exit 1
+ fi
+ done
+ echo "Gateway service is ready after ${elapsed}s"
+ echo ""
+
+ echo "=== DEBUG: Testing HTTP connectivity ==="
+ echo "Target URL: http://{{ include "gateway.fullname" . }}-istio/v1/models"
+ echo ""
+
echo -e "\e[32m🥷 Waiting for pods to come up\e[0m"
echo ""
- curl --connect-timeout 5 --max-time 20 --retry 20 --retry-delay 10 --retry-max-time 60 --retry-all-errors http://{{ include "gateway.fullname" . }}-istio/v1/models
+
+ echo "=== DEBUG: First HTTP request ==="
+ curl -v --connect-timeout 5 --max-time 20 --retry 20 --retry-delay 10 --retry-max-time 60 --retry-all-errors http://{{ include "gateway.fullname" . }}-istio/v1/models || {
+ echo "ERROR: First HTTP request failed"
+ echo "=== DEBUG: Network troubleshooting ==="
+ echo "Gateway service lookup:"
+ nslookup {{ include "gateway.fullname" . }}-istio.{{ .Release.Namespace }}.svc.cluster.local || true
+ echo "Attempting ping:"
+ ping -c 3 {{ include "gateway.fullname" . }}-istio.{{ .Release.Namespace }}.svc.cluster.local || true
+ exit 1
+ }
echo ""
echo ""
echo -e "\e[32m🥷 Basic chat validation\e[0m"
echo ""
- curl --connect-timeout 5 --max-time 20 --retry 20 --retry-delay 10 --retry-max-time 60 --retry-all-errors http://{{ include "gateway.fullname" . }}-istio/v1/chat/completions \
+
+ echo "=== DEBUG: Second HTTP request ==="
+ curl -v --connect-timeout 5 --max-time 20 --retry 20 --retry-delay 10 --retry-max-time 60 --retry-all-errors http://{{ include "gateway.fullname" . }}-istio/v1/chat/completions \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
- -d '{"model":"food-review","messages":[{"content":"Say hi","role":"user"}],"stream":false}'
+ -d '{"model":"food-review","messages":[{"content":"Say hi","role":"user"}],"stream":false}' || {
+ echo "ERROR: Second HTTP request failed"
+ exit 1
+ }
+
+ echo ""
+ echo "=== DEBUG: Test completed successfully ==="
{{- end }}
diff --git a/charts/llm-d/tests/loadformat-test.sh b/charts/llm-d/tests/loadformat-test.sh
new file mode 100644
index 0000000..f4f7517
--- /dev/null
+++ b/charts/llm-d/tests/loadformat-test.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+set -euo pipefail
+
+# Test script for loadFormat and runai_streamer functionality
+# This script validates that the Helm templates render correctly with various loadFormat configurations
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+CHART_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
+
+echo "Testing loadFormat and runai_streamer functionality..."
+
+# Test 1: Default behavior (no loadFormat specified)
+echo "Test 1: Testing default behavior (no loadFormat)"
+helm template test-default "${CHART_DIR}" \
+ --values "${CHART_DIR}/ci/default-values.yaml" \
+ --output-dir /tmp/test-default 2>/dev/null
+
+# Verify loadFormat is not present in default case
+if grep -q "load-format" /tmp/test-default/llm-d/templates/modelservice/presets/basic-gpu-preset.yaml; then
+ echo "❌ FAIL: load-format should not be present in default configuration"
+ exit 1
+else
+ echo "✅ PASS: load-format correctly omitted in default configuration"
+fi
+
+# Test 2: RunAI Streamer configuration
+echo "Test 2: Testing runai_streamer configuration"
+helm template test-runai "${CHART_DIR}" \
+ --values "${CHART_DIR}/ci/runai-streamer-values.yaml" \
+ --output-dir /tmp/test-runai 2>/dev/null
+
+PRESET_FILE="/tmp/test-runai/llm-d/templates/modelservice/presets/basic-gpu-preset.yaml"
+
+# Check that load-format is properly set
+if grep -q -- "--load-format" "${PRESET_FILE}" && grep -q "runai_streamer" "${PRESET_FILE}"; then
+ echo "✅ PASS: load-format argument correctly added"
+else
+ echo "❌ FAIL: load-format argument not found in preset"
+ exit 1
+fi
+
+# Check for model-loader-extra-config
+if grep -q -- "--model-loader-extra-config" "${PRESET_FILE}"; then
+ echo "✅ PASS: model-loader-extra-config argument correctly added"
+else
+ echo "❌ FAIL: model-loader-extra-config argument not found"
+ exit 1
+fi
+
+# Check for RunAI Streamer environment variables
+EXPECTED_ENV_VARS=(
+ "RUNAI_STREAMER_CONCURRENCY"
+ "RUNAI_STREAMER_CHUNK_BYTESIZE"
+ "RUNAI_STREAMER_MEMORY_LIMIT"
+ "AWS_ENDPOINT_URL"
+ "AWS_CA_BUNDLE"
+ "RUNAI_STREAMER_S3_USE_VIRTUAL_ADDRESSING"
+)
+
+for env_var in "${EXPECTED_ENV_VARS[@]}"; do
+ if grep -q "${env_var}" "${PRESET_FILE}"; then
+ echo "✅ PASS: Environment variable ${env_var} found"
+ else
+ echo "❌ FAIL: Environment variable ${env_var} not found"
+ exit 1
+ fi
+done
+
+# Check for extra args
+if grep -q -- "--custom-arg1" "${PRESET_FILE}" && grep -q "value1" "${PRESET_FILE}"; then
+ echo "✅ PASS: Extra args correctly rendered"
+else
+ echo "❌ FAIL: Extra args not found"
+ exit 1
+fi
+
+# Check for extra environment variables
+if grep -q "TEST_ENV_VAR" "${PRESET_FILE}" && grep -q "test-value" "${PRESET_FILE}"; then
+ echo "✅ PASS: Extra environment variables correctly rendered"
+else
+ echo "❌ FAIL: Extra environment variables not found"
+ exit 1
+fi
+
+# Test 3: Sample application with runai_streamer
+echo "Test 3: Testing sample application with runai_streamer"
+helm template test-sample "${CHART_DIR}" \
+ --values "${CHART_DIR}/ci/runai-streamer-values.yaml" \
+ --set sampleApplication.enabled=true \
+ --set sampleApplication.model.modelArtifactURI="s3://test-bucket/model" \
+ --output-dir /tmp/test-sample 2>/dev/null
+
+SAMPLE_FILE="/tmp/test-sample/llm-d/templates/sample-application/modelservice.yaml"
+
+# Check that sample application gets the loadFormat configuration
+if grep -q -- "--load-format" "${SAMPLE_FILE}" && grep -q "runai_streamer" "${SAMPLE_FILE}"; then
+ echo "✅ PASS: Sample application load-format correctly configured"
+else
+ echo "❌ FAIL: Sample application load-format not configured"
+ exit 1
+fi
+
+# Test 4: Template validation for all presets
+echo "Test 4: Testing all presets render correctly with runai_streamer"
+PRESET_FILES=(
+ "basic-gpu-preset.yaml"
+ "basic-gpu-with-nixl-preset.yaml"
+ "basic-gpu-with-nixl-and-redis-lookup-preset.yaml"
+)
+
+for preset in "${PRESET_FILES[@]}"; do
+ preset_path="/tmp/test-runai/llm-d/templates/modelservice/presets/${preset}"
+ if [ -f "${preset_path}" ]; then
+ if grep -q -- "--load-format" "${preset_path}" && grep -q "RUNAI_STREAMER_CONCURRENCY" "${preset_path}"; then
+ echo "✅ PASS: Preset ${preset} correctly configured"
+ else
+ echo "❌ FAIL: Preset ${preset} missing required configurations"
+ exit 1
+ fi
+ else
+ echo "❌ FAIL: Preset file ${preset} not found"
+ exit 1
+ fi
+done
+
+# Test 5: Validate JSON structure in model-loader-extra-config
+echo "Test 5: Testing JSON structure in model-loader-extra-config"
+# Extract the JSON from the rendered template and validate it
+JSON_LINE=$(grep -A1 -- "--model-loader-extra-config" "${PRESET_FILE}" | tail -n1)
+if echo "${JSON_LINE}" | grep -q 'concurrency.*32' && echo "${JSON_LINE}" | grep -q 'memory_limit.*1073741824' && echo "${JSON_LINE}" | grep -q 'pattern.*custom-model-rank'; then
+ echo "✅ PASS: JSON structure in model-loader-extra-config is correct"
+else
+ echo "❌ FAIL: JSON structure in model-loader-extra-config is incorrect"
+ echo "Found: ${JSON_LINE}"
+ exit 1
+fi
+
+# Cleanup
+rm -rf /tmp/test-default /tmp/test-runai /tmp/test-sample
+
+echo ""
+echo "🎉 All tests passed! loadFormat and runai_streamer functionality is working correctly."
diff --git a/charts/llm-d/tests/test-all-loadformat.sh b/charts/llm-d/tests/test-all-loadformat.sh
new file mode 100644
index 0000000..9ccb874
--- /dev/null
+++ b/charts/llm-d/tests/test-all-loadformat.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+set -euo pipefail
+
+# Comprehensive test runner for loadFormat and runai_streamer functionality
+# This script runs all tests related to the PR changes
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+echo "🧪 Running comprehensive tests for loadFormat and runai_streamer functionality"
+echo "=========================================================================="
+
+# Run loadFormat template rendering tests
+echo ""
+echo "📋 Running loadFormat template rendering tests..."
+if "${SCRIPT_DIR}/loadformat-test.sh"; then
+ echo "✅ loadFormat template rendering tests: PASSED"
+else
+ echo "❌ loadFormat template rendering tests: FAILED"
+ exit 1
+fi
+
+# Run URI validation tests
+echo ""
+echo "📋 Running URI validation tests..."
+if "${SCRIPT_DIR}/uri-validation-test.sh"; then
+ echo "✅ URI validation tests: PASSED"
+else
+ echo "❌ URI validation tests: FAILED"
+ exit 1
+fi
+
+echo ""
+echo "🎉 All tests passed! The loadFormat and runai_streamer implementation is working correctly."
+echo ""
+echo "Summary of tested functionality:"
+echo "- ✅ loadFormat configuration in values.yaml"
+echo "- ✅ runai_streamer environment variables rendering"
+echo "- ✅ model-loader-extra-config JSON generation"
+echo "- ✅ All modelservice presets support runai_streamer"
+echo "- ✅ Sample application integration with runai_streamer"
+echo "- ✅ URI validation for hf://, pvc://, s3://, gcs:// schemes"
+echo "- ✅ Error handling for unsupported URI schemes"
+echo "- ✅ Backward compatibility with existing configurations"
diff --git a/charts/llm-d/tests/uri-validation-test.sh b/charts/llm-d/tests/uri-validation-test.sh
new file mode 100644
index 0000000..c50c810
--- /dev/null
+++ b/charts/llm-d/tests/uri-validation-test.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+set -euo pipefail
+
+# Test script for sample application model artifact URI validation
+# This script validates that the sample application helper correctly handles different URI types
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+CHART_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
+
+echo "Testing sample application model artifact URI validation..."
+
+# Test 1: Test hf:// URI support (should work)
+echo "Test 1: Testing hf:// URI support"
+helm template test-hf "${CHART_DIR}" \
+ --values "${CHART_DIR}/ci/default-values.yaml" \
+ --set sampleApplication.enabled=true \
+ --set sampleApplication.model.modelArtifactURI="hf://microsoft/DialoGPT-medium" \
+ --output-dir /tmp/test-hf 2>/dev/null
+
+if [ -f "/tmp/test-hf/llm-d/templates/sample-application/modelservice.yaml" ]; then
+ echo "✅ PASS: hf:// URI correctly handled"
+else
+ echo "❌ FAIL: hf:// URI not handled correctly"
+ exit 1
+fi
+
+# Test 2: Test pvc:// URI support (should work)
+echo "Test 2: Testing pvc:// URI support"
+helm template test-pvc "${CHART_DIR}" \
+ --values "${CHART_DIR}/ci/default-values.yaml" \
+ --set sampleApplication.enabled=true \
+ --set sampleApplication.model.modelArtifactURI="pvc://my-model-pvc/model" \
+ --output-dir /tmp/test-pvc 2>/dev/null
+
+if [ -f "/tmp/test-pvc/llm-d/templates/sample-application/modelservice.yaml" ]; then
+ echo "✅ PASS: pvc:// URI correctly handled"
+else
+ echo "❌ FAIL: pvc:// URI not handled correctly"
+ exit 1
+fi
+
+# Test 3: Test s3:// URI without runai_streamer (should fail)
+echo "Test 3: Testing s3:// URI without runai_streamer (should fail)"
+set +e
+helm template test-s3-fail "${CHART_DIR}" \
+ --values "${CHART_DIR}/ci/default-values.yaml" \
+ --set sampleApplication.enabled=true \
+ --set sampleApplication.model.modelArtifactURI="s3://my-bucket/model" \
+ --output-dir /tmp/test-s3-fail >/dev/null 2>&1
+EXIT_CODE=$?
+set -e
+
+if [ $EXIT_CODE -ne 0 ]; then
+ echo "✅ PASS: s3:// URI correctly rejected without runai_streamer"
+else
+ echo "❌ FAIL: s3:// URI should have been rejected without runai_streamer"
+ exit 1
+fi
+
+# Test 4: Test s3:// URI with runai_streamer (should work)
+echo "Test 4: Testing s3:// URI with runai_streamer (should work)"
+helm template test-s3-success "${CHART_DIR}" \
+ --values "${CHART_DIR}/ci/runai-streamer-values.yaml" \
+ --set sampleApplication.enabled=true \
+ --set sampleApplication.model.modelArtifactURI="s3://my-bucket/model" \
+ --output-dir /tmp/test-s3-success 2>/dev/null
+
+if [ -f "/tmp/test-s3-success/llm-d/templates/sample-application/modelservice.yaml" ]; then
+ echo "✅ PASS: s3:// URI correctly handled with runai_streamer"
+else
+ echo "❌ FAIL: s3:// URI not handled correctly with runai_streamer"
+ exit 1
+fi
+
+# Test 5: Test gcs:// URI with runai_streamer (should work)
+echo "Test 5: Testing gcs:// URI with runai_streamer (should work)"
+helm template test-gcs-success "${CHART_DIR}" \
+ --values "${CHART_DIR}/ci/runai-streamer-values.yaml" \
+ --set sampleApplication.enabled=true \
+ --set sampleApplication.model.modelArtifactURI="gcs://my-bucket/model" \
+ --output-dir /tmp/test-gcs-success 2>/dev/null
+
+if [ -f "/tmp/test-gcs-success/llm-d/templates/sample-application/modelservice.yaml" ]; then
+ echo "✅ PASS: gcs:// URI correctly handled with runai_streamer"
+else
+ echo "❌ FAIL: gcs:// URI not handled correctly with runai_streamer"
+ exit 1
+fi
+
+# Test 6: Test that loadFormat is correctly passed to sample application
+echo "Test 6: Testing loadFormat configuration in sample application"
+
+# Check that with runai_streamer, the load-format argument is passed
+SAMPLE_FILE_RUNAI="/tmp/test-s3-success/llm-d/templates/sample-application/modelservice.yaml"
+if grep -q -- "--load-format" "${SAMPLE_FILE_RUNAI}" && grep -q "runai_streamer" "${SAMPLE_FILE_RUNAI}"; then
+ echo "✅ PASS: Sample application correctly includes load-format argument"
+else
+ echo "❌ FAIL: Sample application should include load-format argument"
+ exit 1
+fi
+
+# Check that runai_streamer environment variables are included
+if grep -q "RUNAI_STREAMER_CONCURRENCY" "${SAMPLE_FILE_RUNAI}"; then
+ echo "✅ PASS: Sample application includes runai_streamer environment variables"
+else
+ echo "❌ FAIL: Sample application should include runai_streamer environment variables"
+ exit 1
+fi
+
+# Check that the modelArtifacts URI is set correctly
+if grep -q "uri: s3://my-bucket/model" "${SAMPLE_FILE_RUNAI}"; then
+ echo "✅ PASS: Sample application correctly sets modelArtifacts URI"
+else
+ echo "❌ FAIL: Sample application should set modelArtifacts URI correctly"
+ exit 1
+fi
+
+# Test 7: Test unknown URI scheme without runai_streamer (should fail)
+echo "Test 7: Testing unknown URI scheme without runai_streamer (should fail)"
+set +e
+helm template test-unknown-fail "${CHART_DIR}" \
+ --values "${CHART_DIR}/ci/default-values.yaml" \
+ --set sampleApplication.enabled=true \
+ --set sampleApplication.model.modelArtifactURI="unknown://some-path" \
+ --output-dir /tmp/test-unknown-fail >/dev/null 2>&1
+EXIT_CODE=$?
+set -e
+
+if [ $EXIT_CODE -ne 0 ]; then
+ echo "✅ PASS: Unknown URI scheme correctly rejected without runai_streamer"
+else
+ echo "❌ FAIL: Unknown URI scheme should have been rejected without runai_streamer"
+ exit 1
+fi
+
+# Cleanup
+rm -rf /tmp/test-hf /tmp/test-pvc /tmp/test-s3-fail /tmp/test-s3-success /tmp/test-gcs-success /tmp/test-unknown-fail
+
+echo ""
+echo "🎉 All URI validation tests passed!"
diff --git a/charts/llm-d/values.schema.json b/charts/llm-d/values.schema.json
index 332fae1..a993e5b 100644
--- a/charts/llm-d/values.schema.json
+++ b/charts/llm-d/values.schema.json
@@ -3880,7 +3880,7 @@
"description": "EnvVar represents an environment variable present in a Container.",
"properties": {
"name": {
- "description": "Name of the environment variable. Must be a C_IDENTIFIER.",
+ "description": "Name of the environment variable. May consist of any printable ASCII characters except '='.",
"type": "string"
},
"value": {
@@ -6791,6 +6791,133 @@
"default": "See below",
"description": "vLLM container options",
"properties": {
+ "extraArgs": {
+ "description": "Additional command line arguments for vLLM",
+ "items": {
+ "required": [],
+ "type": "string"
+ },
+ "required": [],
+ "title": "extraArgs"
+ },
+ "extraEnvVars": {
+ "description": "Additional environment variables for vLLM containers",
+ "items": {
+ "description": "EnvVar represents an environment variable present in a Container.",
+ "properties": {
+ "name": {
+ "description": "Name of the environment variable. May consist of any printable ASCII characters except '='.",
+ "type": "string"
+ },
+ "value": {
+ "description": "Variable references $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. \"$$(VAR_NAME)\" will produce the string literal \"$(VAR_NAME)\". Escaped references will never be expanded, regardless of whether the variable exists or not. Defaults to \"\".",
+ "type": "string"
+ },
+ "valueFrom": {
+ "description": "EnvVarSource represents a source for the value of an EnvVar.",
+ "properties": {
+ "configMapKeyRef": {
+ "description": "Selects a key from a ConfigMap.",
+ "properties": {
+ "key": {
+ "description": "The key to select.",
+ "type": "string"
+ },
+ "name": {
+ "description": "Name of the referent. This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names",
+ "type": "string"
+ },
+ "optional": {
+ "description": "Specify whether the ConfigMap or its key must be defined",
+ "type": "boolean"
+ }
+ },
+ "required": [
+ "key"
+ ],
+ "type": "object",
+ "x-kubernetes-map-type": "atomic"
+ },
+ "fieldRef": {
+ "description": "ObjectFieldSelector selects an APIVersioned field of an object.",
+ "properties": {
+ "apiVersion": {
+ "description": "Version of the schema the FieldPath is written in terms of, defaults to \"v1\".",
+ "type": "string"
+ },
+ "fieldPath": {
+ "description": "Path of the field to select in the specified API version.",
+ "type": "string"
+ }
+ },
+ "required": [
+ "fieldPath"
+ ],
+ "type": "object",
+ "x-kubernetes-map-type": "atomic"
+ },
+ "resourceFieldRef": {
+ "description": "ResourceFieldSelector represents container resources (cpu, memory) and their output format",
+ "properties": {
+ "containerName": {
+ "description": "Container name: required for volumes, optional for env vars",
+ "type": "string"
+ },
+ "divisor": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "number"
+ }
+ ]
+ },
+ "resource": {
+ "description": "Required: resource to select",
+ "type": "string"
+ }
+ },
+ "required": [
+ "resource"
+ ],
+ "type": "object",
+ "x-kubernetes-map-type": "atomic"
+ },
+ "secretKeyRef": {
+ "description": "SecretKeySelector selects a key of a Secret.",
+ "properties": {
+ "key": {
+ "description": "The key of the secret to select from. Must be a valid secret key.",
+ "type": "string"
+ },
+ "name": {
+ "description": "Name of the referent. This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names",
+ "type": "string"
+ },
+ "optional": {
+ "description": "Specify whether the Secret or its key must be defined",
+ "type": "boolean"
+ }
+ },
+ "required": [
+ "key"
+ ],
+ "type": "object",
+ "x-kubernetes-map-type": "atomic"
+ }
+ },
+ "type": "object"
+ }
+ },
+ "required": [
+ "name"
+ ],
+ "type": "object"
+ },
+ "required": [],
+ "title": "extraEnvVars"
+ },
"image": {
"additionalProperties": false,
"default": "See below",
@@ -6833,6 +6960,12 @@
"required": [],
"title": "image"
},
+ "loadFormat": {
+ "default": "",
+ "description": "Load format for vLLM model loading <br /> When set to \"runai_streamer\", enables Run:AI Model Streamer for loading models from object storage <br /> Options: \"\", \"runai_streamer\", \"runai_streamer_sharded\"",
+ "required": [],
+ "title": "loadFormat"
+ },
"logLevel": {
"default": "INFO",
"description": "Log level to run VLLM with
VLLM supports standard python log-levels, see: https://docs.python.org/3/library/logging.html#logging-levels
Options: \"DEBUG\", \"INFO\", \"WARNING\", \"ERROR\", \"CRITICAL\"",
@@ -6852,6 +6985,66 @@
},
"required": [],
"title": "metrics"
+ },
+ "runaiStreamer": {
+ "additionalProperties": false,
+ "default": "See below",
+ "description": "RunAI Model Streamer configuration options <br /> These options are used when loadFormat is set to \"runai_streamer\" or \"runai_streamer_sharded\"",
+ "properties": {
+ "chunkBytesize": {
+ "default": "",
+ "description": "Controls the maximum size of memory each OS thread reads from the file at once <br /> Positive integer in bytes <br /> Default: 2,097,152 (2 MiB) for file system, 8,388,608 (8 MiB) for object store",
+ "required": [],
+ "title": "chunkBytesize"
+ },
+ "concurrency": {
+ "default": "16",
+ "description": "Controls the level of concurrency and number of OS threads reading tensors <br /> Positive integer",
+ "required": [],
+ "title": "concurrency"
+ },
+ "memoryLimit": {
+ "default": "-1",
+ "description": "Controls the size of the CPU Memory buffer to which tensors are read <br /> Integer: -1 (UNLIMITED), 0 (MIN), or positive integer in bytes",
+ "required": [],
+ "title": "memoryLimit"
+ },
+ "pattern": {
+ "default": "",
+ "description": "Custom naming pattern for sharded model files <br /> Used with runai_streamer_sharded load format <br /> Example: \"custom-model-rank-{rank}-part-{part}.safetensors\"",
+ "required": [],
+ "title": "pattern"
+ },
+ "s3": {
+ "additionalProperties": false,
+ "default": "See below",
+ "description": "S3/Object store configuration",
+ "properties": {
+ "caBundlePath": {
+ "default": "",
+ "description": "Path to a certificate bundle to use for HTTPS certificate validation",
+ "required": [],
+ "title": "caBundlePath"
+ },
+ "endpointUrl": {
+ "default": "",
+ "description": "Override url endpoint for reading from S3 compatible object store <br /> Mandatory for S3-compatible stores like GCS, Minio",
+ "required": [],
+ "title": "endpointUrl"
+ },
+ "useVirtualAddressing": {
+ "default": "true",
+ "description": "Controls parsing the url endpoint for reading from object store <br /> Boolean: true enables virtual addressing, false uses path-style",
+ "required": [],
+ "title": "useVirtualAddressing"
+ }
+ },
+ "required": [],
+ "title": "s3"
+ }
+ },
+ "required": [],
+ "title": "runaiStreamer"
}
},
"required": [],
@@ -10492,7 +10685,7 @@
"description": "EnvVar represents an environment variable present in a Container.",
"properties": {
"name": {
- "description": "Name of the environment variable. Must be a C_IDENTIFIER.",
+ "description": "Name of the environment variable. May consist of any printable ASCII characters except '='.",
"type": "string"
},
"value": {
diff --git a/charts/llm-d/values.schema.tmpl.json b/charts/llm-d/values.schema.tmpl.json
index 2a92aef..94ae791 100644
--- a/charts/llm-d/values.schema.tmpl.json
+++ b/charts/llm-d/values.schema.tmpl.json
@@ -1442,6 +1442,24 @@
"default": "See below",
"description": "vLLM container options",
"properties": {
+ "extraArgs": {
+ "description": "Additional command line arguments for vLLM",
+ "items": {
+ "required": [],
+ "type": "string"
+ },
+ "required": [],
+ "title": "extraArgs"
+ },
+ "extraEnvVars": {
+ "description": "Additional environment variables for vLLM containers",
+ "items": {
+ "$ref": "https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/master/_definitions.json#/definitions/io.k8s.api.core.v1.EnvVar",
+ "required": []
+ },
+ "required": [],
+ "title": "extraEnvVars"
+ },
"image": {
"additionalProperties": false,
"default": "See below",
@@ -1484,6 +1502,12 @@
"required": [],
"title": "image"
},
+ "loadFormat": {
+ "default": "",
+ "description": "Load format for vLLM model loading \u003cbr /\u003e When set to \"runai_streamer\", enables Run:AI Model Streamer for loading models from object storage \u003cbr /\u003e Options: \"\", \"runai_streamer\", \"runai_streamer_sharded\"",
+ "required": [],
+ "title": "loadFormat"
+ },
"logLevel": {
"default": "INFO",
"description": "Log level to run VLLM with \u003cbr /\u003e VLLM supports standard python log-levels, see: https://docs.python.org/3/library/logging.html#logging-levels \u003cbr /\u003e Options: \"DEBUG\", \"INFO\", \"WARNING\", \"ERROR\", \"CRITICAL\"",
@@ -1503,6 +1527,66 @@
},
"required": [],
"title": "metrics"
+ },
+ "runaiStreamer": {
+ "additionalProperties": false,
+ "default": "See below",
+ "description": "RunAI Model Streamer configuration options \u003cbr /\u003e These options are used when loadFormat is set to \"runai_streamer\" or \"runai_streamer_sharded\"",
+ "properties": {
+ "chunkBytesize": {
+ "default": "",
+ "description": "Controls the maximum size of memory each OS thread reads from the file at once \u003cbr /\u003e Positive integer in bytes \u003cbr /\u003e Default: 2,097,152 (2 MiB) for file system, 8,388,608 (8 MiB) for object store",
+ "required": [],
+ "title": "chunkBytesize"
+ },
+ "concurrency": {
+ "default": "16",
+ "description": "Controls the level of concurrency and number of OS threads reading tensors \u003cbr /\u003e Positive integer",
+ "required": [],
+ "title": "concurrency"
+ },
+ "memoryLimit": {
+ "default": "-1",
+ "description": "Controls the size of the CPU Memory buffer to which tensors are read \u003cbr /\u003e Integer: -1 (UNLIMITED), 0 (MIN), or positive integer in bytes",
+ "required": [],
+ "title": "memoryLimit"
+ },
+ "pattern": {
+ "default": "",
+ "description": "Custom naming pattern for sharded model files \u003cbr /\u003e Used with runai_streamer_sharded load format \u003cbr /\u003e Example: \"custom-model-rank-{rank}-part-{part}.safetensors\"",
+ "required": [],
+ "title": "pattern"
+ },
+ "s3": {
+ "additionalProperties": false,
+ "default": "See below",
+ "description": "S3/Object store configuration",
+ "properties": {
+ "caBundlePath": {
+ "default": "",
+ "description": "Path to a certificate bundle to use for HTTPS certificate validation",
+ "required": [],
+ "title": "caBundlePath"
+ },
+ "endpointUrl": {
+ "default": "",
+ "description": "Override url endpoint for reading from S3 compatible object store \u003cbr /\u003e Mandatory for S3-compatible stores like GCS, Minio",
+ "required": [],
+ "title": "endpointUrl"
+ },
+ "useVirtualAddressing": {
+ "default": "true",
+ "description": "Controls parsing the url endpoint for reading from object store \u003cbr /\u003e Boolean: true enables virtual addressing, false uses path-style",
+ "required": [],
+ "title": "useVirtualAddressing"
+ }
+ },
+ "required": [],
+ "title": "s3"
+ }
+ },
+ "required": [],
+ "title": "runaiStreamer"
}
},
"required": [],
diff --git a/charts/llm-d/values.yaml b/charts/llm-d/values.yaml
index b937e04..1ad53e3 100644
--- a/charts/llm-d/values.yaml
+++ b/charts/llm-d/values.yaml
@@ -795,6 +795,63 @@ modelservice:
#
# Options: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
logLevel: "INFO"
+ # -- Load format for vLLM model loading
+ #
+ # When set to "runai_streamer", enables Run:AI Model Streamer for loading models from object storage
+ #
+ # Options: "", "runai_streamer", "runai_streamer_sharded"
+ loadFormat: ""
+
+ # -- RunAI Model Streamer configuration options
+ #
+ # These options are used when loadFormat is set to "runai_streamer" or "runai_streamer_sharded"
+ # @default -- See below
+ runaiStreamer:
+
+ # -- Controls the level of concurrency and number of OS threads reading tensors
+ #
+ # Positive integer
+ concurrency: 16
+
+ # -- Controls the maximum size of memory each OS thread reads from the file at once
+ #
+ # Positive integer in bytes
+ #
+ # Default: 2,097,152 (2 MiB) for file system, 8,388,608 (8 MiB) for object store
+ chunkBytesize: ""
+
+ # -- Controls the size of the CPU Memory buffer to which tensors are read
+ #
+ # Integer: -1 (UNLIMITED), 0 (MIN), or positive integer in bytes
+ memoryLimit: -1
+
+ # -- Custom naming pattern for sharded model files
+ #
+ # Used with runai_streamer_sharded load format
+ #
+ # Example: "custom-model-rank-{rank}-part-{part}.safetensors"
+ pattern: ""
+
+ # -- S3/Object store configuration
+ # @default -- See below
+ s3:
+
+ # -- Override url endpoint for reading from S3 compatible object store
+ #
+ # Mandatory for S3-compatible stores like GCS, Minio
+ endpointUrl: ""
+
+ # -- Path to a certificate bundle to use for HTTPS certificate validation
+ caBundlePath: ""
+
+ # -- Controls parsing the url endpoint for reading from object store
+ #
+ # Boolean: true enables virtual addressing, false uses path-style
+ useVirtualAddressing: true
+
+ # @schema
+ # items:
+ # type: string
+ # @schema
+ # -- Additional command line arguments for vLLM
+ extraArgs: []
+
+ # @schema
+ # items:
+ # $ref: https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/master/_definitions.json#/definitions/io.k8s.api.core.v1.EnvVar
+ # @schema
+ # -- Additional environment variables for vLLM containers
+ extraEnvVars: []
+
# -- Routing proxy container options
# @default -- See below
routingProxy: