Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"net/http"
"os"

rbg "github.com/bcfre/rbg-api/api/workloads/v1alpha1"
kedav1 "github.com/kedacore/keda/v2/apis/keda/v1alpha1"
ray "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
zaplog "go.uber.org/zap"
Expand Down Expand Up @@ -220,6 +221,7 @@ func main() {
{ray.SchemeGroupVersion, constants.RayClusterKind, ray.AddToScheme},
{knservingv1.SchemeGroupVersion, constants.KnativeServiceKind, knservingv1.AddToScheme},
{lws.SchemeGroupVersion, constants.LWSKind, lws.AddToScheme},
{rbg.SchemeGroupVersion, constants.RBGKind, rbg.AddToScheme},
{volcano.SchemeGroupVersion, constants.VolcanoQueueKind, volcano.AddToScheme},
{volcanobatch.SchemeGroupVersion, constants.VolcanoJobKind, volcanobatch.AddToScheme},
{kedav1.SchemeGroupVersion, constants.KEDAScaledObjectKind, kedav1.AddToScheme},
Expand Down
119 changes: 119 additions & 0 deletions config/samples/rbg/rbg-qwen3-0-6b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# apiVersion: ome.io/v1beta1
# kind: InferenceService
# metadata:
# name: qwen3-0-6b
# # namespace: qwen3-0-6b
# annotations:
# ome.io/deploymentMode: "RoleBasedGroup"
# spec:
# model:
# name: qwen3-0-6b
# runtime:
# name: srt-qwen3-0-6b
# # router:
# # minReplicas: 1
# # maxReplicas: 1
# engine:
# minReplicas: 1
# maxReplicas: 1
---
# Sample InferenceService "qwen3-0-6b-pd": references model "qwen3-0-6b" and
# runtime "srt-qwen3-0-6b-pd", and configures an engine and a decoder section,
# each with minReplicas and maxReplicas set to 2.
# NOTE(review): leading indentation is not visible in this view, so the exact
# nesting of the keys below is presumed rather than confirmed.
apiVersion: ome.io/v1beta1
kind: InferenceService
metadata:
name: qwen3-0-6b-pd
annotations:
# Requests the "RoleBasedGroup" deployment mode for this service.
ome.io/deploymentMode: "RoleBasedGroup"
spec:
model:
name: qwen3-0-6b
runtime:
name: srt-qwen3-0-6b-pd
# The router section is disabled (commented out) in this sample.
# router:
# minReplicas: 2
# maxReplicas: 2
engine:
# min and max are equal, so the replica count is pinned at 2.
minReplicas: 2
maxReplicas: 2
runner:
name: ome-container
# No image is set for the engine runner here; every candidate below is
# commented out. Presumably the image then comes from the referenced
# runtime — TODO confirm.
# image: docker.io/lmsysorg/sglang:v0.5.4.post3-cu129-amd64
# image: docker.io/lmsysorg/sglang:v0.5.5.post3-cu129-amd64
# image: nginx:latest
env:
# NOTE(review): "test4" looks like a debugging placeholder rather than a
# meaningful setting — confirm whether it belongs in a published sample.
- name: test4
value: test4
# command:
# - sh
# - -c
# - "sleep infinity"
decoder:
# min and max are equal, so the replica count is pinned at 2.
minReplicas: 2
maxReplicas: 2
runner:
name: ome-container
# image: docker.io/lmsysorg/sglang:v0.5.4.post3-cu129-amd64
# NOTE(review): the tag "latest-not-exist-4" reads like a deliberately
# unpullable test image (presumably used to exercise failure handling);
# confirm and replace with a real image before using this as a sample.
image: nginx:latest-not-exist-4
# command:
# - sh
# - -c
# - "sleep infinity"
---
# apiVersion: ome.io/v1beta1
# kind: InferenceService
# metadata:
# name: qwen3-0-6b-pd
# annotations:
# ome.io/deploymentMode: "RoleBasedGroup"
# spec:
# model:
# name: qwen3-0-6b
# runtime:
# name: srt-qwen3-0-6b-pd
# # router:
# # minReplicas: 2
# # maxReplicas: 2
# engine:
# minReplicas: 2
# maxReplicas: 2
# # runner:
# # name: ome-container
# # image: docker.io/lmsysorg/sglang:v0.5.4.post3-cu129-amd64
# # image: docker.io/lmsysorg/sglang:v0.5.5.post3-cu129-amd64
# # image: nginx:latest
# # command:
# # - sh
# # - -c
# # - "sleep infinity"
# decoder:
# minReplicas: 2
# maxReplicas: 2
# # runner:
# # name: ome-container
# # image: docker.io/lmsysorg/sglang:v0.5.4.post3-cu129-amd64
# # image: nginx:latest
# # command:
# # - sh
# # - -c
# # - "sleep infinity"
---
# apiVersion: ome.io/v1beta1
# kind: InferenceService
# metadata:
# name: qwen3-0-6b-mn-pd
# # namespace: qwen3-0-6b
# annotations:
# ome.io/deploymentMode: "RoleBasedGroup"
# spec:
# model:
# name: qwen3-0-6b
# runtime:
# name: srt-qwen3-0-6b-mn-pd
# router:
# minReplicas: 1
# maxReplicas: 1
# engine:
# minReplicas: 1
# maxReplicas: 1
# decoder:
# minReplicas: 1
# maxReplicas: 1
25 changes: 13 additions & 12 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/sgl-project/ome

go 1.25
go 1.25.5

require (
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.2
Expand All @@ -12,9 +12,10 @@ require (
github.com/aws/aws-sdk-go-v2/service/s3 v1.87.3
github.com/aws/aws-sdk-go-v2/service/sts v1.38.2
github.com/aws/smithy-go v1.24.0
github.com/bcfre/rbg-api v0.5.0
github.com/fsnotify/fsnotify v1.9.0
github.com/gin-gonic/gin v1.10.0
github.com/go-logr/logr v1.4.2
github.com/go-logr/logr v1.4.3
github.com/go-playground/validator/v10 v10.20.0
github.com/google/go-cmp v0.7.0
github.com/google/uuid v1.6.0
Expand All @@ -40,7 +41,7 @@ require (
go.uber.org/zap v1.27.0
golang.org/x/oauth2 v0.29.0
golang.org/x/sys v0.39.0
golang.org/x/term v0.33.0
golang.org/x/term v0.37.0
gomodules.xyz/jsonpatch/v2 v2.4.0
google.golang.org/api v0.231.0
google.golang.org/protobuf v1.36.10
Expand All @@ -53,7 +54,7 @@ require (
k8s.io/client-go v0.33.7
k8s.io/klog/v2 v2.130.1
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff
k8s.io/utils v0.0.0-20241210054802-24370beab758
k8s.io/utils v0.0.0-20251002143259-bc988d571ff4
knative.dev/pkg v0.0.0-20231115001034-97c7258e3a98
knative.dev/serving v0.39.3
sigs.k8s.io/controller-runtime v0.19.7
Expand Down Expand Up @@ -106,7 +107,7 @@ require (
github.com/emicklei/go-restful/v3 v3.12.1 // indirect
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/fxamacker/cbor/v2 v2.9.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-kit/log v0.2.1 // indirect
Expand Down Expand Up @@ -150,7 +151,7 @@ require (
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/openzipkin/zipkin-go v0.4.2 // indirect
Expand Down Expand Up @@ -189,13 +190,13 @@ require (
go.uber.org/dig v1.18.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.40.0 // indirect
golang.org/x/crypto v0.44.0 // indirect
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
golang.org/x/net v0.42.0 // indirect
golang.org/x/sync v0.16.0 // indirect
golang.org/x/text v0.28.0 // indirect
golang.org/x/net v0.47.0 // indirect
golang.org/x/sync v0.18.0 // indirect
golang.org/x/text v0.31.0 // indirect
golang.org/x/time v0.11.0 // indirect
golang.org/x/tools v0.35.0 // indirect
golang.org/x/tools v0.38.0 // indirect
google.golang.org/genproto v0.0.0-20250303144028-a0af3efb3deb // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250414145226-207652e42e2e // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250425173222-7b384671a197 // indirect
Expand All @@ -209,7 +210,7 @@ require (
k8s.io/component-base v0.33.7 // indirect
knative.dev/networking v0.0.0-20231115015815-3af9769712cd // indirect
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
sigs.k8s.io/randfill v1.0.0 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect
)
Loading