Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/dstack/_internal/core/backends/base/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,7 @@ def get_gateway_user_data(authorized_key: str) -> str:
packages=[
"nginx",
"python3.10-venv",
"python3-pip", # Add pip for sglang-router installation
],
snap={"commands": [["install", "--classic", "certbot"]]},
runcmd=[
Expand All @@ -850,6 +851,8 @@ def get_gateway_user_data(authorized_key: str) -> str:
"s/# server_names_hash_bucket_size 64;/server_names_hash_bucket_size 128;/",
"/etc/nginx/nginx.conf",
],
# Install sglang-router system-wide. Can be conditionally installed in the future.
["pip", "install", "sglang-router"],
["su", "ubuntu", "-c", " && ".join(get_dstack_gateway_commands())],
],
ssh_authorized_keys=[authorized_key],
Expand Down Expand Up @@ -979,7 +982,8 @@ def get_dstack_gateway_wheel(build: str) -> str:
r.raise_for_status()
build = r.text.strip()
logger.debug("Found the latest gateway build: %s", build)
return f"{base_url}/dstack_gateway-{build}-py3-none-any.whl"
# return f"{base_url}/dstack_gateway-{build}-py3-none-any.whl"
return "https://bihan-test-bucket.s3.eu-west-1.amazonaws.com/dstack_gateway-0.0.0-py3-none-any.whl"


def get_dstack_gateway_commands() -> List[str]:
Expand Down
1 change: 1 addition & 0 deletions src/dstack/_internal/core/models/gateways.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class GatewayConfiguration(CoreModel):
default: Annotated[bool, Field(description="Make the gateway default")] = False
backend: Annotated[BackendType, Field(description="The gateway backend")]
region: Annotated[str, Field(description="The gateway region")]
router: Annotated[Optional[str], Field(description="The router type, e.g. `sglang`")] = None
domain: Annotated[
Optional[str], Field(description="The gateway domain, e.g. `example.com`")
] = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@ limit_req_zone {{ zone.key }} zone={{ zone.name }}:10m rate={{ zone.rpm }}r/m;

{% if replicas %}
upstream {{ domain }}.upstream {
{% if router == "sglang" %}
server 127.0.0.1:3000; # SGLang router on the gateway
{% else %}
{% for replica in replicas %}
server unix:{{ replica.socket }}; # replica {{ replica.id }}
{% endfor %}
{% endif %}
}
{% else %}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Internal SGLang worker endpoints (Jinja2 template that renders nginx config).
# For every entry in `replicas` this emits one upstream pointing at the
# replica's unix socket and one loopback-only server that proxies to it.
{% for replica in replicas %}
# Worker {{ loop.index }}
upstream sglang_worker_{{ loop.index }}_upstream {
server unix:{{ replica.socket }};
}

server {
# Jinja's loop.index is 1-based, so the first worker listens on port 10001.
listen 127.0.0.1:{{ 10000 + loop.index }};
access_log off; # disable access logs for this internal endpoint

# Allow up to 300s between successive reads from / writes to the worker.
proxy_read_timeout 300s;
proxy_send_timeout 300s;

location / {
proxy_pass http://sglang_worker_{{ loop.index }}_upstream;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
# NOTE(review): Connection is cleared here while Upgrade is still forwarded.
# A WebSocket upgrade normally also needs `Connection "upgrade"` - confirm
# whether forwarding Upgrade is intended or can be dropped.
proxy_set_header Connection "";
proxy_set_header Upgrade $http_upgrade;
}
}
{% endfor %}
1 change: 1 addition & 0 deletions src/dstack/_internal/proxy/gateway/routers/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ async def register_service(
model=body.options.openai.model if body.options.openai is not None else None,
ssh_private_key=body.ssh_private_key,
repo=repo,
router=body.router,
nginx=nginx,
service_conn_pool=service_conn_pool,
)
Expand Down
1 change: 1 addition & 0 deletions src/dstack/_internal/proxy/gateway/schemas/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class RegisterServiceRequest(BaseModel):
options: Options
ssh_private_key: str
rate_limits: tuple[RateLimit, ...] = ()
router: Optional[str] = None


class RegisterReplicaRequest(BaseModel):
Expand Down
Loading
Loading