Skip to content

Commit 243ade8

Browse files
authored
Add create cserve deployment example script (#91)
* Update CServe SDK APIs to use cserve_v2
* Add get_cluster_id
* Add example script for creating a CServe deployment
1 parent 4b38e2c commit 243ade8

File tree

3 files changed

+53
-8
lines changed

3 files changed

+53
-8
lines changed

centml/cli/cluster.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,10 +177,13 @@ def get(type, id):
177177
click.echo(
178178
tabulate(
179179
[
180-
("Hugging face model", deployment.model),
180+
("Hugging face model", deployment.recipe.model),
181181
(
182182
"Parallelism",
183-
{"tensor": deployment.tensor_parallel_size, "pipeline": deployment.pipeline_parallel_size},
183+
{
184+
"tensor": deployment.recipe.additional_properties['tensor_parallel_size'],
185+
"pipeline": deployment.recipe.additional_properties['pipeline_parallel_size'],
186+
},
184187
),
185188
("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}),
186189
("Max concurrency", deployment.concurrency or "None"),

centml/sdk/api.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
DeploymentStatus,
77
CreateInferenceDeploymentRequest,
88
CreateComputeDeploymentRequest,
9-
CreateCServeDeploymentRequest,
9+
CreateCServeV2DeploymentRequest,
1010
)
1111

1212
from centml.sdk import auth
@@ -32,16 +32,16 @@ def get_compute(self, id):
3232
return self._api.get_compute_deployment_deployments_compute_deployment_id_get(id)
3333

3434
def get_cserve(self, id):
35-
return self._api.get_cserve_deployment_deployments_cserve_deployment_id_get(id)
35+
return self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(id)
3636

3737
def create_inference(self, request: CreateInferenceDeploymentRequest):
3838
return self._api.create_inference_deployment_deployments_inference_post(request)
3939

4040
def create_compute(self, request: CreateComputeDeploymentRequest):
4141
return self._api.create_compute_deployment_deployments_compute_post(request)
4242

43-
def create_cserve(self, request: CreateCServeDeploymentRequest):
44-
return self._api.create_cserve_deployment_deployments_cserve_post(request)
43+
def create_cserve(self, request: CreateCServeV2DeploymentRequest):
44+
return self._api.create_cserve_v2_deployment_deployments_cserve_v2_post(request)
4545

4646
def _update_status(self, id, new_status):
4747
status_req = platform_api_python_client.DeploymentStatusRequest(status=new_status)
@@ -67,8 +67,16 @@ def get_hardware_instances(self, cluster_id=None):
6767
def get_prebuilt_images(self, depl_type: DeploymentType):
6868
return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type)
6969

70-
def get_cserve_recipe(self):
71-
return self._api.get_cserve_recipe_deployments_cserve_recipes_get().results
70+
def get_cserve_recipe(self, model=None, hf_token=None):
71+
return self._api.get_cserve_recipe_deployments_cserve_recipes_get(model=model, hf_token=hf_token).results
72+
73+
def get_cluster_id(self, hardware_instance_id):
    """Return the cluster id of the hardware instance with the given id.

    The instance is looked up in the collection returned by
    ``self.get_hardware_instances()``; the ``cluster_id`` of the first
    entry whose ``id`` equals ``hardware_instance_id`` is returned.

    Raises:
        ValueError: if no hardware instance has that id. ``ValueError`` is
            a subclass of ``Exception``, so existing ``except Exception``
            handlers keep working.
    """
    # Stop at the first match instead of materialising every match in a list.
    match = next(
        (hw for hw in self.get_hardware_instances() if hw.id == hardware_instance_id),
        None,
    )
    if match is None:
        raise ValueError(f"Invalid hardware instance id {hardware_instance_id}")

    return match.cluster_id
7280

7381

7482
@contextmanager

examples/sdk/create_cserve.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import time
2+
import centml
3+
from centml.sdk.api import get_centml_client
4+
from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest
5+
6+
with get_centml_client() as cclient:
    # Get the recipes for the Qwen model and pick the fastest variant of the
    # first one. Fail with a clear message instead of an IndexError when the
    # platform returns no recipe for this model.
    recipes = cclient.get_cserve_recipe(model="Qwen/Qwen2-VL-7B-Instruct")
    if not recipes:
        raise RuntimeError("No CServe recipe found for Qwen/Qwen2-VL-7B-Instruct")
    fastest = recipes[0].fastest

    # Modify the recipe if necessary
    fastest.recipe.additional_properties["max_num_seqs"] = 512

    # Create CServeV2 deployment on the cluster that hosts the recipe's
    # hardware instance
    request = CreateCServeV2DeploymentRequest(
        name="qwen-fastest",
        cluster_id=cclient.get_cluster_id(fastest.hardware_instance_id),
        hardware_instance_id=fastest.hardware_instance_id,
        recipe=fastest.recipe,
        min_scale=1,
        max_scale=1,
        env_vars={},
    )
    response = cclient.create_cserve(request)
    print("Create deployment response: ", response)

    # From here on the deployment exists, so always delete it, even if one
    # of the intermediate calls fails; otherwise the example would leave a
    # deployment behind.
    try:
        # Get deployment details
        deployment = cclient.get_cserve(response.id)
        print("Deployment details: ", deployment)

        # Pause the deployment
        cclient.pause(deployment.id)
    finally:
        # Delete the deployment
        # NOTE(review): uses response.id, which is assumed to equal
        # deployment.id since the deployment was fetched by that id.
        cclient.delete(response.id)

0 commit comments

Comments
 (0)