77from centml .sdk .api import get_centml_client
88
99
10+ # Convert a deployment type enum to a user-friendly name.
1011depl_type_to_name_map = {
11- DeploymentType .INFERENCE : 'inference' ,
12- DeploymentType .COMPUTE : 'compute' ,
13- DeploymentType .COMPILATION : 'compilation' ,
14- DeploymentType .INFERENCE_V2 : 'inference' ,
15- DeploymentType .COMPUTE_V2 : 'compute' ,
16- DeploymentType .CSERVE : 'cserve' ,
17- DeploymentType .CSERVE_V2 : 'cserve' ,
18- DeploymentType .RAG : 'rag' ,
12+ DeploymentType .INFERENCE : "inference" ,
13+ DeploymentType .COMPUTE : "compute" ,
14+ DeploymentType .COMPILATION : "compilation" ,
15+ DeploymentType .INFERENCE_V2 : "inference" ,
16+ DeploymentType .INFERENCE_V3 : "inference" ,
17+ DeploymentType .COMPUTE_V2 : "compute" ,
18+ # From the user's perspective, these are all cserve.
19+ DeploymentType .CSERVE : "cserve" ,
20+ DeploymentType .CSERVE_V2 : "cserve" ,
21+ DeploymentType .CSERVE_V3 : "cserve" ,
22+ DeploymentType .RAG : "rag" ,
1923}
24+ # Use the latest type for user requests.
2025depl_name_to_type_map = {
21- ' inference' : DeploymentType .INFERENCE_V2 ,
22- ' cserve' : DeploymentType .CSERVE_V2 ,
23- ' compute' : DeploymentType .COMPUTE_V2 ,
24- ' rag' : DeploymentType .RAG ,
26+ " inference" : DeploymentType .INFERENCE_V3 ,
27+ " cserve" : DeploymentType .CSERVE_V3 ,
28+ " compute" : DeploymentType .COMPUTE_V2 ,
29+ " rag" : DeploymentType .RAG ,
2530}
2631
2732
@@ -56,6 +61,21 @@ def _format_ssh_key(ssh_key):
5661 return ssh_key [:32 ] + "..."
5762
5863
64+ def _get_replica_info (deployment ):
65+ """Extract replica information handling V2/V3 field differences"""
66+ # Check actual deployment object fields rather than depl_type
67+ # since unified get_cserve() can return either V2 or V3 objects
68+ if hasattr (deployment , 'min_replicas' ):
69+ # V3 deployment response object
70+ return {"min" : deployment .min_replicas , "max" : deployment .max_replicas }
71+ elif hasattr (deployment , 'min_scale' ):
72+ # V2 deployment response object
73+ return {"min" : deployment .min_scale , "max" : deployment .max_scale }
74+ else :
75+ # Fallback - shouldn't happen
76+ return {"min" : "N/A" , "max" : "N/A" }
77+
78+
5979def _get_ready_status (cclient , deployment ):
6080 api_status = deployment .status
6181 service_status = (
@@ -121,12 +141,12 @@ def get(type, id):
121141 with get_centml_client () as cclient :
122142 depl_type = depl_name_to_type_map [type ]
123143
124- if depl_type == DeploymentType .INFERENCE_V2 :
125- deployment = cclient .get_inference (id )
144+ if depl_type in [ DeploymentType .INFERENCE_V2 , DeploymentType . INFERENCE_V3 ] :
145+ deployment = cclient .get_inference (id ) # handles both V2 and V3
126146 elif depl_type == DeploymentType .COMPUTE_V2 :
127147 deployment = cclient .get_compute (id )
128- elif depl_type == DeploymentType .CSERVE_V2 :
129- deployment = cclient .get_cserve (id )
148+ elif depl_type in [ DeploymentType .CSERVE_V2 , DeploymentType . CSERVE_V3 ] :
149+ deployment = cclient .get_cserve (id ) # handles both V2 and V3
130150 else :
131151 sys .exit ("Please enter correct deployment type" )
132152
@@ -150,21 +170,18 @@ def get(type, id):
150170 )
151171
152172 click .echo ("Additional deployment configurations:" )
153- if depl_type == DeploymentType .INFERENCE_V2 :
154- click .echo (
155- tabulate (
156- [
157- ("Image" , deployment .image_url ),
158- ("Container port" , deployment .container_port ),
159- ("Healthcheck" , deployment .healthcheck or "/" ),
160- ("Replicas" , {"min" : deployment .min_scale , "max" : deployment .max_scale }),
161- ("Environment variables" , deployment .env_vars or "None" ),
162- ("Max concurrency" , deployment .concurrency or "None" ),
163- ],
164- tablefmt = "rounded_outline" ,
165- disable_numparse = True ,
166- )
167- )
173+ if depl_type in [DeploymentType .INFERENCE_V2 , DeploymentType .INFERENCE_V3 ]:
174+ replica_info = _get_replica_info (deployment )
175+ display_rows = [
176+ ("Image" , deployment .image_url ),
177+ ("Container port" , deployment .container_port ),
178+ ("Healthcheck" , deployment .healthcheck or "/" ),
179+ ("Replicas" , replica_info ),
180+ ("Environment variables" , deployment .env_vars or "None" ),
181+ ("Max concurrency" , deployment .concurrency or "None" ),
182+ ]
183+
184+ click .echo (tabulate (display_rows , tablefmt = "rounded_outline" , disable_numparse = True ))
168185 elif depl_type == DeploymentType .COMPUTE_V2 :
169186 click .echo (
170187 tabulate (
@@ -173,25 +190,22 @@ def get(type, id):
173190 disable_numparse = True ,
174191 )
175192 )
176- elif depl_type == DeploymentType .CSERVE_V2 :
177- click .echo (
178- tabulate (
179- [
180- ("Hugging face model" , deployment .recipe .model ),
181- (
182- "Parallelism" ,
183- {
184- "tensor" : deployment .recipe .additional_properties ['tensor_parallel_size' ],
185- "pipeline" : deployment .recipe .additional_properties ['pipeline_parallel_size' ],
186- },
187- ),
188- ("Replicas" , {"min" : deployment .min_scale , "max" : deployment .max_scale }),
189- ("Max concurrency" , deployment .concurrency or "None" ),
190- ],
191- tablefmt = "rounded_outline" ,
192- disable_numparse = True ,
193- )
194- )
193+ elif depl_type in [DeploymentType .CSERVE_V2 , DeploymentType .CSERVE_V3 ]:
194+ replica_info = _get_replica_info (deployment )
195+ display_rows = [
196+ ("Hugging face model" , deployment .recipe .model ),
197+ (
198+ "Parallelism" ,
199+ {
200+ "tensor" : deployment .recipe .additional_properties .get ("tensor_parallel_size" , "N/A" ),
201+ "pipeline" : deployment .recipe .additional_properties .get ("pipeline_parallel_size" , "N/A" ),
202+ },
203+ ),
204+ ("Replicas" , replica_info ),
205+ ("Max concurrency" , deployment .concurrency or "None" ),
206+ ]
207+
208+ click .echo (tabulate (display_rows , tablefmt = "rounded_outline" , disable_numparse = True ))
195209
196210
197211@click .command (help = "Delete a deployment" )
0 commit comments