Skip to content

Commit 5d3f175

Browse files
authored
HF Optimum Neuron 0.4.1 DLCs (#5323)
* feat: updated huggingface neuronx inference and training containers
* feat: add support for new huggingface vllm neuronx containers
  Also added reference to the first available container URI.
* test: add image uri test for huggingface vllm neuronx images
1 parent 8e91c2e commit 5d3f175

File tree

5 files changed

+171
-3
lines changed

5 files changed

+171
-3
lines changed

src/sagemaker/huggingface/llm_utils.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,14 @@ def get_huggingface_llm_image_uri(
6767
image_scope="inference",
6868
inference_tool="neuronx",
6969
)
70+
if backend == "huggingface-vllm-neuronx":
71+
return image_uris.retrieve(
72+
"huggingface-vllm-neuronx",
73+
region=region,
74+
version=version,
75+
image_scope="inference",
76+
inference_tool="neuronx",
77+
)
7078
if backend == "huggingface-tei":
7179
return image_uris.retrieve(
7280
"huggingface-tei",

src/sagemaker/image_uri_config/huggingface-neuronx.json

Lines changed: 90 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,8 @@
99
"4.36": "4.36.2",
1010
"4.43": "4.43.2",
1111
"4.48": "4.48.1",
12-
"4.51": "4.51.0"
12+
"4.51": "4.51.0",
13+
"4.55": "4.55.4"
1314
},
1415
"versions": {
1516
"4.28.1": {
@@ -272,6 +273,49 @@
272273
"sdk2.24.1"
273274
]
274275
}
276+
},
277+
"4.55.4": {
278+
"version_aliases": {
279+
"pytorch2.7": "pytorch2.7.0"
280+
},
281+
"pytorch2.7.0": {
282+
"py_versions": [
283+
"py310"
284+
],
285+
"repository": "huggingface-pytorch-training-neuronx",
286+
"registries": {
287+
"ap-northeast-1": "763104351884",
288+
"ap-south-1": "763104351884",
289+
"ap-south-2": "772153158452",
290+
"ap-southeast-1": "763104351884",
291+
"ap-southeast-2": "763104351884",
292+
"ap-southeast-4": "457447274322",
293+
"ap-southeast-5": "550225433462",
294+
"ap-southeast-7": "590183813437",
295+
"cn-north-1": "727897471807",
296+
"cn-northwest-1": "727897471807",
297+
"eu-central-1": "763104351884",
298+
"eu-central-2": "380420809688",
299+
"eu-south-2": "503227376785",
300+
"eu-west-1": "763104351884",
301+
"eu-west-3": "763104351884",
302+
"il-central-1": "780543022126",
303+
"mx-central-1":"637423239942",
304+
"sa-east-1": "763104351884",
305+
"us-east-1": "763104351884",
306+
"us-east-2": "763104351884",
307+
"us-gov-east-1": "446045086412",
308+
"us-gov-west-1": "442386744353",
309+
"us-west-2": "763104351884",
310+
"ca-west-1": "204538143572"
311+
},
312+
"container_version": {
313+
"inf": "ubuntu22.04"
314+
},
315+
"sdk_versions": [
316+
"sdk2.26.0"
317+
]
318+
}
275319
}
276320
}
277321
},
@@ -284,7 +328,8 @@
284328
"4.34": "4.34.1",
285329
"4.36": "4.36.2",
286330
"4.43": "4.43.2",
287-
"4.51": "4.51.3"
331+
"4.51": "4.51.3",
332+
"4.55": "4.55.4"
288333
},
289334
"versions": {
290335
"4.28.1": {
@@ -592,6 +637,49 @@
592637
"sdk2.24.1"
593638
]
594639
}
640+
},
641+
"4.55.4": {
642+
"version_aliases": {
643+
"pytorch2.7": "pytorch2.7.1"
644+
},
645+
"pytorch2.7.1": {
646+
"py_versions": [
647+
"py310"
648+
],
649+
"repository": "huggingface-pytorch-inference-neuronx",
650+
"registries": {
651+
"ap-northeast-1": "763104351884",
652+
"ap-south-1": "763104351884",
653+
"ap-south-2": "772153158452",
654+
"ap-southeast-1": "763104351884",
655+
"ap-southeast-2": "763104351884",
656+
"ap-southeast-4": "457447274322",
657+
"ap-southeast-5": "550225433462",
658+
"ap-southeast-7": "590183813437",
659+
"cn-north-1": "727897471807",
660+
"cn-northwest-1": "727897471807",
661+
"eu-central-1": "763104351884",
662+
"eu-central-2": "380420809688",
663+
"eu-south-2": "503227376785",
664+
"eu-west-1": "763104351884",
665+
"eu-west-3": "763104351884",
666+
"il-central-1": "780543022126",
667+
"mx-central-1":"637423239942",
668+
"sa-east-1": "763104351884",
669+
"us-east-1": "763104351884",
670+
"us-east-2": "763104351884",
671+
"us-gov-east-1": "446045086412",
672+
"us-gov-west-1": "442386744353",
673+
"us-west-2": "763104351884",
674+
"ca-west-1": "204538143572"
675+
},
676+
"container_version": {
677+
"inf": "ubuntu22.04"
678+
},
679+
"sdk_versions": [
680+
"sdk2.26.0"
681+
]
682+
}
595683
}
596684
}
597685
}

src/sagemaker/image_uri_config/huggingface-vllm-neuronx.json

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
1+
{
2+
"inference": {
3+
"processors": [
4+
"inf2"
5+
],
6+
"version_aliases": {
7+
"0.4": "0.4.1"
8+
},
9+
"versions": {
10+
"0.4.1": {
11+
"py_versions": [
12+
"py310"
13+
],
14+
"registries": {
15+
"ap-northeast-1": "763104351884",
16+
"ap-south-1": "763104351884",
17+
"ap-southeast-1": "763104351884",
18+
"ap-southeast-2": "763104351884",
19+
"eu-central-1": "763104351884",
20+
"eu-west-1": "763104351884",
21+
"eu-west-3": "763104351884",
22+
"sa-east-1": "763104351884",
23+
"us-east-1": "763104351884",
24+
"us-east-2": "763104351884",
25+
"us-west-2": "763104351884"
26+
},
27+
"tag_prefix": "0.10.2",
28+
"repository": "huggingface-vllm-inference-neuronx",
29+
"container_version": {
30+
"inf2": "ubuntu22.04"
31+
},
32+
"sdk_versions": [
33+
"sdk2.26.0"
34+
]
35+
}
36+
}
37+
}
38+
}

src/sagemaker/image_uris.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@
4141
HUGGING_FACE_TEI_GPU_FRAMEWORK = "huggingface-tei"
4242
HUGGING_FACE_TEI_CPU_FRAMEWORK = "huggingface-tei-cpu"
4343
HUGGING_FACE_LLM_NEURONX_FRAMEWORK = "huggingface-llm-neuronx"
44+
HUGGING_FACE_VLLM_NEURONX_FRAMEWORK = "huggingface-vllm-neuronx"
4445
XGBOOST_FRAMEWORK = "xgboost"
4546
SKLEARN_FRAMEWORK = "sklearn"
4647
TRAINIUM_ALLOWED_FRAMEWORKS = "pytorch"
@@ -230,7 +231,11 @@ def retrieve(
230231
container_version = version_config["container_version"][processor]
231232

232233
# Append sdk version in case of trainium instances
233-
if repo in ["pytorch-training-neuron", "pytorch-training-neuronx"]:
234+
if repo in [
235+
"pytorch-training-neuron",
236+
"pytorch-training-neuronx",
237+
"huggingface-vllm-inference-neuronx",
238+
]:
234239
if not sdk_version:
235240
sdk_version = _get_latest_versions(version_config["sdk_versions"])
236241
container_version = sdk_version + "-" + container_version

tests/unit/sagemaker/image_uris/test_huggingface_llm.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,11 @@
3737
"1.8.2": "2.0.1-tei1.8.2-cpu-py310-ubuntu22.04",
3838
},
3939
}
40+
HF_VLLM_VERSIONS_MAPPING = {
41+
"inf2": {
42+
"0.4.1": "0.10.2-neuronx-py310-sdk2.26.0-ubuntu22.04",
43+
},
44+
}
4045
HF_VERSIONS_MAPPING = {
4146
"gpu": {
4247
"0.6.0": "2.0.0-tgi0.6.0-gpu-py39-cu118-ubuntu20.04",
@@ -124,6 +129,30 @@ def test_huggingface_uris(load_config):
124129
assert expected == uri
125130

126131

132+
@pytest.mark.parametrize("load_config", ["huggingface-vllm-neuronx.json"], indirect=True)
133+
def test_huggingface_vllm_neuronx_uris(load_config):
134+
VERSIONS = load_config["inference"]["versions"]
135+
device = load_config["inference"]["processors"][0]
136+
assert device == "inf2"
137+
backend = "huggingface-vllm-neuronx"
138+
139+
# Fail if device is not in mapping
140+
if device not in HF_VLLM_VERSIONS_MAPPING:
141+
raise ValueError(f"Device {device} not found in HF_VLLM_VERSIONS_MAPPING")
142+
for version in VERSIONS:
143+
ACCOUNTS = load_config["inference"]["versions"][version]["registries"]
144+
for region in ACCOUNTS.keys():
145+
uri = get_huggingface_llm_image_uri(backend, region=region, version=version)
146+
expected = expected_uris.huggingface_llm_framework_uri(
147+
"huggingface-vllm-inference-neuronx",
148+
ACCOUNTS[region],
149+
version,
150+
HF_VLLM_VERSIONS_MAPPING[device][version],
151+
region=region,
152+
)
153+
assert expected == uri
154+
155+
127156
@pytest.mark.parametrize(
128157
"load_config", ["huggingface-tei.json", "huggingface-tei-cpu.json"], indirect=True
129158
)

0 commit comments

Comments
 (0)