diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 2ddfe8ccb932..6005bb6bf7ae 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -1,6 +1,6 @@ [dev] # Set to "huggingface", for example, if you are a huggingface developer. Default is "" -partner_developer = "" +partner_developer = "huggingface" # Please only set it to true if you are preparing an EI related PR # Do remember to revert it back to false before merging any PR (including EI dedicated PR) ei_mode = false @@ -36,8 +36,8 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. -# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = [] +# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_vllm", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] +build_frameworks = ["huggingface_vllm"] # By default we build both training and inference containers. Set true/false values to determine which to build. 
@@ -186,5 +186,8 @@ dlc-pr-tensorflow-2-eia-inference = "" # vllm dlc-pr-vllm = "" +# HuggingFace vLLM +dlc-pr-huggingface-vllm = "" + # sglang dlc-pr-sglang = "" \ No newline at end of file diff --git a/huggingface/hf-vllm/buildspec.yml b/huggingface/hf-vllm/buildspec.yml index e69de29bb2d1..7b7305a723aa 100644 --- a/huggingface/hf-vllm/buildspec.yml +++ b/huggingface/hf-vllm/buildspec.yml @@ -0,0 +1,52 @@ +account_id: &ACCOUNT_ID +prod_account_id: &PROD_ACCOUNT_ID 763104351884 +region: ®ION +base_framework: &BASE_FRAMEWORK vllm +framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK] +version: &VERSION "0.12.0" +short_version: &SHORT_VERSION "0.12" +arch_type: &ARCH_TYPE x86_64 +autopatch_build: "False" + +repository_info: + build_repository: &BUILD_REPOSITORY + image_type: &IMAGE_TYPE gpu + root: huggingface/hf-vllm + repository_name: &REPOSITORY_NAME !join [ pr, "-", *FRAMEWORK ] + repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ] + release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK ] + release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ] + +context: + build_context: &BUILD_CONTEXT + deep_learning_container: + source: ../../src/deep_learning_container.py + target: deep_learning_container.py + cuda-compatibility-lib: + source: ../build_artifacts/inference/cuda-compatibility-lib.sh + target: cuda-compatibility-lib.sh + + +images: + BuildHuggingFaceVllmGpuPy312Cu129DockerImage: + <<: *BUILD_REPOSITORY + context: + <<: *BUILD_CONTEXT + image_size_baseline: 26000 + device_type: &DEVICE_TYPE gpu + cuda_version: &CUDA_VERSION cu129 + python_version: &DOCKER_PYTHON_VERSION py3 + tag_python_version: &TAG_PYTHON_VERSION py312 + os_version: &OS_VERSION ubuntu22.04 + vllm_version: &VLLM_VERSION 0.12.0 + tag: !join [ "hf-vllm", "-", *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, 
"-sagemaker" ] + latest_release_tag: !join [ "hf-vllm", "-", *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ] + docker_file: !join [ docker/, *SHORT_VERSION, /, *CUDA_VERSION, /Dockerfile ] + target: sagemaker + build: true + enable_common_stage_build: false + test_configs: + test_platforms: + - sanity + - security + - sagemaker diff --git a/huggingface/hf-vllm/docker/0.12/cu129/Dockerfile b/huggingface/hf-vllm/docker/0.12/cu129/Dockerfile new file mode 100644 index 000000000000..b81fcbeeaade --- /dev/null +++ b/huggingface/hf-vllm/docker/0.12/cu129/Dockerfile @@ -0,0 +1,42 @@ +ARG FINAL_BASE_IMAGE=763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.12.0-gpu-py312-cu129-ubuntu22.04-sagemaker-v1.0 +FROM ${FINAL_BASE_IMAGE} AS vllm-base + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +ARG HUGGINGFACE_HUB_VERSION=0.36.0 +ARG HF_XET_VERSION=1.2.0 + +RUN apt-get update -y \ +&& apt-get install -y --no-install-recommends curl unzip \ +&& rm -rf /var/lib/apt/lists/* + + +RUN pip install --upgrade pip && \ + pip install --no-cache-dir \ + huggingface-hub==${HUGGINGFACE_HUB_VERSION} \ + hf-xet==${HF_XET_VERSION} \ + grpcio + + +FROM vllm-base AS sagemaker +ENV HF_HUB_ENABLE_HF_TRANSFER="1" \ + HF_HUB_USER_AGENT_ORIGIN="aws:sagemaker:gpu-cuda:inference:hf-vllm" + +COPY cuda-compatibility-lib.sh /usr/local/bin/cuda-compatibility-lib.sh +RUN chmod +x /usr/local/bin/cuda-compatibility-lib.sh + +RUN set -eux; \ + HOME_DIR=/root; \ + uv pip install --system --upgrade pip requests PTable; \ + curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip; \ + unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/; \ + cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance; \ + chmod +x /usr/local/bin/testOSSCompliance; \ + chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh; \ + 
${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python3; \ + rm -rf ${HOME_DIR}/oss_compliance* + + +ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"] + diff --git a/huggingface/hf-vllm/out.py b/huggingface/hf-vllm/out.py new file mode 100644 index 000000000000..cf44f88fa683 --- /dev/null +++ b/huggingface/hf-vllm/out.py @@ -0,0 +1,14 @@ +import os + +try: + if os.path.exists("/usr/local/bin/deep_learning_container.py") and ( + os.getenv("OPT_OUT_TRACKING") is None or os.getenv("OPT_OUT_TRACKING", "").lower() != "true" + ): + import threading + + cmd = "python /usr/local/bin/deep_learning_container.py --framework hf-vllm --framework-version 0.12.0 --container-type inference &>/dev/null" + x = threading.Thread(target=lambda: os.system(cmd)) + x.daemon = True + x.start() +except Exception: + pass diff --git a/huggingface/hf-vllm/telemetry.sh b/huggingface/hf-vllm/telemetry.sh new file mode 100644 index 000000000000..c01514033dd9 --- /dev/null +++ b/huggingface/hf-vllm/telemetry.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# telemetry.sh +if [ -f /usr/local/bin/deep_learning_container.py ] && [[ -z "${OPT_OUT_TRACKING}" || "${OPT_OUT_TRACKING,,}" != "true" ]]; then + ( + python /usr/local/bin/deep_learning_container.py \ + --framework "hf-vllm" \ + --framework-version "0.12.0" \ + --container-type "inference" \ + &>/dev/null & + ) +fi + diff --git a/src/constants.py b/src/constants.py index 73f07931c2be..bb4baa4385c3 100644 --- a/src/constants.py +++ b/src/constants.py @@ -27,6 +27,7 @@ "base", "vllm", "sglang", + "huggingface_vllm", } DEVICE_TYPES = {"cpu", "gpu", "hpu", "eia", "inf", "neuron", "neuronx"} IMAGE_TYPES = {"training", "inference"} diff --git a/test/sagemaker_tests/huggingface/hf-vllm/__init__.py b/test/sagemaker_tests/huggingface/hf-vllm/__init__.py new file mode 100644 index 000000000000..199e66b95926 --- /dev/null +++ b/test/sagemaker_tests/huggingface/hf-vllm/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019-2020 Amazon.com, Inc.
or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import diff --git a/test/sagemaker_tests/huggingface/hf-vllm/conftest.py b/test/sagemaker_tests/huggingface/hf-vllm/conftest.py new file mode 100644 index 000000000000..20daa2c701fd --- /dev/null +++ b/test/sagemaker_tests/huggingface/hf-vllm/conftest.py @@ -0,0 +1,393 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+from __future__ import absolute_import + +import json +import logging +import os +import platform +import shutil +import sys +import tempfile + +import boto3 +import pytest + +from botocore.exceptions import ClientError +from sagemaker import LocalSession, Session +from sagemaker.pytorch import PyTorch + +from .utils import image_utils, get_ecr_registry + +NO_P4_REGIONS = [ + "af-south-1", + "ap-east-1", + "ap-northeast-3", + "ap-southeast-1", + "ap-southeast-2", + "ap-south-1", + "ca-central-1", + "eu-central-1", + "eu-north-1", + "eu-west-2", + "eu-west-3", + "eu-south-1", + "me-south-1", + "sa-east-1", + "us-west-1", + "cn-northwest-1", + "il-central-1", +] + +NO_G5_REGIONS = [ + "us-west-1", + "ca-west-1", + "mx-central-1", + "af-south-1", + "ap-east-1", + "ap-south-2", + "ap-southeast-5", + "ap-southeast-4", + "ap-northeast-3", + "ap-southeast-1", + "ap-southeast-7", + "eu-south-1", + "eu-west-3", + "eu-south-2", + "eu-central-2", + "me-south-1", +] + + +logger = logging.getLogger(__name__) +logging.getLogger("boto").setLevel(logging.INFO) +logging.getLogger("boto3").setLevel(logging.INFO) +logging.getLogger("botocore").setLevel(logging.INFO) +logging.getLogger("factory.py").setLevel(logging.INFO) +logging.getLogger("auth.py").setLevel(logging.INFO) +logging.getLogger("connectionpool.py").setLevel(logging.INFO) + + +dir_path = os.path.dirname(os.path.realpath(__file__)) + + +def pytest_addoption(parser): + parser.addoption("--build-image", "-D", action="store_true") + parser.addoption("--build-base-image", "-B", action="store_true") + parser.addoption("--aws-id") + parser.addoption("--instance-type") + parser.addoption("--accelerator-type", default=None) + parser.addoption("--docker-base-name", default="huggingface_pytorch") + parser.addoption("--region", default="us-west-2") + parser.addoption("--framework-version", default="") + parser.addoption( + "--py-version", + choices=["2", "3", "37", "38", "39", "310", "311", "312"],
default=str(sys.version_info.major), + ) + # Processor is still "cpu" for EIA tests + parser.addoption( + "--processor", choices=["gpu", "cpu", "eia", "neuron", "neuronx"], default="cpu" + ) + # If not specified, will default to {framework-version}-{processor}-py{py-version} + parser.addoption("--tag", default=None) + parser.addoption( + "--generate-coverage-doc", + default=False, + action="store_true", + help="use this option to generate test coverage doc", + ) + parser.addoption( + "--efa", + action="store_true", + default=False, + help="Run only efa tests", + ) + parser.addoption("--sagemaker-regions", default="us-west-2") + + +def pytest_configure(config): + config.addinivalue_line("markers", "efa(): explicitly mark to run efa tests") + + +def pytest_runtest_setup(item): + if item.config.getoption("--efa"): + efa_tests = [mark for mark in item.iter_markers(name="efa")] + if not efa_tests: + pytest.skip("Skipping non-efa tests") + + +def pytest_collection_modifyitems(session, config, items): + for item in items: + print(f"item {item}") + for marker in item.iter_markers(name="team"): + print(f"item {marker}") + team_name = marker.args[0] + item.user_properties.append(("team_marker", team_name)) + print(f"item.user_properties {item.user_properties}") + + if config.getoption("--generate-coverage-doc"): + from test.test_utils.test_reporting import TestReportGenerator + + report_generator = TestReportGenerator(items, is_sagemaker=True) + report_generator.generate_coverage_doc( + framework="huggingface_pytorch", job_type="inference" + ) + + +@pytest.fixture(scope="session", name="docker_base_name") +def fixture_docker_base_name(request): + return request.config.getoption("--docker-base-name") + + +@pytest.fixture(scope="session", name="region") +def fixture_region(request): + return request.config.getoption("--region") + + +@pytest.fixture(scope="session", name="framework_version") +def fixture_framework_version(request): + return 
request.config.getoption("--framework-version") + + +@pytest.fixture(scope="session", name="py_version") +def fixture_py_version(request): + return "py{}".format(int(request.config.getoption("--py-version"))) + + +@pytest.fixture(scope="session", name="processor") +def fixture_processor(request): + return request.config.getoption("--processor") + + +@pytest.fixture(scope="session", name="tag") +def fixture_tag(request, framework_version, processor, py_version): + provided_tag = request.config.getoption("--tag") + default_tag = "{}-{}-{}".format(framework_version, processor, py_version) + return provided_tag if provided_tag else default_tag + + +@pytest.fixture(scope="session", name="docker_image") +def fixture_docker_image(docker_base_name, tag): + return "{}:{}".format(docker_base_name, tag) + + +@pytest.fixture +def opt_ml(): + tmp = tempfile.mkdtemp() + os.mkdir(os.path.join(tmp, "output")) + + # Docker cannot mount Mac OS /var folder properly see + # https://forums.docker.com/t/var-folders-isnt-mounted-properly/9600 + opt_ml_dir = "/private{}".format(tmp) if platform.system() == "Darwin" else tmp + yield opt_ml_dir + + shutil.rmtree(tmp, True) + + +@pytest.fixture(scope="session", name="use_gpu") +def fixture_use_gpu(processor): + return processor == "gpu" + + +@pytest.fixture(scope="session", name="build_base_image", autouse=True) +def fixture_build_base_image( + request, framework_version, py_version, processor, tag, docker_base_name +): + build_base_image = request.config.getoption("--build-base-image") + if build_base_image: + return image_utils.build_base_image( + framework_name=docker_base_name, + framework_version=framework_version, + py_version=py_version, + base_image_tag=tag, + processor=processor, + cwd=os.path.join(dir_path, ".."), + ) + + return tag + + +@pytest.fixture(scope="session", name="sagemaker_session") +def fixture_sagemaker_session(region): + return Session(boto_session=boto3.Session(region_name=region)) + + 
+@pytest.fixture(scope="session", name="sagemaker_regions") +def fixture_sagemaker_regions(request): + sagemaker_regions = request.config.getoption("--sagemaker-regions") + return sagemaker_regions.split(",") + + +@pytest.fixture(scope="session", name="sagemaker_local_session") +def fixture_sagemaker_local_session(region): + return LocalSession(boto_session=boto3.Session(region_name=region)) + + +@pytest.fixture(name="aws_id", scope="session") +def fixture_aws_id(request): + return request.config.getoption("--aws-id") + + +@pytest.fixture(name="instance_type", scope="session") +def fixture_instance_type(request, processor): + provided_instance_type = request.config.getoption("--instance-type") + default_instance_type = "local" if processor == "cpu" else "local_gpu" + return provided_instance_type or default_instance_type + + +@pytest.fixture(name="accelerator_type", scope="session") +def fixture_accelerator_type(request): + return request.config.getoption("--accelerator-type") + + +@pytest.fixture(name="docker_registry", scope="session") +def fixture_docker_registry(aws_id, region): + return get_ecr_registry(aws_id, region) + + +@pytest.fixture(name="ecr_image", scope="session") +def fixture_ecr_image(docker_registry, docker_base_name, tag): + return "{}/{}:{}".format(docker_registry, docker_base_name, tag) + + +@pytest.fixture(autouse=True) +def skip_by_device_type(request, use_gpu, instance_type, accelerator_type): + is_gpu = use_gpu or instance_type[3] in ["g", "p"] + is_eia = accelerator_type is not None + is_neuron = instance_type.startswith("ml.inf1") + is_neuronx = instance_type.startswith("ml.inf2") or instance_type.startswith("ml.trn1") + + # Separate out cases for clearer logic. + # When running Neuron test, skip CPU and GPU test. 
+ if request.node.get_closest_marker("neuron_test") and not is_neuron: + pytest.skip("Skipping because running on '{}' instance".format(instance_type)) + elif request.node.get_closest_marker("neuronx_test") and not is_neuronx: + pytest.skip("Skipping because running on '{}' instance".format(instance_type)) + + # When running GPU test, skip CPU and neuron test. When running CPU test, skip GPU and neuron test. + elif (request.node.get_closest_marker("gpu_test") and not is_gpu) or ( + request.node.get_closest_marker("cpu_test") and (is_gpu or is_neuron or is_neuronx) + ): + pytest.skip("Skipping because running on '{}' instance".format(instance_type)) + + # When running EIA test, skip the CPU, GPU and Neuron functions + elif ( + request.node.get_closest_marker("neuron_test") + or request.node.get_closest_marker("gpu_test") + or request.node.get_closest_marker("cpu_test") + ) and is_eia: + pytest.skip("Skipping because running on '{}' instance".format(instance_type)) + + # When running CPU or GPU or Neuron test, skip EIA test. 
+ elif request.node.get_closest_marker("eia_test") and not is_eia: + pytest.skip("Skipping because running on '{}' instance".format(instance_type)) + + +@pytest.fixture(autouse=True) +def skip_by_py_version(request, py_version): + if request.node.get_closest_marker("skip_py2") and py_version != "py3": + pytest.skip("Skipping the test because Python 2 is not supported.") + + +@pytest.fixture(autouse=True) +def skip_gpu_instance_restricted_regions(region, instance_type): + if (region in NO_P4_REGIONS and instance_type.startswith("ml.p4")) or ( + region in NO_G5_REGIONS and instance_type.startswith("ml.g5") + ): + pytest.skip( + "Skipping GPU test in region {} with instance type {}".format(region, instance_type) + ) + + +@pytest.fixture(autouse=True) +def skip_gpu_py2(request, use_gpu, instance_type, py_version, framework_version): + is_gpu = use_gpu or instance_type[3] in ["g", "p"] + if ( + request.node.get_closest_marker("skip_gpu_py2") + and is_gpu + and py_version != "py3" + and framework_version == "1.4.0" + ): + pytest.skip("Skipping the test until mms issue resolved.") + + +def _get_remote_override_flags(): + try: + s3_client = boto3.client("s3") + sts_client = boto3.client("sts") + account_id = sts_client.get_caller_identity().get("Account") + result = s3_client.get_object( + Bucket=f"dlc-cicd-helper-{account_id}", Key="override_tests_flags.json" + ) + json_content = json.loads(result["Body"].read().decode("utf-8")) + except ClientError as e: + logger.warning("ClientError when performing S3/STS operation: {}".format(e)) + json_content = {} + return json_content + + +def _is_test_disabled(test_name, build_name, version): + """ + Expected format of remote_override_flags: + { + "CB Project Name for Test Type A": { + "CodeBuild Resolved Source Version": ["test_type_A_test_function_1", "test_type_A_test_function_2"] + }, + "CB Project Name for Test Type B": { + "CodeBuild Resolved Source Version": ["test_type_B_test_function_1", "test_type_B_test_function_2"] + } 
+ } + + :param test_name: str Test Function node name (includes parametrized values in string) + :param build_name: str Build Project name of current execution + :param version: str Source Version of current execution + :return: bool True if test is disabled as per remote override, False otherwise + """ + remote_override_flags = _get_remote_override_flags() + remote_override_build = remote_override_flags.get(build_name, {}) + if version in remote_override_build: + return not remote_override_build[version] or any( + [test_keyword in test_name for test_keyword in remote_override_build[version]] + ) + return False + + +@pytest.fixture(autouse=True) +def disable_test(request): + test_name = request.node.name + # We do not have a regex pattern to find CB name, which means we must resort to string splitting + build_arn = os.getenv("CODEBUILD_BUILD_ARN") + build_name = build_arn.split("/")[-1].split(":")[0] if build_arn else None + version = os.getenv("CODEBUILD_RESOLVED_SOURCE_VERSION") + + if build_name and version and _is_test_disabled(test_name, build_name, version): + pytest.skip(f"Skipping {test_name} test because it has been disabled.") + + +@pytest.fixture(autouse=True) +def skip_test_successfully_executed_before(request): + """ + "cache/lastfailed" contains information about failed tests only. We're running SM tests in separate threads for each image. + So when we retry SM tests, successfully executed tests executed again because pytest doesn't have that info in /.cache. + But the flag "--last-failed-no-failures all" requires pytest to execute all the available tests. + The only sign that a test passed last time - lastfailed file exists and the test name isn't in that file. + The method checks whether lastfailed file exists and the test name is not in it. 
+ """ + test_name = request.node.name + lastfailed = request.config.cache.get("cache/lastfailed", None) + + if lastfailed is not None and not any( + test_name in failed_test_name for failed_test_name in lastfailed.keys() + ): + pytest.skip(f"Skipping {test_name} because it was successfully executed for this commit") diff --git a/test/sagemaker_tests/huggingface/hf-vllm/integration/__init__.py b/test/sagemaker_tests/huggingface/hf-vllm/integration/__init__.py new file mode 100644 index 000000000000..f9d5c7746fcb --- /dev/null +++ b/test/sagemaker_tests/huggingface/hf-vllm/integration/__init__.py @@ -0,0 +1,62 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+from __future__ import absolute_import + +import json +import re + +import boto3 + + +ROLE = "SageMakerRole" +DEFAULT_TIMEOUT = 45 + + +class NoLogStreamFoundError(Exception): + pass + + +class SageMakerEndpointFailure(Exception): + pass + + +def dump_logs_from_cloudwatch(e, region="us-west-2"): + """ + Function to dump logs from cloudwatch during error handling + """ + error_hosting_endpoint_regex = re.compile(r"Error hosting endpoint ((\w|-)+):") + endpoint_url_regex = re.compile(r"/aws/sagemaker/Endpoints/((\w|-)+)") + endpoint_match = error_hosting_endpoint_regex.search(str(e)) or endpoint_url_regex.search( + str(e) + ) + if endpoint_match: + logs_client = boto3.client("logs", region_name=region) + endpoint = endpoint_match.group(1) + log_group_name = f"/aws/sagemaker/Endpoints/{endpoint}" + log_stream_resp = logs_client.describe_log_streams(logGroupName=log_group_name) + all_traffic_log_stream = "" + for log_stream in log_stream_resp.get("logStreams", []): + log_stream_name = log_stream.get("logStreamName") + if log_stream_name.startswith("AllTraffic"): + all_traffic_log_stream = log_stream_name + break + if not all_traffic_log_stream: + raise NoLogStreamFoundError( + f"Cannot find all traffic log streams for endpoint {endpoint}" + ) from e + events = logs_client.get_log_events( + logGroupName=log_group_name, logStreamName=all_traffic_log_stream + ) + raise SageMakerEndpointFailure( + f"Error from endpoint {endpoint}:\n{json.dumps(events, indent=4)}" + ) from e diff --git a/test/sagemaker_tests/huggingface/hf-vllm/integration/sagemaker/__init__.py b/test/sagemaker_tests/huggingface/hf-vllm/integration/sagemaker/__init__.py new file mode 100644 index 000000000000..04fbf5d9a144 --- /dev/null +++ b/test/sagemaker_tests/huggingface/hf-vllm/integration/sagemaker/__init__.py @@ -0,0 +1,12 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). 
You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. diff --git a/test/sagemaker_tests/huggingface/hf-vllm/integration/sagemaker/test_vllm.py b/test/sagemaker_tests/huggingface/hf-vllm/integration/sagemaker/test_vllm.py new file mode 100644 index 000000000000..8aa8a4ce2775 --- /dev/null +++ b/test/sagemaker_tests/huggingface/hf-vllm/integration/sagemaker/test_vllm.py @@ -0,0 +1,112 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import + +import json +import logging + +import pytest +import sagemaker +from sagemaker.huggingface import HuggingFaceModel +from sagemaker.serializers import JSONSerializer +from sagemaker.deserializers import JSONDeserializer + +from ...integration import dump_logs_from_cloudwatch +from ...integration.sagemaker.timeout import timeout_and_delete_endpoint +from ..... 
@pytest.mark.model("bloom-560m")
@pytest.mark.processor("gpu")
@pytest.mark.gpu_test
@pytest.mark.team("sagemaker-1p-algorithms")
def test_vllm_bloom(framework_version, ecr_image, instance_type, sagemaker_regions):
    """Deploy bigscience/bloom-560m on the HF vLLM image and run one inference."""
    invoke_sm_endpoint_helper_function(
        ecr_image=ecr_image,
        sagemaker_regions=sagemaker_regions,
        test_function=_test_vllm_model,
        framework_version=framework_version,
        instance_type=instance_type or "ml.g6.12xlarge",
        model_id="bigscience/bloom-560m",
        dump_logs_from_cloudwatch=dump_logs_from_cloudwatch,
    )


@pytest.mark.model("qwen3-8b")
@pytest.mark.processor("gpu")
@pytest.mark.gpu_test
@pytest.mark.team("sagemaker-1p-algorithms")
def test_vllm_qwen(framework_version, ecr_image, instance_type, sagemaker_regions):
    """Deploy Qwen/Qwen3-8B on the HF vLLM image and run one inference."""
    invoke_sm_endpoint_helper_function(
        ecr_image=ecr_image,
        sagemaker_regions=sagemaker_regions,
        test_function=_test_vllm_model,
        framework_version=framework_version,
        instance_type=instance_type or "ml.g6.12xlarge",
        model_id="Qwen/Qwen3-8B",
        dump_logs_from_cloudwatch=dump_logs_from_cloudwatch,
    )


def _test_vllm_model(
    sagemaker_session,
    framework_version,
    image_uri,
    instance_type,
    model_id,
    accelerator_type=None,
    **kwargs,
):
    """Test vLLM model deployment and inference using OpenAI-compatible API format"""
    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-hf-vllm-serving")

    hf_model = HuggingFaceModel(
        env={
            "HF_MODEL_ID": model_id,
            "SM_NUM_GPUS": "4",
            "SM_VLLM_MAX_MODEL_LEN": "512",
        },
        role="SageMakerRole",
        image_uri=image_uri,
        sagemaker_session=sagemaker_session,
    )

    # Endpoint is torn down by the context manager even when deploy/predict fails.
    with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=45):
        predictor = hf_model.deploy(
            initial_instance_count=1,
            instance_type=instance_type,
            endpoint_name=endpoint_name,
            container_startup_health_check_timeout=1800,
        )

        predictor.serializer = JSONSerializer()
        predictor.deserializer = JSONDeserializer()

        # vLLM uses OpenAI-compatible API format
        data = {
            "prompt": "What is Deep Learning?",
            "max_tokens": 50,
            "temperature": 0.7,
        }

        LOGGER.info(f"Running inference with data: {data}")
        output = predictor.predict(data)
        LOGGER.info(f"Output: {json.dumps(output)}")

        assert output is not None
LOGGER = logging.getLogger("timeout")


# NOTE: intentionally shadows the builtin TimeoutError; callers in this
# package catch this class by name, so it is kept for compatibility.
class TimeoutError(Exception):
    pass


@contextmanager
def timeout(seconds=0, minutes=0, hours=0):
    """Add a signal-based timeout to any block of code.
    If multiple time units are specified, they will be added together to determine time limit.
    Usage:
    with timeout(seconds=5):
        my_slow_function(...)
    Args:
    - seconds: The time limit, in seconds.
    - minutes: The time limit, in minutes.
    - hours: The time limit, in hours.
    """

    limit = seconds + 60 * minutes + 3600 * hours

    def handler(signum, frame):
        raise TimeoutError("timed out after {} seconds".format(limit))

    # Save the previous SIGALRM handler so nested or successive uses do not
    # permanently clobber it (it was previously installed but never restored).
    previous_handler = signal.signal(signal.SIGALRM, handler)
    try:
        signal.alarm(limit)
        yield
    finally:
        # Cancel any pending alarm, then put the old handler back.
        signal.alarm(0)
        signal.signal(signal.SIGALRM, previous_handler)


@contextmanager
def timeout_and_delete_endpoint(endpoint_name, sagemaker_session, seconds=0, minutes=0, hours=0):
    """Run the enclosed block under a time limit and always delete the endpoint.

    :param endpoint_name: SageMaker endpoint to delete on exit
    :param sagemaker_session: session used to issue the delete call
    :param seconds/minutes/hours: time limit, summed as in :func:`timeout`
    """
    with timeout(seconds=seconds, minutes=minutes, hours=hours) as t:
        try:
            yield [t]
        finally:
            try:
                sagemaker_session.delete_endpoint(endpoint_name)
                LOGGER.info("deleted endpoint {}".format(endpoint_name))
            except ClientError as ce:
                if ce.response["Error"]["Code"] == "ValidationException":
                    # Endpoint was never created (e.g. deploy failed);
                    # avoids the inner exception to be overwritten
                    pass
def _botocore_resolver():
    """Build an endpoint resolver backed by botocore's bundled endpoint data.

    :return: botocore ``EndpointResolver`` instance
    """
    data_loader = botocore.loaders.create_loader()
    return botocore.regions.EndpointResolver(data_loader.load_data("endpoints"))


def get_ecr_registry(account, region):
    """Get prefix of ECR image URI.

    :param account: Account ID
    :param region: region where ECR repo exists
    :return: AWS ECR registry host, e.g. ``<account>.dkr.ecr.<region>.amazonaws.com``
    """
    resolved = _botocore_resolver().construct_endpoint("ecr", region)
    return "{}.dkr.{}".format(account, resolved["hostname"])
+from __future__ import absolute_import + +import os +import subprocess +import sys + +CYAN_COLOR = "\033[36m" +END_COLOR = "\033[0m" + + +def build_base_image( + framework_name, framework_version, py_version, processor, base_image_tag, cwd="." +): + base_image_uri = get_base_image_uri(framework_name, base_image_tag) + + dockerfile_location = os.path.join( + "docker", framework_version, "base", "Dockerfile.{}".format(processor) + ) + + subprocess.check_call( + [ + "docker", + "build", + "-t", + base_image_uri, + "-f", + dockerfile_location, + "--build-arg", + "py_version={}".format(py_version[-1]), + cwd, + ], + cwd=cwd, + ) + print("created image {}".format(base_image_uri)) + return base_image_uri + + +def get_base_image_uri(framework_name, base_image_tag): + return "{}-base:{}".format(framework_name, base_image_tag) + + +def get_image_uri(framework_name, tag): + return "{}:{}".format(framework_name, tag) + + +def _check_call(cmd, *popenargs, **kwargs): + if isinstance(cmd, str): + cmd = cmd.split(" ") + _print_cmd(cmd) + subprocess.check_call(cmd, *popenargs, **kwargs) + + +def _print_cmd(cmd): + print("executing docker command: {}{}{}".format(CYAN_COLOR, " ".join(cmd), END_COLOR)) + sys.stdout.flush() diff --git a/test/sagemaker_tests/huggingface/hf-vllm/utils/local_mode_utils.py b/test/sagemaker_tests/huggingface/hf-vllm/utils/local_mode_utils.py new file mode 100644 index 000000000000..fa6b3cf00c36 --- /dev/null +++ b/test/sagemaker_tests/huggingface/hf-vllm/utils/local_mode_utils.py @@ -0,0 +1,46 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. 
@contextmanager
def lock():
    """Serialize tests that use SageMaker Local Mode.

    Local Mode serving always binds the same port, so concurrent test
    processes must take turns; an exclusive advisory lock on a shared
    file provides that.
    """
    # Open inside a with-block so the descriptor is always closed; the
    # original leaked the open file object on every call.
    with open(LOCK_PATH, "w") as lock_file:
        fcntl.lockf(lock_file, fcntl.LOCK_EX)
        try:
            yield
        finally:
            # Give the local container time to release the port before the
            # next test acquires the lock.
            time.sleep(5)
            fcntl.lockf(lock_file, fcntl.LOCK_UN)


def assert_files_exist(output_path, directory_file_map):
    """Assert that expected members exist inside generated ``<dir>.tar.gz`` archives.

    :param output_path: directory containing one ``<directory>.tar.gz`` per key
    :param directory_file_map: mapping of archive basename -> list of member paths
    :raises KeyError: if a member is missing from its archive (via ``getmember``)
    """
    for directory, files in directory_file_map.items():
        with tarfile.open(os.path.join(output_path, "{}.tar.gz".format(directory))) as tar:
            for f in files:
                # getmember raises KeyError when the entry is absent.
                tar.getmember(f)
class NoLogStreamFoundError(Exception):
    """Raised when a failed endpoint has no ``AllTraffic`` log stream."""

    pass


class SageMakerEndpointFailure(Exception):
    """Raised to surface an endpoint's CloudWatch logs alongside the original error."""

    pass


def dump_logs_from_cloudwatch(e, region="us-west-2"):
    """
    Function to dump logs from cloudwatch during error handling
    """
    message = str(e)
    patterns = (
        re.compile(r"Error hosting endpoint ((\w|-)+):"),
        re.compile(r"/aws/sagemaker/Endpoints/((\w|-)+)"),
    )
    match = next((m for m in (p.search(message) for p in patterns) if m), None)
    if match is None:
        # No endpoint name in the message -- nothing to dump.
        return

    endpoint = match.group(1)
    log_group_name = f"/aws/sagemaker/Endpoints/{endpoint}"
    logs_client = boto3.client("logs", region_name=region)

    stream_name = ""
    response = logs_client.describe_log_streams(logGroupName=log_group_name)
    for stream in response.get("logStreams", []):
        candidate = stream.get("logStreamName")
        if candidate.startswith("AllTraffic"):
            stream_name = candidate
            break

    if not stream_name:
        raise NoLogStreamFoundError(
            f"Cannot find all traffic log streams for endpoint {endpoint}"
        ) from e

    events = logs_client.get_log_events(
        logGroupName=log_group_name, logStreamName=stream_name
    )
    raise SageMakerEndpointFailure(
        f"Error from endpoint {endpoint}:\n{json.dumps(events, indent=4)}"
    ) from e