diff --git a/.azure-pipelines/scripts/install_nc.sh b/.azure-pipelines/scripts/install_nc.sh
index 38f0ccf557e..c95ce2f9dd7 100644
--- a/.azure-pipelines/scripts/install_nc.sh
+++ b/.azure-pipelines/scripts/install_nc.sh
@@ -11,12 +11,13 @@ if [[ $1 = *"3x_pt"* ]]; then
else
echo -e "\n Install torch CPU ... "
pip install torch==2.7.0 torchvision --index-url https://download.pytorch.org/whl/cpu
- python -m pip install intel-extension-for-pytorch==2.7.0 oneccl_bind_pt==2.7.0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+ python -m pip install intel-extension-for-pytorch==2.7.0 oneccl_bind_pt==2.7.0 --index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
python -m pip install --no-cache-dir -r requirements.txt
python setup.py bdist_wheel
fi
pip install --no-deps dist/neural_compressor*.whl --force-reinstall
elif [[ $1 = *"3x_tf"* ]]; then
+ pip install tensorflow==2.19.0
python -m pip install --no-cache-dir -r requirements.txt
python -m pip install --no-cache-dir -r requirements_tf.txt
python setup.py bdist_wheel
diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh b/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh
index 5fbab435634..5f5f2136c64 100644
--- a/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh
+++ b/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh
@@ -12,7 +12,6 @@ echo "##[section]import check pass"
# install requirements
echo "##[group]set up UT env..."
export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
-export PT_HPU_LAZY_MODE=1
sed -i '/^intel_extension_for_pytorch/d' /neural-compressor/test/3x/torch/requirements.txt
sed -i '/^auto_round/d' /neural-compressor/test/3x/torch/requirements.txt
cat /neural-compressor/test/3x/torch/requirements.txt
diff --git a/.azure-pipelines/template/docker-template.yml b/.azure-pipelines/template/docker-template.yml
index da197c0b20a..a7625fc4d60 100644
--- a/.azure-pipelines/template/docker-template.yml
+++ b/.azure-pipelines/template/docker-template.yml
@@ -74,7 +74,7 @@ steps:
- ${{ if eq(parameters.imageSource, 'pull') }}:
- script: |
- docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu24.04/habanalabs/pytorch-installer-2.7.1:latest
displayName: "Pull habana docker image"
- script: |
@@ -95,7 +95,7 @@ steps:
else
docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
--runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host \
- -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.22.0/ubuntu24.04/habanalabs/pytorch-installer-2.7.1:latest
docker exec ${{ parameters.containerName }} bash -c "ln -sf \$(which python3) /usr/bin/python"
fi
echo "Show the container list after docker run ... "
diff --git a/.azure-pipelines/ut-3x-pt-fp8.yml b/.azure-pipelines/ut-3x-pt-fp8.yml
index 74f5a877496..47d18fab624 100644
--- a/.azure-pipelines/ut-3x-pt-fp8.yml
+++ b/.azure-pipelines/ut-3x-pt-fp8.yml
@@ -39,6 +39,7 @@ stages:
jobs:
- job:
displayName: Torch 3x Habana FP8
+ timeoutInMinutes: 120
steps:
- template: template/ut-template.yml
parameters:
@@ -54,6 +55,7 @@ stages:
jobs:
- job:
displayName: Torch 3x Habana FP8 baseline
+ timeoutInMinutes: 120
continueOnError: true
steps:
- template: template/ut-template.yml
diff --git a/README.md b/README.md
index 5e54c01fe32..362f47676cf 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ Intel® Neural Compressor
An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, and ONNX Runtime)
[](https://github.com/intel/neural-compressor)
-[](https://github.com/intel/neural-compressor/releases)
+[](https://github.com/intel/neural-compressor/releases)
[](https://github.com/intel/neural-compressor/blob/master/LICENSE)
[](https://github.com/intel/neural-compressor)
[](https://pepy.tech/project/neural-compressor)
@@ -56,7 +56,7 @@ To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, p
Run a container with an interactive shell, [more info](https://docs.habana.ai/en/latest/Installation_Guide/Additional_Installation/Docker_Installation.html#docker-installation)
```
-docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.21.0/ubuntu24.04/habanalabs/pytorch-installer-2.6.0:latest
+docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.22.0/ubuntu24.04/habanalabs/pytorch-installer-2.7.1:latest
```
> Note: Starting with Habana software 1.21.0, `PT_HPU_LAZY_MODE=0` is the default. However, most low-precision functions (such as `convert_from_uint4`) do not yet support this mode, so we recommend setting `PT_HPU_LAZY_MODE=1` to maintain compatibility.
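The note above only names the environment variable; below is a minimal sketch of applying it from Python, under the assumption that `PT_HPU_LAZY_MODE` is read when the Habana PyTorch bridge is first imported (setting it via `export` in the shell before launching works equally well).

```
import os

# Keep Gaudi lazy mode enabled (see the note above); the variable must be in the
# environment before the Habana PyTorch bridge is imported, so set it first.
os.environ.setdefault("PT_HPU_LAZY_MODE", "1")

import torch                                   # noqa: E402
import habana_frameworks.torch.core as htcore  # noqa: E402,F401  Habana PyTorch bridge
```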
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/requirements_cpu_woq.txt b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/requirements_cpu_woq.txt
index fe584f6fc27..417511035ba 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/requirements_cpu_woq.txt
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/requirements_cpu_woq.txt
@@ -16,5 +16,5 @@ huggingface_hub
numba
tbb
intel-extension-for-pytorch==2.4.0
---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+--index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
oneccl_bind_pt==2.4.0
diff --git a/neural_compressor/torch/algorithms/weight_only/save_load.py b/neural_compressor/torch/algorithms/weight_only/save_load.py
index 3c9927734cc..b51af6701a7 100644
--- a/neural_compressor/torch/algorithms/weight_only/save_load.py
+++ b/neural_compressor/torch/algorithms/weight_only/save_load.py
@@ -937,7 +937,10 @@ def _init_hf_model(self, model_class, config):
else: # pragma: no cover
assert False, f'`torch_dtype` can be either `torch.dtype` or `"auto"`, but received {torch_dtype}'
- dtype_orig = model_class._set_default_torch_dtype(torch_dtype)
+ if parse(transformers.__version__) >= parse("4.56.0"):
+ dtype_orig = model_class._set_default_dtype(torch_dtype)
+ else:
+ dtype_orig = model_class._set_default_torch_dtype(torch_dtype)
init_contexts = (
[no_init_weights(_enable=_fast_init)]
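The same 4.56 version gate is repeated verbatim in `modeling_auto.py` below; a small shared helper could express the dispatch once. A minimal sketch, assuming the only relevant change in transformers 4.56 is the rename of the private `_set_default_torch_dtype` helper to `_set_default_dtype` (the helper name `_set_default_dtype_compat` is hypothetical, not part of this PR):

```
from packaging.version import parse

import transformers


def _set_default_dtype_compat(model_class, torch_dtype):
    """Call whichever default-dtype setter the installed transformers provides."""
    # transformers >= 4.56 renamed the private helper; older releases keep the old name.
    if parse(transformers.__version__) >= parse("4.56.0"):
        return model_class._set_default_dtype(torch_dtype)
    return model_class._set_default_torch_dtype(torch_dtype)
```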
diff --git a/neural_compressor/torch/utils/utility.py b/neural_compressor/torch/utils/utility.py
index a484824d39c..0f452f10746 100644
--- a/neural_compressor/torch/utils/utility.py
+++ b/neural_compressor/torch/utils/utility.py
@@ -38,7 +38,7 @@
if is_transformers_imported():
import transformers
- SUPPORTED_LAYERS = [nn.Linear, transformers.modeling_utils.Conv1D]
+ SUPPORTED_LAYERS = [nn.Linear, transformers.pytorch_utils.Conv1D]
else:
SUPPORTED_LAYERS = [nn.Conv1d, nn.Linear]
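The patch switches this call site (and the one in `transformers/quantization/utils.py` below) unconditionally to `transformers.pytorch_utils.Conv1D`. If very old transformers releases still had to be supported, a guarded import is one alternative; a sketch only, not what this PR does:

```
from torch import nn

try:
    # Current transformers releases expose the GPT-2 style Conv1D here.
    from transformers.pytorch_utils import Conv1D
except ImportError:  # pragma: no cover
    # Very old releases kept it in modeling_utils.
    from transformers.modeling_utils import Conv1D

SUPPORTED_LAYERS = [nn.Linear, Conv1D]
```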
diff --git a/neural_compressor/transformers/models/modeling_auto.py b/neural_compressor/transformers/models/modeling_auto.py
index dd17dfe580a..59de626cfaf 100644
--- a/neural_compressor/transformers/models/modeling_auto.py
+++ b/neural_compressor/transformers/models/modeling_auto.py
@@ -393,7 +393,7 @@ def load_low_bit(cls, pretrained_model_name_or_path, *model_args, **kwargs):
# index of the files.
is_sharded = False
sharded_metadata = None
- if transformers.__version__ >= "4.50":
+ if parse(transformers.__version__) >= parse("4.50"):
from transformers.modeling_utils import _get_resolved_checkpoint_files
gguf_file = kwargs.pop("gguf_file", None)
@@ -635,8 +635,10 @@ def load_low_bit(cls, pretrained_model_name_or_path, *model_args, **kwargs):
torch_dtype = torch.float32
else:
assert False, f'`torch_dtype` can be either `torch.dtype` or `"auto"`, but received {torch_dtype}'
-
- dtype_orig = model_class._set_default_torch_dtype(torch_dtype)
+ if parse(transformers.__version__) >= parse("4.56.0"):
+ dtype_orig = model_class._set_default_dtype(torch_dtype)
+ else:
+ dtype_orig = model_class._set_default_torch_dtype(torch_dtype)
if quantization_config.compute_dtype is None:
if use_xpu:
quantization_config.compute_dtype = (
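The first hunk in this file also replaces a plain string comparison of `transformers.__version__` with `packaging.version.parse`. String comparison is lexicographic and gives the wrong answer as soon as version components differ in digit count, which a quick check illustrates:

```
from packaging.version import parse

# Lexicographic comparison: '1' < '5', so "4.100.0" sorts before "4.50".
print("4.100.0" >= "4.50")                  # False -- wrong for a version check
print(parse("4.100.0") >= parse("4.50"))    # True  -- correct semantic ordering
```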
diff --git a/neural_compressor/transformers/quantization/utils.py b/neural_compressor/transformers/quantization/utils.py
index 80be5b3764f..ced7ba174cd 100644
--- a/neural_compressor/transformers/quantization/utils.py
+++ b/neural_compressor/transformers/quantization/utils.py
@@ -579,7 +579,7 @@ def set_nontext_module_config(model, to_quant_block_names, config):
set_nontext_module_config(model, to_quant_block_names, config)
for n, m in model.named_modules():
- if isinstance(m, torch.nn.Linear) or isinstance(m, transformers.modeling_utils.Conv1D):
+ if isinstance(m, torch.nn.Linear) or isinstance(m, transformers.pytorch_utils.Conv1D):
if m.weight.shape[0] % 32 != 0 or m.weight.shape[1] % 32 != 0:
config.modules_to_not_convert.append(n)
print(
diff --git a/test/3x/torch/algorithms/pt2e_quant/test_pt2e_w8a8.py b/test/3x/torch/algorithms/pt2e_quant/test_pt2e_w8a8.py
index 408c7aeb841..5df612dd1b6 100644
--- a/test/3x/torch/algorithms/pt2e_quant/test_pt2e_w8a8.py
+++ b/test/3x/torch/algorithms/pt2e_quant/test_pt2e_w8a8.py
@@ -75,24 +75,18 @@ def test_quantizer_on_llm(self):
model_name = "facebook/opt-125m"
model = AutoModelForCausalLM.from_pretrained(model_name)
+ model_config = model.config
tokenizer = AutoTokenizer.from_pretrained(model_name)
- input_ids = tokenizer("Hello, my dog is cute", return_tensors="pt")["input_ids"]
+ inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
# example_inputs = (input_ids,)
# model = export_model_for_pt2e_quant(model, example_inputs=example_inputs)
+ attention_mask = inputs.attention_mask
+ input_ids = inputs.input_ids
+
+ from transformers.integrations.executorch import export_with_dynamic_cache
from transformers import DynamicCache
- example_inputs = {
- "input_ids": input_ids,
- "attention_mask": None,
- "past_key_values": DynamicCache(),
- "use_cache": True,
- }
- with torch.no_grad():
- ep = torch.export.export_for_training(
- model,
- (),
- example_inputs,
- strict=False,
- )
+ ep = export_with_dynamic_cache(model, input_ids, attention_mask)
model = ep.module()
model._exported = True
@@ -102,7 +96,12 @@ def test_quantizer_on_llm(self):
prepare_model = w8a8_static_quantizer.prepare(model)
# calibrate
for i in range(2):
- prepare_model(**example_inputs)
+ prepare_model(
+ input_ids=input_ids,
+ attention_mask=attention_mask,
+ past_key_values=DynamicCache(config=model_config),
+ use_cache=True,
+ )
# convert
converted_model = w8a8_static_quantizer.convert(prepare_model)
# inference
@@ -110,7 +109,11 @@ def test_quantizer_on_llm(self):
config.freezing = True
opt_model = torch.compile(converted_model)
- out = opt_model(**example_inputs)
+ out = opt_model(input_ids=input_ids,
+ attention_mask=attention_mask,
+ past_key_values=DynamicCache(config=model_config),
+ use_cache=True,
+ )
assert out.logits is not None
@patch("neural_compressor.torch.algorithms.pt2e_quant.core.logger.error")
diff --git a/test/3x/torch/quantization/test_pt2e_quant.py b/test/3x/torch/quantization/test_pt2e_quant.py
index 7bce6b89066..ab426a5a2c7 100644
--- a/test/3x/torch/quantization/test_pt2e_quant.py
+++ b/test/3x/torch/quantization/test_pt2e_quant.py
@@ -206,23 +206,19 @@ def test_prepare_and_convert_on_llm(self, force_not_import_ipex):
model_name = "facebook/opt-125m"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
- input_ids = tokenizer("Hello, my dog is cute", return_tensors="pt")["input_ids"]
+ model_config = model.config
+ inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
# example_inputs = (input_ids,)
- # model = export(model, example_inputs=example_inputs)
+ # model = export_model_for_pt2e_quant(model, example_inputs=example_inputs)
+ attention_mask = inputs.attention_mask
+ input_ids = inputs.input_ids
+
+ from transformers.integrations.executorch import export_with_dynamic_cache
from transformers import DynamicCache
- example_inputs = {
- "input_ids": input_ids,
- "attention_mask": None,
- "past_key_values": DynamicCache(),
- "use_cache": True,
- }
- with torch.no_grad():
- ep = torch.export.export_for_training(
- model,
- (),
- example_inputs,
- strict=False,
- )
+ ep = export_with_dynamic_cache(model, input_ids, attention_mask)
model = ep.module()
model._exported = True
model.dynamic_shapes = None
@@ -232,7 +228,12 @@ def test_prepare_and_convert_on_llm(self, force_not_import_ipex):
prepare_model = prepare(model, quant_config)
# calibrate
for i in range(2):
- prepare_model(**example_inputs)
+ prepare_model(
+ input_ids=input_ids,
+ attention_mask=attention_mask,
+ past_key_values=DynamicCache(config=model_config),
+ use_cache=True,
+ )
# convert
converted_model = convert(prepare_model)
# inference
@@ -240,7 +241,12 @@ def test_prepare_and_convert_on_llm(self, force_not_import_ipex):
config.freezing = True
opt_model = torch.compile(converted_model)
- out = opt_model(**example_inputs)
+ out = opt_model(
+ input_ids=input_ids,
+ attention_mask=attention_mask,
+ past_key_values=DynamicCache(config=model_config),
+ use_cache=True,
+ )
assert out.logits is not None
@staticmethod
diff --git a/test/3x/torch/requirements.txt b/test/3x/torch/requirements.txt
index 16b7e508083..b17193fe980 100644
--- a/test/3x/torch/requirements.txt
+++ b/test/3x/torch/requirements.txt
@@ -1,6 +1,6 @@
auto_round
datasets
-deepspeed @ git+https://github.com/HabanaAI/DeepSpeed.git@1.21.0
+deepspeed @ git+https://github.com/HabanaAI/DeepSpeed.git@1.22.0
expecttest
intel_extension_for_pytorch
numpy