From 9857e22cd6b849b6ced63bec8416082624901ef9 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Tue, 25 Nov 2025 10:18:12 +0100
Subject: [PATCH 1/4] Remove deprecated load config from file

---
 src/transformers/configuration_utils.py      |   5 -
 src/transformers/feature_extraction_utils.py |   6 -
 .../generation/configuration_utils.py        |   5 -
 src/transformers/image_processing_base.py    |   6 -
 src/transformers/modeling_utils.py           |   5 -
 src/transformers/processing_utils.py         |   9 -
 src/transformers/tokenization_utils_base.py  | 159 ++++++++----
 src/transformers/utils/__init__.py           |   2 -
 src/transformers/utils/hub.py                |  41 +----
 src/transformers/video_processing_utils.py   |   6 -
 tests/utils/test_image_utils.py              |  76 +++------
 tests/utils/test_tokenization_utils.py       |  39 +----
 12 files changed, 96 insertions(+), 263 deletions(-)

diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py
index 5d6fbda009dc..4b69b453359c 100755
--- a/src/transformers/configuration_utils.py
+++ b/src/transformers/configuration_utils.py
@@ -30,9 +30,7 @@
     PushToHubMixin,
     cached_file,
     copy_func,
-    download_url,
     extract_commit_hash,
-    is_remote_url,
     is_torch_available,
     logging,
 )
@@ -659,9 +657,6 @@ def _get_config_dict(
             # Special case when pretrained_model_name_or_path is a local file
             resolved_config_file = pretrained_model_name_or_path
             is_local = True
-        elif is_remote_url(pretrained_model_name_or_path):
-            configuration_file = pretrained_model_name_or_path if gguf_file is None else gguf_file
-            resolved_config_file = download_url(pretrained_model_name_or_path)
         else:
             configuration_file = kwargs.pop("_configuration_file", CONFIG_NAME) if gguf_file is None else gguf_file
 
diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py
index 844d9a215914..7d9dda273636 100644
--- a/src/transformers/feature_extraction_utils.py
+++ b/src/transformers/feature_extraction_utils.py
@@ -30,10 +30,8 @@
     PushToHubMixin,
     TensorType,
     copy_func,
-    download_url,
     is_numpy_array,
     is_offline_mode,
-    is_remote_url,
     is_torch_available,
     is_torch_device,
     is_torch_dtype,
@@ -430,10 +428,6 @@ def get_feature_extractor_dict(
             resolved_feature_extractor_file = pretrained_model_name_or_path
             resolved_processor_file = None
             is_local = True
-        elif is_remote_url(pretrained_model_name_or_path):
-            feature_extractor_file = pretrained_model_name_or_path
-            resolved_processor_file = None
-            resolved_feature_extractor_file = download_url(pretrained_model_name_or_path)
         else:
             feature_extractor_file = FEATURE_EXTRACTOR_NAME
             try:
diff --git a/src/transformers/generation/configuration_utils.py b/src/transformers/generation/configuration_utils.py
index 2168dc0bbe19..341c02fe81eb 100644
--- a/src/transformers/generation/configuration_utils.py
+++ b/src/transformers/generation/configuration_utils.py
@@ -29,9 +29,7 @@
     ExplicitEnum,
     PushToHubMixin,
     cached_file,
-    download_url,
     extract_commit_hash,
-    is_remote_url,
     is_torch_available,
     logging,
 )
@@ -872,9 +870,6 @@ def from_pretrained(
             # Special case when config_path is a local file
             resolved_config_file = config_path
             is_local = True
-        elif is_remote_url(config_path):
-            configuration_file = config_path
-            resolved_config_file = download_url(config_path)
         else:
             configuration_file = config_file_name
             try:
diff --git a/src/transformers/image_processing_base.py b/src/transformers/image_processing_base.py
index 564af6be8081..2c1207fd8ca2 100644
--- a/src/transformers/image_processing_base.py
+++ b/src/transformers/image_processing_base.py
@@ -28,9 +28,7 @@
     PROCESSOR_NAME,
     PushToHubMixin,
     copy_func,
-    download_url,
     is_offline_mode,
-    is_remote_url,
     logging,
     safe_load_json_file,
 )
@@ -283,10 +281,6 @@ def get_image_processor_dict(
             resolved_image_processor_file = pretrained_model_name_or_path
             resolved_processor_file = None
             is_local = True
-        elif is_remote_url(pretrained_model_name_or_path):
-            image_processor_file = pretrained_model_name_or_path
-            resolved_processor_file = None
-            resolved_image_processor_file = download_url(pretrained_model_name_or_path)
         else:
             image_processor_file = image_processor_filename
             try:
diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index e1df5de3ae35..9829ed7397ec 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -105,14 +105,12 @@
     cached_file,
     check_torch_load_is_safe,
     copy_func,
-    download_url,
     has_file,
     is_accelerate_available,
     is_flash_attn_2_available,
     is_flash_attn_3_available,
     is_kernels_available,
     is_offline_mode,
-    is_remote_url,
     is_torch_flex_attn_available,
     is_torch_greater_or_equal,
     is_torch_mlu_available,
@@ -531,9 +529,6 @@ def _get_resolved_checkpoint_files(
     elif os.path.isfile(os.path.join(subfolder, pretrained_model_name_or_path)):
         archive_file = pretrained_model_name_or_path
         is_local = True
-    elif is_remote_url(pretrained_model_name_or_path):
-        filename = pretrained_model_name_or_path
-        resolved_archive_file = download_url(pretrained_model_name_or_path)
     else:
         # set correct filename
         if transformers_explicit_filename is not None:
diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py
index 8422294e9773..ed2af2c745f4 100644
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -53,9 +53,7 @@
     cached_file,
     copy_func,
     direct_transformers_import,
-    download_url,
     is_offline_mode,
-    is_remote_url,
     is_torch_available,
     list_repo_templates,
     logging,
@@ -940,13 +938,6 @@ def get_processor_dict(
             resolved_raw_chat_template_file = None
             resolved_audio_tokenizer_file = None
             is_local = True
-        elif is_remote_url(pretrained_model_name_or_path):
-            processor_file = pretrained_model_name_or_path
-            resolved_processor_file = download_url(pretrained_model_name_or_path)
-            # can't load chat-template and audio tokenizer when given a file url as pretrained_model_name_or_path
-            resolved_chat_template_file = None
-            resolved_raw_chat_template_file = None
-            resolved_audio_tokenizer_file = None
         else:
             if is_local:
                 template_dir = Path(pretrained_model_name_or_path, CHAT_TEMPLATE_DIR)
diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index 7647576914c1..22c3603df3f1 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -48,13 +48,11 @@
     add_end_docstrings,
     cached_file,
     copy_func,
-    download_url,
     extract_commit_hash,
     is_mlx_available,
     is_numpy_array,
     is_offline_mode,
     is_protobuf_available,
-    is_remote_url,
     is_tokenizers_available,
     is_torch_available,
     is_torch_device,
@@ -2010,94 +2008,79 @@ def from_pretrained(
         is_local = os.path.isdir(pretrained_model_name_or_path)
         single_file_id = None
-        if os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
-            if len(cls.vocab_files_names) > 1 and not gguf_file:
-                raise ValueError(
-                    f"Calling {cls.__name__}.from_pretrained() with the path to a single file or url is not "
-                    "supported for this tokenizer. Use a model identifier or the path to a directory instead."
-                )
-            warnings.warn(
-                f"Calling {cls.__name__}.from_pretrained() with the path to a single file or url is deprecated and "
-                "won't be possible anymore in v5. Use a model identifier or the path to a directory instead.",
-                FutureWarning,
-            )
-            file_id = list(cls.vocab_files_names.keys())[0]
-            vocab_files[file_id] = pretrained_model_name_or_path
-            single_file_id = file_id
+        if gguf_file:
+            vocab_files["vocab_file"] = gguf_file
         else:
-            if gguf_file:
-                vocab_files["vocab_file"] = gguf_file
-            else:
+            # At this point pretrained_model_name_or_path is either a directory or a model identifier name
+            additional_files_names = {
+                "added_tokens_file": ADDED_TOKENS_FILE,  # kept only for legacy
+                "special_tokens_map_file": SPECIAL_TOKENS_MAP_FILE,  # kept only for legacy
+                "tokenizer_config_file": TOKENIZER_CONFIG_FILE,
+                # tokenizer_file used to initialize a slow from a fast. Properly copy the `addedTokens` instead of adding in random orders
+                "tokenizer_file": FULL_TOKENIZER_FILE,
+                "chat_template_file": CHAT_TEMPLATE_FILE,
+            }
+
+            vocab_files = {**cls.vocab_files_names, **additional_files_names}
+            if "tokenizer_file" in vocab_files:
+                # Try to get the tokenizer config to see if there are versioned tokenizer files.
+                fast_tokenizer_file = FULL_TOKENIZER_FILE
+
+                try:
+                    resolved_config_file = cached_file(
+                        pretrained_model_name_or_path,
+                        TOKENIZER_CONFIG_FILE,
+                        cache_dir=cache_dir,
+                        force_download=force_download,
+                        proxies=proxies,
+                        token=token,
+                        revision=revision,
+                        local_files_only=local_files_only,
+                        subfolder=subfolder,
+                        user_agent=user_agent,
+                        _raise_exceptions_for_missing_entries=False,
+                        _commit_hash=commit_hash,
+                    )
+                except OSError:
+                    # Re-raise any error raised by cached_file in order to get a helpful error message
+                    raise
+                except Exception:
+                    # For any other exception, we throw a generic error.
+                    raise OSError(
+                        f"Can't load tokenizer for '{pretrained_model_name_or_path}'. If you were trying to load it from "
+                        "'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
+                        f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
+                        f"containing all relevant files for a {cls.__name__} tokenizer."
+                    )
+
+                commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
+                if resolved_config_file is not None:
+                    with open(resolved_config_file, encoding="utf-8") as reader:
+                        tokenizer_config = json.load(reader)
+                    if "fast_tokenizer_files" in tokenizer_config:
+                        fast_tokenizer_file = get_fast_tokenizer_file(tokenizer_config["fast_tokenizer_files"])
+                    vocab_files["tokenizer_file"] = fast_tokenizer_file
+
+            # This block looks for any extra chat template files
+            if is_local:
+                template_dir = Path(pretrained_model_name_or_path, CHAT_TEMPLATE_DIR)
+                if template_dir.is_dir():
+                    for template_file in template_dir.glob("*.jinja"):
+                        template_name = template_file.name.removesuffix(".jinja")
+                        vocab_files[f"chat_template_{template_name}"] = (
+                            f"{CHAT_TEMPLATE_DIR}/{template_file.name}"
+                        )
+            else:
+                for template in list_repo_templates(
+                    pretrained_model_name_or_path,
+                    local_files_only=local_files_only,
+                    revision=revision,
+                    cache_dir=cache_dir,
+                    token=token,
+                ):
+                    template = template.removesuffix(".jinja")
+                    vocab_files[f"chat_template_{template}"] = f"{CHAT_TEMPLATE_DIR}/{template}.jinja"
 
         if not is_local and not local_files_only:
             try:
@@ -2121,8 +2104,6 @@ def from_pretrained(
             elif single_file_id == file_id:
                 if os.path.isfile(file_path):
                     resolved_vocab_files[file_id] = file_path
-                elif is_remote_url(file_path):
-                    resolved_vocab_files[file_id] = download_url(file_path, proxies=proxies)
                 else:
                     try:
                         resolved_vocab_files[file_id] = cached_file(
diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py
index 38b5db8f4893..f916d7673e11 100644
--- a/src/transformers/utils/__init__.py
+++ b/src/transformers/utils/__init__.py
@@ -95,12 +95,10 @@
     cached_file,
     default_cache_path,
     define_sagemaker_information,
-    download_url,
     extract_commit_hash,
     has_file,
     http_user_agent,
     is_offline_mode,
-    is_remote_url,
     list_repo_templates,
     try_to_load_from_cache,
 )
diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py
index 7103ba6b5035..306b406137af 100644
--- a/src/transformers/utils/hub.py
+++ b/src/transformers/utils/hub.py
@@ -44,7 +44,7 @@
     snapshot_download,
     try_to_load_from_cache,
 )
-from huggingface_hub.file_download import REGEX_COMMIT_HASH, http_get
+from huggingface_hub.file_download import REGEX_COMMIT_HASH
 from huggingface_hub.utils import (
     EntryNotFoundError,
     GatedRepoError,
@@ -60,12 +60,7 @@
 
 from . import __version__, logging
 from .generic import working_or_temp_dir
-from .import_utils import (
-    ENV_VARS_TRUE_VALUES,
-    get_torch_version,
-    is_torch_available,
-    is_training_run_on_sagemaker,
-)
+from .import_utils import ENV_VARS_TRUE_VALUES, get_torch_version, is_torch_available, is_training_run_on_sagemaker
 
 
 LEGACY_PROCESSOR_CHAT_TEMPLATE_FILE = "chat_template.json"
@@ -202,11 +197,6 @@ def list_repo_templates(
     return [entry.stem for entry in templates_dir.iterdir() if entry.is_file() and entry.name.endswith(".jinja")]
 
 
-def is_remote_url(url_or_filename):
-    parsed = urlparse(url_or_filename)
-    return parsed.scheme in ("http", "https")
-
-
 def define_sagemaker_information():
     try:
         instance_data = httpx.get(os.environ["ECS_CONTAINER_METADATA_URI"]).json()
@@ -583,33 +573,6 @@ def cached_files(
     return resolved_files
 
 
-def download_url(url, proxies=None):
-    """
-    Downloads a given url in a temporary file. This function is not safe to use in multiple processes. Its only use is
-    for deprecated behavior allowing to download config/models with a single url instead of using the Hub.
-
-    Args:
-        url (`str`): The url of the file to download.
-        proxies (`dict[str, str]`, *optional*):
-            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
-            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
-
-    Returns:
-        `str`: The location of the temporary file where the url was downloaded.
-    """
-    warnings.warn(
-        f"Using `from_pretrained` with the url of a file (here {url}) is deprecated and won't be possible anymore in"
-        " v5 of Transformers. You should host your file on the Hub (hf.co) instead and use the repository ID. Note"
-        " that this is not compatible with the caching system (your file will be downloaded at each execution) or"
-        " multiple processes (each process will download the file in a different temporary file).",
-        FutureWarning,
-    )
-    tmp_fd, tmp_file = tempfile.mkstemp()
-    with os.fdopen(tmp_fd, "wb") as f:
-        http_get(url, f, proxies=proxies)
-    return tmp_file
-
-
 def has_file(
     path_or_repo: str | os.PathLike,
     filename: str,
diff --git a/src/transformers/video_processing_utils.py b/src/transformers/video_processing_utils.py
index eeb04eeb3adb..77836a0bf1ad 100644
--- a/src/transformers/video_processing_utils.py
+++ b/src/transformers/video_processing_utils.py
@@ -43,9 +43,7 @@
     TensorType,
     add_start_docstrings,
     copy_func,
-    download_url,
     is_offline_mode,
-    is_remote_url,
     is_torch_available,
     is_torchcodec_available,
     is_torchvision_v2_available,
@@ -638,10 +636,6 @@ def get_video_processor_dict(
             resolved_video_processor_file = pretrained_model_name_or_path
             resolved_processor_file = None
             is_local = True
-        elif is_remote_url(pretrained_model_name_or_path):
-            video_processor_file = pretrained_model_name_or_path
-            resolved_processor_file = None
-            resolved_video_processor_file = download_url(pretrained_model_name_or_path)
         else:
             video_processor_file = VIDEO_PROCESSOR_NAME
             try:
diff --git a/tests/utils/test_image_utils.py b/tests/utils/test_image_utils.py
index 124bc73d7357..4a6a03f813e6 100644
--- a/tests/utils/test_image_utils.py
+++ b/tests/utils/test_image_utils.py
@@ -11,17 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import codecs
-import os
-import tempfile
 import unittest
-from io import BytesIO
 
 import httpx
 import numpy as np
 import pytest
-from huggingface_hub.file_download import hf_hub_url, http_get
+from huggingface_hub.file_download import hf_hub_download
 
 from tests.pipelines.test_pipelines_document_question_answering import INVOICE_URL
 from transformers import is_torch_available, is_vision_available
@@ -46,8 +42,8 @@
 
 
 def get_image_from_hub_dataset(dataset_id: str, filename: str, revision: str | None = None) -> "PIL.Image.Image":
-    url = hf_hub_url(dataset_id, filename, repo_type="dataset", revision=revision)
-    return PIL.Image.open(BytesIO(httpx.get(url, follow_redirects=True).content))
+    path = hf_hub_download(dataset_id, filename, repo_type="dataset", revision=revision)
+    return PIL.Image.open(path)
 
 
 def get_random_image(height, width):
@@ -738,54 +734,30 @@ def test_load_img_local(self):
         )
 
     def test_load_img_base64_prefix(self):
-        try:
-            tmp_file = tempfile.NamedTemporaryFile(delete=False).name
-            with open(tmp_file, "wb") as f:
-                http_get(
-                    "https://huggingface.co/datasets/hf-internal-testing/dummy-base64-images/raw/main/image_0.txt", f
-                )
-
-            with open(tmp_file, encoding="utf-8") as b64:
-                img = load_image(b64.read())
-                img_arr = np.array(img)
-
-        finally:
-            os.remove(tmp_file)
-
+        path = hf_hub_download(
+            repo_id="hf-internal-testing/dummy-base64-images", filename="image_0.txt", repo_type="dataset"
+        )
+        with open(path, encoding="utf-8") as b64:
+            img = load_image(b64.read())
+            img_arr = np.array(img)
         self.assertEqual(img_arr.shape, (64, 32, 3))
 
     def test_load_img_base64(self):
-        try:
-            tmp_file = tempfile.NamedTemporaryFile(delete=False).name
-            with open(tmp_file, "wb") as f:
-                http_get(
-                    "https://huggingface.co/datasets/hf-internal-testing/dummy-base64-images/raw/main/image_1.txt", f
-                )
-
-            with open(tmp_file, encoding="utf-8") as b64:
-                img = load_image(b64.read())
-                img_arr = np.array(img)
-
-        finally:
-            os.remove(tmp_file)
-
+        path = hf_hub_download(
+            repo_id="hf-internal-testing/dummy-base64-images", filename="image_1.txt", repo_type="dataset"
+        )
+        with open(path, encoding="utf-8") as b64:
+            img = load_image(b64.read())
+            img_arr = np.array(img)
         self.assertEqual(img_arr.shape, (64, 32, 3))
 
     def test_load_img_base64_encoded_bytes(self):
-        try:
-            tmp_file = tempfile.NamedTemporaryFile(delete=False).name
-            with open(tmp_file, "wb") as f:
-                http_get(
-                    "https://huggingface.co/datasets/hf-internal-testing/dummy-base64-images/raw/main/image_2.txt", f
-                )
-
-            with codecs.open(tmp_file, encoding="unicode_escape") as b64:
-                img = load_image(b64.read())
-                img_arr = np.array(img)
-
-        finally:
-            os.remove(tmp_file)
-
+        path = hf_hub_download(
+            repo_id="hf-internal-testing/dummy-base64-images", filename="image_2.txt", repo_type="dataset"
+        )
+        with codecs.open(path, encoding="unicode_escape") as b64:
+            img = load_image(b64.read())
+            img_arr = np.array(img)
         self.assertEqual(img_arr.shape, (256, 256, 3))
 
     def test_load_img_rgba(self):
@@ -797,11 +769,7 @@ def test_load_img_rgba(self):
 
         img = load_image(img)  # img with mode RGBA
         img_arr = np.array(img)
-
-        self.assertEqual(
-            img_arr.shape,
-            (512, 512, 3),
-        )
+        self.assertEqual(img_arr.shape, (512, 512, 3))
 
     def test_load_img_la(self):
         # we use revision="refs/pr/1" until the PR is merged
diff --git a/tests/utils/test_tokenization_utils.py b/tests/utils/test_tokenization_utils.py
index 1cb872a3c216..d34b224068e1 100644
--- a/tests/utils/test_tokenization_utils.py
+++ b/tests/utils/test_tokenization_utils.py
@@ -20,16 +20,8 @@
 from pathlib import Path
 
 import httpx
-from huggingface_hub.file_download import http_get
-
-from transformers import (
-    AlbertTokenizer,
-    AutoTokenizer,
-    BertTokenizer,
-    BertTokenizerFast,
-    GPT2TokenizerFast,
-    is_tokenizers_available,
-)
+
+from transformers import AutoTokenizer, BertTokenizer, BertTokenizerFast, GPT2TokenizerFast, is_tokenizers_available
 from transformers.testing_utils import TOKEN, TemporaryHubRepo, is_staging_test, require_tokenizers
 from transformers.tokenization_utils import ExtensionsTrie, Trie
@@ -83,33 +75,6 @@ def test_cached_files_are_used_when_internet_is_down_missing_files(self):
             # This check we did call the fake head request
             mock_head.assert_called()
 
-    def test_legacy_load_from_one_file(self):
-        # This test is for deprecated behavior and can be removed in v5
-        try:
-            tmp_file = tempfile.NamedTemporaryFile(delete=False).name
-            with open(tmp_file, "wb") as f:
-                http_get("https://huggingface.co/albert/albert-base-v1/resolve/main/spiece.model", f)
-
-            _ = AlbertTokenizer.from_pretrained(tmp_file)
-        finally:
-            os.remove(tmp_file)
-
-        # Supporting this legacy load introduced a weird bug where the tokenizer would load local files if they are in
-        # the current folder and have the right name.
-        if os.path.isfile("tokenizer.json"):
-            # We skip the test if the user has a `tokenizer.json` in this folder to avoid deleting it.
-            self.skipTest(reason="Skipping test as there is a `tokenizer.json` file in the current folder.")
-        try:
-            with open("tokenizer.json", "wb") as f:
-                http_get("https://huggingface.co/hf-internal-testing/tiny-random-bert/blob/main/tokenizer.json", f)
-            tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
-            # The tiny random BERT has a vocab size of 1024, tiny openai-community/gpt2 as a vocab size of 1000
-            self.assertEqual(tokenizer.vocab_size, 1000)
-            # Tokenizer should depend on the remote checkpoint, not the local tokenizer.json file.
-
-        finally:
-            os.remove("tokenizer.json")
 
 
 @is_staging_test
 class TokenizerPushToHubTester(unittest.TestCase):

From a45742e176e2a36506debe9e08164a2793a8d53f Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Tue, 25 Nov 2025 10:28:28 +0100
Subject: [PATCH 2/4] code quality

---
 src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py | 1 -
 src/transformers/utils/hub.py                                  | 2 --
 2 files changed, 3 deletions(-)

diff --git a/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py b/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py
index b086a5844b34..a5292ccae108 100644
--- a/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py
+++ b/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py
@@ -26,7 +26,6 @@
 
 # Import MetaCLIP modules
 from src.mini_clip.factory import create_model_and_transforms
-
 from transformers import (
     AutoTokenizer,
     CLIPImageProcessor,
diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py
index 306b406137af..7f3b2ed53c8d 100644
--- a/src/transformers/utils/hub.py
+++ b/src/transformers/utils/hub.py
@@ -19,12 +19,10 @@
 import os
 import re
 import sys
-import tempfile
 import warnings
 from concurrent import futures
 from pathlib import Path
 from typing import TypedDict
-from urllib.parse import urlparse
 from uuid import uuid4
 
 import httpx

From c6474d3a2dd97e11980030624f482a1a8c7572b5 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Tue, 25 Nov 2025 10:31:04 +0100
Subject: [PATCH 3/4] ruff

---
 src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py b/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py
index a5292ccae108..b086a5844b34 100644
--- a/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py
+++ b/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py
@@ -26,6 +26,7 @@
 
 # Import MetaCLIP modules
 from src.mini_clip.factory import create_model_and_transforms
+
 from transformers import (
     AutoTokenizer,
     CLIPImageProcessor,

From a17914355e4c1960199e3c117ebd7529ef4b4c99 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Tue, 25 Nov 2025 10:33:43 +0100
Subject: [PATCH 4/4] format

---
 src/transformers/tokenization_utils_base.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index 22c3603df3f1..ce2b597c6327 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -2068,9 +2068,7 @@ def from_pretrained(
                 if template_dir.is_dir():
                     for template_file in template_dir.glob("*.jinja"):
                         template_name = template_file.name.removesuffix(".jinja")
-                        vocab_files[f"chat_template_{template_name}"] = (
-                            f"{CHAT_TEMPLATE_DIR}/{template_file.name}"
-                        )
+                        vocab_files[f"chat_template_{template_name}"] = f"{CHAT_TEMPLATE_DIR}/{template_file.name}"
             else:
                 for template in list_repo_templates(
                     pretrained_model_name_or_path,