From 9857e22cd6b849b6ced63bec8416082624901ef9 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Tue, 25 Nov 2025 10:18:12 +0100
Subject: [PATCH 1/4] Remove deprecated load config from file

---
 src/transformers/configuration_utils.py      |   5 -
 src/transformers/feature_extraction_utils.py |   6 -
 .../generation/configuration_utils.py        |   5 -
 src/transformers/image_processing_base.py    |   6 -
 src/transformers/modeling_utils.py           |   5 -
 src/transformers/processing_utils.py         |   9 -
 src/transformers/tokenization_utils_base.py  | 159 ++++++++----
 src/transformers/utils/__init__.py           |   2 -
 src/transformers/utils/hub.py                |  41 +----
 src/transformers/video_processing_utils.py   |   6 -
 tests/utils/test_image_utils.py              |  76 +++------
 tests/utils/test_tokenization_utils.py       |  39 +----
 12 files changed, 96 insertions(+), 263 deletions(-)

diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py
index 5d6fbda009dc..4b69b453359c 100755
--- a/src/transformers/configuration_utils.py
+++ b/src/transformers/configuration_utils.py
@@ -30,9 +30,7 @@
     PushToHubMixin,
     cached_file,
     copy_func,
-    download_url,
     extract_commit_hash,
-    is_remote_url,
     is_torch_available,
     logging,
 )
@@ -659,9 +657,6 @@ def _get_config_dict(
             # Special case when pretrained_model_name_or_path is a local file
             resolved_config_file = pretrained_model_name_or_path
             is_local = True
-        elif is_remote_url(pretrained_model_name_or_path):
-            configuration_file = pretrained_model_name_or_path if gguf_file is None else gguf_file
-            resolved_config_file = download_url(pretrained_model_name_or_path)
         else:
             configuration_file = kwargs.pop("_configuration_file", CONFIG_NAME) if gguf_file is None else gguf_file
 
diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py
index 844d9a215914..7d9dda273636 100644
--- a/src/transformers/feature_extraction_utils.py
+++ b/src/transformers/feature_extraction_utils.py
@@ -30,10 +30,8 @@
     PushToHubMixin,
     TensorType,
     copy_func,
-    download_url,
     is_numpy_array,
     is_offline_mode,
-    is_remote_url,
     is_torch_available,
     is_torch_device,
     is_torch_dtype,
@@ -430,10 +428,6 @@ def get_feature_extractor_dict(
             resolved_feature_extractor_file = pretrained_model_name_or_path
             resolved_processor_file = None
             is_local = True
-        elif is_remote_url(pretrained_model_name_or_path):
-            feature_extractor_file = pretrained_model_name_or_path
-            resolved_processor_file = None
-            resolved_feature_extractor_file = download_url(pretrained_model_name_or_path)
         else:
             feature_extractor_file = FEATURE_EXTRACTOR_NAME
             try:
diff --git a/src/transformers/generation/configuration_utils.py b/src/transformers/generation/configuration_utils.py
index 2168dc0bbe19..341c02fe81eb 100644
--- a/src/transformers/generation/configuration_utils.py
+++ b/src/transformers/generation/configuration_utils.py
@@ -29,9 +29,7 @@
     ExplicitEnum,
     PushToHubMixin,
     cached_file,
-    download_url,
     extract_commit_hash,
-    is_remote_url,
     is_torch_available,
     logging,
 )
@@ -872,9 +870,6 @@ def from_pretrained(
             # Special case when config_path is a local file
             resolved_config_file = config_path
             is_local = True
-        elif is_remote_url(config_path):
-            configuration_file = config_path
-            resolved_config_file = download_url(config_path)
         else:
             configuration_file = config_file_name
             try:
diff --git a/src/transformers/image_processing_base.py b/src/transformers/image_processing_base.py
index 564af6be8081..2c1207fd8ca2 100644
--- a/src/transformers/image_processing_base.py
+++ b/src/transformers/image_processing_base.py
@@ -28,9 +28,7 @@
     PROCESSOR_NAME,
     PushToHubMixin,
     copy_func,
-    download_url,
     is_offline_mode,
-    is_remote_url,
     logging,
     safe_load_json_file,
 )
@@ -283,10 +281,6 @@ def get_image_processor_dict(
             resolved_image_processor_file = pretrained_model_name_or_path
             resolved_processor_file = None
             is_local = True
-        elif is_remote_url(pretrained_model_name_or_path):
-            image_processor_file = pretrained_model_name_or_path
-            resolved_processor_file = None
-            resolved_image_processor_file = download_url(pretrained_model_name_or_path)
         else:
             image_processor_file = image_processor_filename
             try:
diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index e1df5de3ae35..9829ed7397ec 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -105,14 +105,12 @@
     cached_file,
     check_torch_load_is_safe,
     copy_func,
-    download_url,
     has_file,
     is_accelerate_available,
     is_flash_attn_2_available,
     is_flash_attn_3_available,
     is_kernels_available,
     is_offline_mode,
-    is_remote_url,
     is_torch_flex_attn_available,
     is_torch_greater_or_equal,
     is_torch_mlu_available,
@@ -531,9 +529,6 @@ def _get_resolved_checkpoint_files(
     elif os.path.isfile(os.path.join(subfolder, pretrained_model_name_or_path)):
         archive_file = pretrained_model_name_or_path
         is_local = True
-    elif is_remote_url(pretrained_model_name_or_path):
-        filename = pretrained_model_name_or_path
-        resolved_archive_file = download_url(pretrained_model_name_or_path)
     else:
         # set correct filename
         if transformers_explicit_filename is not None:
diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py
index 8422294e9773..ed2af2c745f4 100644
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -53,9 +53,7 @@
     cached_file,
     copy_func,
     direct_transformers_import,
-    download_url,
     is_offline_mode,
-    is_remote_url,
     is_torch_available,
     list_repo_templates,
     logging,
@@ -940,13 +938,6 @@ def get_processor_dict(
             resolved_raw_chat_template_file = None
             resolved_audio_tokenizer_file = None
             is_local = True
-        elif is_remote_url(pretrained_model_name_or_path):
-            processor_file = pretrained_model_name_or_path
-            resolved_processor_file = download_url(pretrained_model_name_or_path)
-            # can't load chat-template and audio tokenizer when given a file url as pretrained_model_name_or_path
-            resolved_chat_template_file = None
-            resolved_raw_chat_template_file = None
-            resolved_audio_tokenizer_file = None
         else:
             if is_local:
                 template_dir = Path(pretrained_model_name_or_path, CHAT_TEMPLATE_DIR)
diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index 7647576914c1..22c3603df3f1 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -48,13 +48,11 @@
     add_end_docstrings,
     cached_file,
     copy_func,
-    download_url,
     extract_commit_hash,
     is_mlx_available,
     is_numpy_array,
     is_offline_mode,
     is_protobuf_available,
-    is_remote_url,
     is_tokenizers_available,
     is_torch_available,
     is_torch_device,
@@ -2010,94 +2008,79 @@ def from_pretrained(
         is_local = os.path.isdir(pretrained_model_name_or_path)
         single_file_id = None
-        if os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
-            if len(cls.vocab_files_names) > 1 and not gguf_file:
-                raise ValueError(
-                    f"Calling {cls.__name__}.from_pretrained() with the path to a single file or url is not "
-                    "supported for this tokenizer. Use a model identifier or the path to a directory instead."
-                )
-            warnings.warn(
-                f"Calling {cls.__name__}.from_pretrained() with the path to a single file or url is deprecated and "
-                "won't be possible anymore in v5. Use a model identifier or the path to a directory instead.",
-                FutureWarning,
-            )
-            file_id = list(cls.vocab_files_names.keys())[0]
-            vocab_files[file_id] = pretrained_model_name_or_path
-            single_file_id = file_id
+        if gguf_file:
+            vocab_files["vocab_file"] = gguf_file
         else:
-            if gguf_file:
-                vocab_files["vocab_file"] = gguf_file
-            else:
+            # At this point pretrained_model_name_or_path is either a directory or a model identifier name
+            additional_files_names = {
+                "added_tokens_file": ADDED_TOKENS_FILE,  # kept only for legacy
+                "special_tokens_map_file": SPECIAL_TOKENS_MAP_FILE,  # kept only for legacy
+                "tokenizer_config_file": TOKENIZER_CONFIG_FILE,
+                # tokenizer_file used to initialize a slow from a fast. Properly copy the `addedTokens` instead of adding in random orders
+                "tokenizer_file": FULL_TOKENIZER_FILE,
+                "chat_template_file": CHAT_TEMPLATE_FILE,
+            }
+
+            vocab_files = {**cls.vocab_files_names, **additional_files_names}
+            if "tokenizer_file" in vocab_files:
+                # Try to get the tokenizer config to see if there are versioned tokenizer files.
+                fast_tokenizer_file = FULL_TOKENIZER_FILE
+
+                try:
+                    resolved_config_file = cached_file(
+                        pretrained_model_name_or_path,
+                        TOKENIZER_CONFIG_FILE,
+                        cache_dir=cache_dir,
+                        force_download=force_download,
+                        proxies=proxies,
+                        token=token,
+                        revision=revision,
+                        local_files_only=local_files_only,
+                        subfolder=subfolder,
+                        user_agent=user_agent,
+                        _raise_exceptions_for_missing_entries=False,
+                        _commit_hash=commit_hash,
+                    )
+                except OSError:
+                    # Re-raise any error raised by cached_file in order to get a helpful error message
+                    raise
+                except Exception:
+                    # For any other exception, we throw a generic error.
+                    raise OSError(
+                        f"Can't load tokenizer for '{pretrained_model_name_or_path}'. If you were trying to load it from "
+                        "'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
+                        f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
+                        f"containing all relevant files for a {cls.__name__} tokenizer."
+                    )
+
+                commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
+                if resolved_config_file is not None:
+                    with open(resolved_config_file, encoding="utf-8") as reader:
+                        tokenizer_config = json.load(reader)
+                    if "fast_tokenizer_files" in tokenizer_config:
+                        fast_tokenizer_file = get_fast_tokenizer_file(tokenizer_config["fast_tokenizer_files"])
+                    vocab_files["tokenizer_file"] = fast_tokenizer_file
+
+            # This block looks for any extra chat template files
+            if is_local:
+                template_dir = Path(pretrained_model_name_or_path, CHAT_TEMPLATE_DIR)
+                if template_dir.is_dir():
+                    for template_file in template_dir.glob("*.jinja"):
+                        template_name = template_file.name.removesuffix(".jinja")
+                        vocab_files[f"chat_template_{template_name}"] = (
+                            f"{CHAT_TEMPLATE_DIR}/{template_file.name}"
+                        )
+            else:
+                for template in list_repo_templates(
+                    pretrained_model_name_or_path,
+                    local_files_only=local_files_only,
+                    revision=revision,
+                    cache_dir=cache_dir,
+                    token=token,
+                ):
+                    template = template.removesuffix(".jinja")
+                    vocab_files[f"chat_template_{template}"] = f"{CHAT_TEMPLATE_DIR}/{template}.jinja"
 
         if not is_local and not local_files_only:
             try:
@@ -2121,8 +2104,6 @@ def from_pretrained(
             elif single_file_id == file_id:
                 if os.path.isfile(file_path):
                     resolved_vocab_files[file_id] = file_path
-                elif is_remote_url(file_path):
-                    resolved_vocab_files[file_id] = download_url(file_path, proxies=proxies)
                 else:
                     try:
                         resolved_vocab_files[file_id] = cached_file(
diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py
index 38b5db8f4893..f916d7673e11 100644
--- a/src/transformers/utils/__init__.py
+++ b/src/transformers/utils/__init__.py
@@ -95,12 +95,10 @@
     cached_file,
     default_cache_path,
     define_sagemaker_information,
-    download_url,
     extract_commit_hash,
     has_file,
     http_user_agent,
     is_offline_mode,
-    is_remote_url,
     list_repo_templates,
     try_to_load_from_cache,
 )
diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py
index 7103ba6b5035..306b406137af 100644
--- a/src/transformers/utils/hub.py
+++ b/src/transformers/utils/hub.py
@@ -44,7 +44,7 @@
     snapshot_download,
     try_to_load_from_cache,
 )
-from huggingface_hub.file_download import REGEX_COMMIT_HASH, http_get
+from huggingface_hub.file_download import REGEX_COMMIT_HASH
 from huggingface_hub.utils import (
     EntryNotFoundError,
     GatedRepoError,
@@ -60,12 +60,7 @@
 
 from . import __version__, logging
 from .generic import working_or_temp_dir
-from .import_utils import (
-    ENV_VARS_TRUE_VALUES,
-    get_torch_version,
-    is_torch_available,
-    is_training_run_on_sagemaker,
-)
+from .import_utils import ENV_VARS_TRUE_VALUES, get_torch_version, is_torch_available, is_training_run_on_sagemaker
 
 
 LEGACY_PROCESSOR_CHAT_TEMPLATE_FILE = "chat_template.json"
@@ -202,11 +197,6 @@ def list_repo_templates(
     return [entry.stem for entry in templates_dir.iterdir() if entry.is_file() and entry.name.endswith(".jinja")]
 
 
-def is_remote_url(url_or_filename):
-    parsed = urlparse(url_or_filename)
-    return parsed.scheme in ("http", "https")
-
-
 def define_sagemaker_information():
     try:
         instance_data = httpx.get(os.environ["ECS_CONTAINER_METADATA_URI"]).json()
@@ -583,33 +573,6 @@ def cached_files(
     return resolved_files
 
 
-def download_url(url, proxies=None):
-    """
-    Downloads a given url in a temporary file. This function is not safe to use in multiple processes. Its only use is
-    for deprecated behavior allowing to download config/models with a single url instead of using the Hub.
-
-    Args:
-        url (`str`): The url of the file to download.
-        proxies (`dict[str, str]`, *optional*):
-            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
-            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
-
-    Returns:
-        `str`: The location of the temporary file where the url was downloaded.
-    """
-    warnings.warn(
-        f"Using `from_pretrained` with the url of a file (here {url}) is deprecated and won't be possible anymore in"
-        " v5 of Transformers. You should host your file on the Hub (hf.co) instead and use the repository ID. Note"
-        " that this is not compatible with the caching system (your file will be downloaded at each execution) or"
-        " multiple processes (each process will download the file in a different temporary file).",
-        FutureWarning,
-    )
-    tmp_fd, tmp_file = tempfile.mkstemp()
-    with os.fdopen(tmp_fd, "wb") as f:
-        http_get(url, f, proxies=proxies)
-    return tmp_file
-
-
 def has_file(
     path_or_repo: str | os.PathLike,
     filename: str,
diff --git a/src/transformers/video_processing_utils.py b/src/transformers/video_processing_utils.py
index eeb04eeb3adb..77836a0bf1ad 100644
--- a/src/transformers/video_processing_utils.py
+++ b/src/transformers/video_processing_utils.py
@@ -43,9 +43,7 @@
     TensorType,
     add_start_docstrings,
     copy_func,
-    download_url,
     is_offline_mode,
-    is_remote_url,
     is_torch_available,
     is_torchcodec_available,
     is_torchvision_v2_available,
@@ -638,10 +636,6 @@ def get_video_processor_dict(
             resolved_video_processor_file = pretrained_model_name_or_path
             resolved_processor_file = None
             is_local = True
-        elif is_remote_url(pretrained_model_name_or_path):
-            video_processor_file = pretrained_model_name_or_path
-            resolved_processor_file = None
-            resolved_video_processor_file = download_url(pretrained_model_name_or_path)
         else:
             video_processor_file = VIDEO_PROCESSOR_NAME
             try:
diff --git a/tests/utils/test_image_utils.py b/tests/utils/test_image_utils.py
index 124bc73d7357..4a6a03f813e6 100644
--- a/tests/utils/test_image_utils.py
+++ b/tests/utils/test_image_utils.py
@@ -11,17 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import codecs
-import os
-import tempfile
 import unittest
-from io import BytesIO
 
 import httpx
 import numpy as np
 import pytest
-from huggingface_hub.file_download import hf_hub_url, http_get
+from huggingface_hub.file_download import hf_hub_download
 
 from tests.pipelines.test_pipelines_document_question_answering import INVOICE_URL
 from transformers import is_torch_available, is_vision_available
@@ -46,8 +42,8 @@
 
 
 def get_image_from_hub_dataset(dataset_id: str, filename: str, revision: str | None = None) -> "PIL.Image.Image":
-    url = hf_hub_url(dataset_id, filename, repo_type="dataset", revision=revision)
-    return PIL.Image.open(BytesIO(httpx.get(url, follow_redirects=True).content))
+    path = hf_hub_download(dataset_id, filename, repo_type="dataset", revision=revision)
+    return PIL.Image.open(path)
 
 
 def get_random_image(height, width):
@@ -738,54 +734,30 @@ def test_load_img_local(self):
         )
 
     def test_load_img_base64_prefix(self):
-        try:
-            tmp_file = tempfile.NamedTemporaryFile(delete=False).name
-            with open(tmp_file, "wb") as f:
-                http_get(
-                    "https://huggingface.co/datasets/hf-internal-testing/dummy-base64-images/raw/main/image_0.txt", f
-                )
-
-            with open(tmp_file, encoding="utf-8") as b64:
-                img = load_image(b64.read())
-                img_arr = np.array(img)
-
-        finally:
-            os.remove(tmp_file)
-
+        path = hf_hub_download(
+            repo_id="hf-internal-testing/dummy-base64-images", filename="image_0.txt", repo_type="dataset"
+        )
+        with open(path, encoding="utf-8") as b64:
+            img = load_image(b64.read())
+            img_arr = np.array(img)
         self.assertEqual(img_arr.shape, (64, 32, 3))
 
     def test_load_img_base64(self):
-        try:
-            tmp_file = tempfile.NamedTemporaryFile(delete=False).name
-            with open(tmp_file, "wb") as f:
-                http_get(
-                    "https://huggingface.co/datasets/hf-internal-testing/dummy-base64-images/raw/main/image_1.txt", f
-                )
-
-            with open(tmp_file, encoding="utf-8") as b64:
-                img = load_image(b64.read())
-                img_arr = np.array(img)
-
-        finally:
-            os.remove(tmp_file)
-
+        path = hf_hub_download(
+            repo_id="hf-internal-testing/dummy-base64-images", filename="image_1.txt", repo_type="dataset"
+        )
+        with open(path, encoding="utf-8") as b64:
+            img = load_image(b64.read())
+            img_arr = np.array(img)
         self.assertEqual(img_arr.shape, (64, 32, 3))
 
     def test_load_img_base64_encoded_bytes(self):
-        try:
-            tmp_file = tempfile.NamedTemporaryFile(delete=False).name
-            with open(tmp_file, "wb") as f:
-                http_get(
-                    "https://huggingface.co/datasets/hf-internal-testing/dummy-base64-images/raw/main/image_2.txt", f
-                )
-
-            with codecs.open(tmp_file, encoding="unicode_escape") as b64:
-                img = load_image(b64.read())
-                img_arr = np.array(img)
-
-        finally:
-            os.remove(tmp_file)
-
+        path = hf_hub_download(
+            repo_id="hf-internal-testing/dummy-base64-images", filename="image_2.txt", repo_type="dataset"
+        )
+        with codecs.open(path, encoding="unicode_escape") as b64:
+            img = load_image(b64.read())
+            img_arr = np.array(img)
         self.assertEqual(img_arr.shape, (256, 256, 3))
 
     def test_load_img_rgba(self):
@@ -797,11 +769,7 @@ def test_load_img_rgba(self):
 
         img = load_image(img)  # img with mode RGBA
         img_arr = np.array(img)
-
-        self.assertEqual(
-            img_arr.shape,
-            (512, 512, 3),
-        )
+        self.assertEqual(img_arr.shape, (512, 512, 3))
 
     def test_load_img_la(self):
         # we use revision="refs/pr/1" until the PR is merged
diff --git a/tests/utils/test_tokenization_utils.py b/tests/utils/test_tokenization_utils.py
index 1cb872a3c216..d34b224068e1 100644
--- a/tests/utils/test_tokenization_utils.py
+++ b/tests/utils/test_tokenization_utils.py
@@ -20,16 +20,8 @@
 from pathlib import Path
 
 import httpx
-from huggingface_hub.file_download import http_get
-
-from transformers import (
-    AlbertTokenizer,
-    AutoTokenizer,
-    BertTokenizer,
-    BertTokenizerFast,
-    GPT2TokenizerFast,
-    is_tokenizers_available,
-)
+
+from transformers import AutoTokenizer, BertTokenizer, BertTokenizerFast, GPT2TokenizerFast, is_tokenizers_available
 from transformers.testing_utils import TOKEN, TemporaryHubRepo, is_staging_test, require_tokenizers
 from transformers.tokenization_utils import ExtensionsTrie, Trie
@@ -83,33 +75,6 @@ def test_cached_files_are_used_when_internet_is_down_missing_files(self):
             # This check we did call the fake head request
             mock_head.assert_called()
 
-    def test_legacy_load_from_one_file(self):
-        # This test is for deprecated behavior and can be removed in v5
-        try:
-            tmp_file = tempfile.NamedTemporaryFile(delete=False).name
-            with open(tmp_file, "wb") as f:
-                http_get("https://huggingface.co/albert/albert-base-v1/resolve/main/spiece.model", f)
-
-            _ = AlbertTokenizer.from_pretrained(tmp_file)
-        finally:
-            os.remove(tmp_file)
-
-        # Supporting this legacy load introduced a weird bug where the tokenizer would load local files if they are in
-        # the current folder and have the right name.
-        if os.path.isfile("tokenizer.json"):
-            # We skip the test if the user has a `tokenizer.json` in this folder to avoid deleting it.
-            self.skipTest(reason="Skipping test as there is a `tokenizer.json` file in the current folder.")
-        try:
-            with open("tokenizer.json", "wb") as f:
-                http_get("https://huggingface.co/hf-internal-testing/tiny-random-bert/blob/main/tokenizer.json", f)
-            tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
-            # The tiny random BERT has a vocab size of 1024, tiny openai-community/gpt2 as a vocab size of 1000
-            self.assertEqual(tokenizer.vocab_size, 1000)
-            # Tokenizer should depend on the remote checkpoint, not the local tokenizer.json file.
-
-        finally:
-            os.remove("tokenizer.json")
 
 
 @is_staging_test
 class TokenizerPushToHubTester(unittest.TestCase):

From a45742e176e2a36506debe9e08164a2793a8d53f Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Tue, 25 Nov 2025 10:28:28 +0100
Subject: [PATCH 2/4] code quality

---
 src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py | 1 -
 src/transformers/utils/hub.py                                  | 2 --
 2 files changed, 3 deletions(-)

diff --git a/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py b/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py
index b086a5844b34..a5292ccae108 100644
--- a/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py
+++ b/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py
@@ -26,7 +26,6 @@
 
 # Import MetaCLIP modules
 from src.mini_clip.factory import create_model_and_transforms
-
 from transformers import (
     AutoTokenizer,
     CLIPImageProcessor,
diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py
index 306b406137af..7f3b2ed53c8d 100644
--- a/src/transformers/utils/hub.py
+++ b/src/transformers/utils/hub.py
@@ -19,12 +19,10 @@
 import os
 import re
 import sys
-import tempfile
 import warnings
 from concurrent import futures
 from pathlib import Path
 from typing import TypedDict
-from urllib.parse import urlparse
 from uuid import uuid4
 
 import httpx

From c6474d3a2dd97e11980030624f482a1a8c7572b5 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Tue, 25 Nov 2025 10:31:04 +0100
Subject: [PATCH 3/4] ruff

---
 src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py b/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py
index a5292ccae108..b086a5844b34 100644
--- a/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py
+++ b/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py
@@ -26,6 +26,7 @@
 
 # Import MetaCLIP modules
 from src.mini_clip.factory import create_model_and_transforms
+
 from transformers import (
     AutoTokenizer,
     CLIPImageProcessor,

From a17914355e4c1960199e3c117ebd7529ef4b4c99 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Tue, 25 Nov 2025 10:33:43 +0100
Subject: [PATCH 4/4] format

---
 src/transformers/tokenization_utils_base.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index 22c3603df3f1..ce2b597c6327 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -2068,9 +2068,7 @@ def from_pretrained(
                 if template_dir.is_dir():
                     for template_file in template_dir.glob("*.jinja"):
                         template_name = template_file.name.removesuffix(".jinja")
-                        vocab_files[f"chat_template_{template_name}"] = (
-                            f"{CHAT_TEMPLATE_DIR}/{template_file.name}"
-                        )
+                        vocab_files[f"chat_template_{template_name}"] = f"{CHAT_TEMPLATE_DIR}/{template_file.name}"
             else:
                 for template in list_repo_templates(
                     pretrained_model_name_or_path,