From f3ab4d7978fe2cdfdf70a0acb47640087c19d041 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 21 May 2025 20:05:23 +0000 Subject: [PATCH] Refactor: Improve dataset deprecation message This change makes the deprecation message for the `load_dataset` function more informative. The new message includes: - A statement that `load_dataset` will be removed in version 1.0.0. - An explanation that `dataset_load` (the replacement) offers more flexibility and new features. - A clear code example showing how to migrate from the old to the new function: # OLD: load_dataset(adapter, handle, path, ...) # NEW: dataset_load(adapter, handle, path, ...) It also adds a unit test to verify that the `DeprecationWarning` is triggered correctly and that the content of the warning message is as expected. The test makes sure it exercises the most up-to-date version of the code by modifying `sys.path` and using `importlib.reload`. --- src/kagglehub/datasets.py | 7 +++++- tests/test_dataset_load.py | 50 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/src/kagglehub/datasets.py b/src/kagglehub/datasets.py index 8a298ae..57f135c 100755 --- a/src/kagglehub/datasets.py +++ b/src/kagglehub/datasets.py @@ -171,7 +171,12 @@ def load_dataset( hf_kwargs: Any = None, # noqa: ANN401 ) -> Any: # noqa: ANN401 warnings.warn( - "Use dataset_load() instead of load_dataset(). load_dataset() will be removed in a future version.", + ( + "Use dataset_load() instead of load_dataset(). load_dataset() will be removed in version 1.0.0. 
" + "`dataset_load` offers more flexibility and new features.\n" + "# OLD: load_dataset(adapter, handle, path, ...)\n" + "# NEW: dataset_load(adapter, handle, path, ...)" + ), DeprecationWarning, stacklevel=2, ) diff --git a/tests/test_dataset_load.py b/tests/test_dataset_load.py index bf4c57d..b23b073 100644 --- a/tests/test_dataset_load.py +++ b/tests/test_dataset_load.py @@ -1,13 +1,26 @@ +import sys +import os +# Assuming tests are in 'tests/' and source is in 'src/' +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src'))) + +import importlib +import kagglehub.datasets import io import logging -import os from typing import Any from unittest.mock import MagicMock, patch import polars as pl +import pytest from requests import Response -from kagglehub.datasets import KaggleDatasetAdapter, PolarsFrameType, dataset_load, logger +from kagglehub.datasets import ( + PolarsFrameType, + dataset_load, + load_dataset, + logger, +) +from kagglehub.datasets_enums import KaggleDatasetAdapter # Corrected import from kagglehub.exceptions import KaggleApiHTTPError from tests.fixtures import BaseTestCase @@ -440,3 +453,36 @@ def test_polars_dataset_sends_user_agent(self, mock_get: MagicMock) -> None: with self.assertRaises(KaggleApiHTTPError): dataset_load(KaggleDatasetAdapter.POLARS, DATASET_HANDLE, AUTO_COMPRESSED_FILE_NAME) self.assertIn("polars_data_loader", mock_get.call_args.kwargs["headers"]["User-Agent"]) + + +class TestLoadDatasetDeprecation(BaseTestCase): + def test_load_dataset_deprecation_warning(self) -> None: + # Arrange + adapter = KaggleDatasetAdapter.PANDAS + handle = "owner/dataset" + path = "file.csv" + expected_message = ( + "Use dataset_load() instead of load_dataset(). load_dataset() will be removed in version 1.0.0. 
" + "`dataset_load` offers more flexibility and new features.\n" + "# OLD: load_dataset(adapter, handle, path, ...)\n" + "# NEW: dataset_load(adapter, handle, path, ...)" + ) + + # Reload the module to ensure we have the latest version + importlib.reload(kagglehub.datasets) + + # Patch dataset_load within the reloaded module for the scope of this call + with patch('kagglehub.datasets.dataset_load', new_callable=MagicMock) as mocked_dataset_load_func: + # Act & Assert + with pytest.warns(DeprecationWarning) as record: + # Call load_dataset from the reloaded module + kagglehub.datasets.load_dataset(adapter, handle, path) + + # Assert that one warning was captured + assert len(record) == 1 + # Assert that the warning message matches the expected message + assert str(record[0].message) == expected_message + # Assert that the mocked dataset_load was called + mocked_dataset_load_func.assert_called_once_with( + adapter, handle, path, pandas_kwargs=None, sql_query=None, hf_kwargs=None + )