From f3ab4d7978fe2cdfdf70a0acb47640087c19d041 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 21 May 2025 20:05:23 +0000
Subject: [PATCH] Refactor: Improve dataset deprecation message

Make the deprecation message for the `load_dataset` function more informative.

The new message now includes:
- A statement that `load_dataset` will be removed in version 1.0.0.
- An explanation that `dataset_load` (the replacement) offers more flexibility and new features.
- A clear code example showing how to migrate from the old to the new function:
  # OLD: load_dataset(adapter, handle, path, ...)
  # NEW: dataset_load(adapter, handle, path, ...)

Also add a unit test that verifies the `DeprecationWarning` is triggered exactly once, that the content of the warning message is as expected, and that the call is forwarded to `dataset_load`. To make sure the test exercises the current source tree rather than a stale copy, the test module prepends `src/` to `sys.path` and calls `importlib.reload` on `kagglehub.datasets`.
---
 src/kagglehub/datasets.py  |  7 +++++-
 tests/test_dataset_load.py | 50 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/src/kagglehub/datasets.py b/src/kagglehub/datasets.py
index 8a298ae..57f135c 100755
--- a/src/kagglehub/datasets.py
+++ b/src/kagglehub/datasets.py
@@ -171,7 +171,12 @@ def load_dataset(
     hf_kwargs: Any = None,  # noqa: ANN401
 ) -> Any:  # noqa: ANN401
     warnings.warn(
-        "Use dataset_load() instead of load_dataset(). load_dataset() will be removed in a future version.",
+        (
+            "Use dataset_load() instead of load_dataset(). load_dataset() will be removed in version 1.0.0. "
+            "`dataset_load` offers more flexibility and new features.\n"
+            "# OLD: load_dataset(adapter, handle, path, ...)\n"
+            "# NEW: dataset_load(adapter, handle, path, ...)"
+        ),
         DeprecationWarning,
         stacklevel=2,
     )
diff --git a/tests/test_dataset_load.py b/tests/test_dataset_load.py
index bf4c57d..b23b073 100644
--- a/tests/test_dataset_load.py
+++ b/tests/test_dataset_load.py
@@ -1,13 +1,26 @@
+import sys
+import os
+# Put the in-repo 'src/' directory first on sys.path (assumes this file lives in 'tests/' next to 'src/').
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src')))
+
+import importlib
+import kagglehub.datasets
 import io
 import logging
-import os
 from typing import Any
 from unittest.mock import MagicMock, patch
 
 import polars as pl
+import pytest
 from requests import Response
 
-from kagglehub.datasets import KaggleDatasetAdapter, PolarsFrameType, dataset_load, logger
+from kagglehub.datasets import (
+    PolarsFrameType,
+    dataset_load,
+    load_dataset,
+    logger,
+)
+from kagglehub.datasets_enums import KaggleDatasetAdapter  # Corrected import
 from kagglehub.exceptions import KaggleApiHTTPError
 from tests.fixtures import BaseTestCase
 
@@ -440,3 +453,36 @@ def test_polars_dataset_sends_user_agent(self, mock_get: MagicMock) -> None:
         with self.assertRaises(KaggleApiHTTPError):
             dataset_load(KaggleDatasetAdapter.POLARS, DATASET_HANDLE, AUTO_COMPRESSED_FILE_NAME)
         self.assertIn("polars_data_loader", mock_get.call_args.kwargs["headers"]["User-Agent"])
+
+
+class TestLoadDatasetDeprecation(BaseTestCase):
+    def test_load_dataset_deprecation_warning(self) -> None:
+        """Check load_dataset() warns once with the exact text and forwards to dataset_load()."""
+        adapter = KaggleDatasetAdapter.PANDAS
+        handle = "owner/dataset"
+        path = "file.csv"
+        expected_message = (
+            "Use dataset_load() instead of load_dataset(). load_dataset() will be removed in version 1.0.0. "
+            "`dataset_load` offers more flexibility and new features.\n"
+            "# OLD: load_dataset(adapter, handle, path, ...)\n"
+            "# NEW: dataset_load(adapter, handle, path, ...)"
+        )
+
+        # Re-execute kagglehub.datasets so the test runs against the module source found via sys.path
+        importlib.reload(kagglehub.datasets)
+        
+        # Swap the module-level dataset_load for a mock so the call below does not run the real loader
+        with patch('kagglehub.datasets.dataset_load', new_callable=MagicMock) as mocked_dataset_load_func:
+            # Act & Assert: pytest.warns records the warnings raised inside the block
+            with pytest.warns(DeprecationWarning) as record:
+                # Go through the module attribute (not the name imported at file top) to hit the reloaded function
+                kagglehub.datasets.load_dataset(adapter, handle, path)
+
+            # Exactly one warning must have been recorded
+            assert len(record) == 1
+            # The warning text must equal expected_message character for character
+            assert str(record[0].message) == expected_message
+            # The mock must have received the positional args plus the three keyword args, all None
+            mocked_dataset_load_func.assert_called_once_with(
+                adapter, handle, path, pandas_kwargs=None, sql_query=None, hf_kwargs=None
+            )