From b766c159178afd0628a5096e0fb03c9e130d6768 Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Tue, 2 Sep 2025 14:54:30 -0500
Subject: [PATCH 01/20] Create test_preserve_leading_zeros.py

---
 .../io/parser/test_preserve_leading_zeros.py  | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 pandas/tests/io/parser/test_preserve_leading_zeros.py

diff --git a/pandas/tests/io/parser/test_preserve_leading_zeros.py b/pandas/tests/io/parser/test_preserve_leading_zeros.py
new file mode 100644
index 0000000000000..fa0e00e41c48b
--- /dev/null
+++ b/pandas/tests/io/parser/test_preserve_leading_zeros.py
@@ -0,0 +1,48 @@
+import pytest
+from io import StringIO
+import pandas._testing as tm
+
+
+@pytest.mark.xfail(reason="Leading zeros preservation may not work consistently across all engines")
+def test_leading_zeros_preserved_with_dtype_str(all_parsers):
+    """
+    Ensure that all parser engines preserve leading zeros when dtype=str is passed.
+    
+    This test verifies that when dtype=str is specified, leading zeros in 
+    numeric-looking strings are preserved across all available parser engines.
+    """
+    parser = all_parsers
+    engine_name = getattr(parser, 'engine', 'unknown')
+    
+    data = """col1|col2|col3|col4
+AB|000388907|abc|0150
+CD|101044572|def|0150
+EF|000023607|ghi|0205
+GH|100102040|jkl|0205"""
+    
+    result = parser.read_csv(
+        StringIO(data),
+        sep="|",
+        dtype=str,
+    )
+    
+    # Verify leading zeros are preserved in col2
+    assert result.loc[0, "col2"] == "000388907", f"Engine {engine_name}: Leading zeros lost in col2, row 0. Got: {result.loc[0, 'col2']}"
+    assert result.loc[2, "col2"] == "000023607", f"Engine {engine_name}: Leading zeros lost in col2, row 2. Got: {result.loc[2, 'col2']}"
+    
+    # Verify leading zeros are preserved in col4
+    assert result.loc[0, "col4"] == "0150", f"Engine {engine_name}: Leading zeros lost in col4, row 0. Got: {result.loc[0, 'col4']}"
+    assert result.loc[2, "col4"] == "0205", f"Engine {engine_name}: Leading zeros lost in col4, row 2. Got: {result.loc[2, 'col4']}"
+    
+    # Verify all columns are string type
+    assert result.dtypes["col1"] == "object", f"Engine {engine_name}: col1 should be string type, got {result.dtypes['col1']}"
+    assert result.dtypes["col2"] == "object", f"Engine {engine_name}: col2 should be string type, got {result.dtypes['col2']}"
+    assert result.dtypes["col3"] == "object", f"Engine {engine_name}: col3 should be string type, got {result.dtypes['col3']}"
+    assert result.dtypes["col4"] == "object", f"Engine {engine_name}: col4 should be string type, got {result.dtypes['col4']}"
+    
+    # Verify shape
+    assert result.shape == (4, 4), f"Engine {engine_name}: Expected shape (4, 4), got {result.shape}"
+    
+    # Verify column names
+    expected_columns = ["col1", "col2", "col3", "col4"]
+    assert list(result.columns) == expected_columns, f"Engine {engine_name}: Expected columns {expected_columns}, got {list(result.columns)}"

From ea08043c5a62f68d8a4571b2619e7b9c7807bc1d Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Tue, 2 Sep 2025 16:03:08 -0500
Subject: [PATCH 02/20] Limit xfail to pyarrow and wrap long assert messages

---
 .../io/parser/test_preserve_leading_zeros.py  | 69 ++++++++++++-------
 1 file changed, 46 insertions(+), 23 deletions(-)

diff --git a/pandas/tests/io/parser/test_preserve_leading_zeros.py b/pandas/tests/io/parser/test_preserve_leading_zeros.py
index fa0e00e41c48b..e4254df70b646 100644
--- a/pandas/tests/io/parser/test_preserve_leading_zeros.py
+++ b/pandas/tests/io/parser/test_preserve_leading_zeros.py
@@ -1,48 +1,71 @@
-import pytest
 from io import StringIO
-import pandas._testing as tm
+
+import pytest
 
 
-@pytest.mark.xfail(reason="Leading zeros preservation may not work consistently across all engines")
+@pytest.mark.xfail(
+    condition=getattr(all_parsers, "engine", "") == "pyarrow",
+    reason="pyarrow engine strips leading zeros even with dtype=str",
+)
 def test_leading_zeros_preserved_with_dtype_str(all_parsers):
     """
     Ensure that all parser engines preserve leading zeros when dtype=str is passed.
-    
-    This test verifies that when dtype=str is specified, leading zeros in 
+
+    This test verifies that when dtype=str is specified, leading zeros in
     numeric-looking strings are preserved across all available parser engines.
     """
     parser = all_parsers
-    engine_name = getattr(parser, 'engine', 'unknown')
-    
+    engine_name = getattr(parser, "engine", "unknown")
+
     data = """col1|col2|col3|col4
 AB|000388907|abc|0150
 CD|101044572|def|0150
 EF|000023607|ghi|0205
 GH|100102040|jkl|0205"""
-    
+
     result = parser.read_csv(
         StringIO(data),
         sep="|",
         dtype=str,
     )
-    
+
     # Verify leading zeros are preserved in col2
-    assert result.loc[0, "col2"] == "000388907", f"Engine {engine_name}: Leading zeros lost in col2, row 0. Got: {result.loc[0, 'col2']}"
-    assert result.loc[2, "col2"] == "000023607", f"Engine {engine_name}: Leading zeros lost in col2, row 2. Got: {result.loc[2, 'col2']}"
-    
+    assert result.loc[0, "col2"] == "000388907", (
+        f"Engine {engine_name}: Leading zeros lost in col2, row 0. Got: {result.loc[0, 'col2']}"
+    )
+    assert result.loc[2, "col2"] == "000023607", (
+        f"Engine {engine_name}: Leading zeros lost in col2, row 2. Got: {result.loc[2, 'col2']}"
+    )
+
     # Verify leading zeros are preserved in col4
-    assert result.loc[0, "col4"] == "0150", f"Engine {engine_name}: Leading zeros lost in col4, row 0. Got: {result.loc[0, 'col4']}"
-    assert result.loc[2, "col4"] == "0205", f"Engine {engine_name}: Leading zeros lost in col4, row 2. Got: {result.loc[2, 'col4']}"
-    
+    assert result.loc[0, "col4"] == "0150", (
+        f"Engine {engine_name}: Leading zeros lost in col4, row 0. Got: {result.loc[0, 'col4']}"
+    )
+    assert result.loc[2, "col4"] == "0205", (
+        f"Engine {engine_name}: Leading zeros lost in col4, row 2. Got: {result.loc[2, 'col4']}"
+    )
+
     # Verify all columns are string type
-    assert result.dtypes["col1"] == "object", f"Engine {engine_name}: col1 should be string type, got {result.dtypes['col1']}"
-    assert result.dtypes["col2"] == "object", f"Engine {engine_name}: col2 should be string type, got {result.dtypes['col2']}"
-    assert result.dtypes["col3"] == "object", f"Engine {engine_name}: col3 should be string type, got {result.dtypes['col3']}"
-    assert result.dtypes["col4"] == "object", f"Engine {engine_name}: col4 should be string type, got {result.dtypes['col4']}"
-    
+    assert result.dtypes["col1"] == "object", (
+        f"Engine {engine_name}: col1 should be string type, got {result.dtypes['col1']}"
+    )
+    assert result.dtypes["col2"] == "object", (
+        f"Engine {engine_name}: col2 should be string type, got {result.dtypes['col2']}"
+    )
+    assert result.dtypes["col3"] == "object", (
+        f"Engine {engine_name}: col3 should be string type, got {result.dtypes['col3']}"
+    )
+    assert result.dtypes["col4"] == "object", (
+        f"Engine {engine_name}: col4 should be string type, got {result.dtypes['col4']}"
+    )
+
     # Verify shape
-    assert result.shape == (4, 4), f"Engine {engine_name}: Expected shape (4, 4), got {result.shape}"
-    
+    assert result.shape == (4, 4), (
+        f"Engine {engine_name}: Expected shape (4, 4), got {result.shape}"
+    )
+
     # Verify column names
     expected_columns = ["col1", "col2", "col3", "col4"]
-    assert list(result.columns) == expected_columns, f"Engine {engine_name}: Expected columns {expected_columns}, got {list(result.columns)}"
+    assert list(result.columns) == expected_columns, (
+        f"Engine {engine_name}: Expected columns {expected_columns}, got {list(result.columns)}"
+    )

From 206cb69c007681c7e5e3f6a17898bafce4bc63f3 Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Tue, 2 Sep 2025 16:34:47 -0500
Subject: [PATCH 03/20] Replace xfail decorator with in-test pytest.xfail for pyarrow

---
 .../io/parser/test_preserve_leading_zeros.py  | 71 +++++++------------
 1 file changed, 27 insertions(+), 44 deletions(-)

diff --git a/pandas/tests/io/parser/test_preserve_leading_zeros.py b/pandas/tests/io/parser/test_preserve_leading_zeros.py
index e4254df70b646..24cd353955bf6 100644
--- a/pandas/tests/io/parser/test_preserve_leading_zeros.py
+++ b/pandas/tests/io/parser/test_preserve_leading_zeros.py
@@ -3,10 +3,6 @@
 import pytest
 
 
-@pytest.mark.xfail(
-    condition=getattr(all_parsers, "engine", "") == "pyarrow",
-    reason="pyarrow engine strips leading zeros even with dtype=str",
-)
 def test_leading_zeros_preserved_with_dtype_str(all_parsers):
     """
     Ensure that all parser engines preserve leading zeros when dtype=str is passed.
@@ -29,43 +25,30 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers):
         dtype=str,
     )
 
-    # Verify leading zeros are preserved in col2
-    assert result.loc[0, "col2"] == "000388907", (
-        f"Engine {engine_name}: Leading zeros lost in col2, row 0. Got: {result.loc[0, 'col2']}"
-    )
-    assert result.loc[2, "col2"] == "000023607", (
-        f"Engine {engine_name}: Leading zeros lost in col2, row 2. Got: {result.loc[2, 'col2']}"
-    )
-
-    # Verify leading zeros are preserved in col4
-    assert result.loc[0, "col4"] == "0150", (
-        f"Engine {engine_name}: Leading zeros lost in col4, row 0. Got: {result.loc[0, 'col4']}"
-    )
-    assert result.loc[2, "col4"] == "0205", (
-        f"Engine {engine_name}: Leading zeros lost in col4, row 2. Got: {result.loc[2, 'col4']}"
-    )
-
-    # Verify all columns are string type
-    assert result.dtypes["col1"] == "object", (
-        f"Engine {engine_name}: col1 should be string type, got {result.dtypes['col1']}"
-    )
-    assert result.dtypes["col2"] == "object", (
-        f"Engine {engine_name}: col2 should be string type, got {result.dtypes['col2']}"
-    )
-    assert result.dtypes["col3"] == "object", (
-        f"Engine {engine_name}: col3 should be string type, got {result.dtypes['col3']}"
-    )
-    assert result.dtypes["col4"] == "object", (
-        f"Engine {engine_name}: col4 should be string type, got {result.dtypes['col4']}"
-    )
-
-    # Verify shape
-    assert result.shape == (4, 4), (
-        f"Engine {engine_name}: Expected shape (4, 4), got {result.shape}"
-    )
-
-    # Verify column names
-    expected_columns = ["col1", "col2", "col3", "col4"]
-    assert list(result.columns) == expected_columns, (
-        f"Engine {engine_name}: Expected columns {expected_columns}, got {list(result.columns)}"
-    )
+    try:
+        assert result.loc[0, "col2"] == "000388907", (
+            f"{engine_name} lost zeros in col2 row 0"
+        )
+        assert result.loc[2, "col2"] == "000023607", (
+            f"{engine_name} lost zeros in col2 row 2"
+        )
+        assert result.loc[0, "col4"] == "0150", (
+            f"{engine_name} lost zeros in col4 row 0"
+        )
+        assert result.loc[2, "col4"] == "0205", (
+            f"{engine_name} lost zeros in col4 row 2"
+        )
+
+        for col in ["col1", "col2", "col3", "col4"]:
+            assert result.dtypes[col] == "object", (
+                f"{engine_name} wrong dtype for {col}"
+            )
+
+        assert result.shape == (4, 4)
+        assert list(result.columns) == ["col1", "col2", "col3", "col4"]
+    except AssertionError as exc:
+        if engine_name == "pyarrow":
+            # Known issue: pyarrow engine strips leading zeros even with dtype=str.
+            pytest.xfail(f"failed assertions: {exc}")
+        else:
+            raise

From 247d514d6bb6e7fcf8e30b23f75666232584ed77 Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Tue, 2 Sep 2025 16:56:20 -0500
Subject: [PATCH 04/20] Mark pyarrow xfail via request.node.add_marker

---
 .../io/parser/test_preserve_leading_zeros.py  | 28 ++++++++-----------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/pandas/tests/io/parser/test_preserve_leading_zeros.py b/pandas/tests/io/parser/test_preserve_leading_zeros.py
index 24cd353955bf6..44ecaaa260311 100644
--- a/pandas/tests/io/parser/test_preserve_leading_zeros.py
+++ b/pandas/tests/io/parser/test_preserve_leading_zeros.py
@@ -3,7 +3,7 @@
 import pytest
 
 
-def test_leading_zeros_preserved_with_dtype_str(all_parsers):
+def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
     """
     Ensure that all parser engines preserve leading zeros when dtype=str is passed.
 
@@ -26,29 +26,25 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers):
     )
 
     try:
-        assert result.loc[0, "col2"] == "000388907", (
-            f"{engine_name} lost zeros in col2 row 0"
-        )
-        assert result.loc[2, "col2"] == "000023607", (
-            f"{engine_name} lost zeros in col2 row 2"
-        )
-        assert result.loc[0, "col4"] == "0150", (
-            f"{engine_name} lost zeros in col4 row 0"
-        )
-        assert result.loc[2, "col4"] == "0205", (
-            f"{engine_name} lost zeros in col4 row 2"
-        )
+        assert result.shape == (4, 4)
+        assert list(result.columns) == ["col1", "col2", "col3", "col4"]
+
+        assert result.loc[0, "col2"] == "000388907", "lost zeros in col2 row 0"
+        assert result.loc[2, "col2"] == "000023607", "lost zeros in col2 row 2"
+        assert result.loc[0, "col4"] == "0150", "lost zeros in col4 row 0"
+        assert result.loc[2, "col4"] == "0205", "lost zeros in col4 row 2"
 
         for col in ["col1", "col2", "col3", "col4"]:
             assert result.dtypes[col] == "object", (
                 f"{engine_name} wrong dtype for {col}"
             )
 
-        assert result.shape == (4, 4)
-        assert list(result.columns) == ["col1", "col2", "col3", "col4"]
     except AssertionError as exc:
         if engine_name == "pyarrow":
             # Known issue: pyarrow engine strips leading zeros even with dtype=str.
-            pytest.xfail(f"failed assertions: {exc}")
+            request.node.add_marker(
+                pytest.mark.xfail(reason=f"failed assertions: {exc}", strict=False)
+            )
+            assert False, "trigger xfail for pyarrow"
         else:
             raise

From 3e9f04e1cb23108c453ce40ee0bf624659cbbfca Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Tue, 2 Sep 2025 17:17:45 -0500
Subject: [PATCH 05/20] Drop object-dtype assertions from leading-zeros test

---
 pandas/tests/io/parser/test_preserve_leading_zeros.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/pandas/tests/io/parser/test_preserve_leading_zeros.py b/pandas/tests/io/parser/test_preserve_leading_zeros.py
index 44ecaaa260311..999cc58235e01 100644
--- a/pandas/tests/io/parser/test_preserve_leading_zeros.py
+++ b/pandas/tests/io/parser/test_preserve_leading_zeros.py
@@ -34,11 +34,6 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
         assert result.loc[0, "col4"] == "0150", "lost zeros in col4 row 0"
         assert result.loc[2, "col4"] == "0205", "lost zeros in col4 row 2"
 
-        for col in ["col1", "col2", "col3", "col4"]:
-            assert result.dtypes[col] == "object", (
-                f"{engine_name} wrong dtype for {col}"
-            )
-
     except AssertionError as exc:
         if engine_name == "pyarrow":
             # Known issue: pyarrow engine strips leading zeros even with dtype=str.

From c86f33fb09b4dd9584a92c6c77e8101de0d522f2 Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Wed, 3 Sep 2025 08:26:29 -0500
Subject: [PATCH 06/20] Re-raise assertion failure after adding pyarrow xfail marker

---
 pandas/tests/io/parser/test_preserve_leading_zeros.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas/tests/io/parser/test_preserve_leading_zeros.py b/pandas/tests/io/parser/test_preserve_leading_zeros.py
index 999cc58235e01..980e0daef60aa 100644
--- a/pandas/tests/io/parser/test_preserve_leading_zeros.py
+++ b/pandas/tests/io/parser/test_preserve_leading_zeros.py
@@ -39,7 +39,4 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
             # Known issue: pyarrow engine strips leading zeros even with dtype=str.
             request.node.add_marker(
                 pytest.mark.xfail(reason=f"failed assertions: {exc}", strict=False)
-            )
-            assert False, "trigger xfail for pyarrow"
-        else:
-            raise
+        raise

From 81b50db8e0a6c650878b2f2e888f0f4134656ba5 Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Wed, 3 Sep 2025 08:37:00 -0500
Subject: [PATCH 07/20] Restore missing closing parenthesis in add_marker call

---
 pandas/tests/io/parser/test_preserve_leading_zeros.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/io/parser/test_preserve_leading_zeros.py b/pandas/tests/io/parser/test_preserve_leading_zeros.py
index 980e0daef60aa..e06ea558608ab 100644
--- a/pandas/tests/io/parser/test_preserve_leading_zeros.py
+++ b/pandas/tests/io/parser/test_preserve_leading_zeros.py
@@ -39,4 +39,5 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
             # Known issue: pyarrow engine strips leading zeros even with dtype=str.
             request.node.add_marker(
                 pytest.mark.xfail(reason=f"failed assertions: {exc}", strict=False)
+            )
         raise

From 599040000326dbac9f6e5500f4a53a13a3af791a Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Wed, 3 Sep 2025 10:52:35 -0500
Subject: [PATCH 08/20] Use comma-separated test data and reference GH#57666/GH#61618

---
 .../io/parser/test_preserve_leading_zeros.py  | 21 ++++++++-----------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/io/parser/test_preserve_leading_zeros.py b/pandas/tests/io/parser/test_preserve_leading_zeros.py
index e06ea558608ab..1bf2810e3834d 100644
--- a/pandas/tests/io/parser/test_preserve_leading_zeros.py
+++ b/pandas/tests/io/parser/test_preserve_leading_zeros.py
@@ -4,24 +4,20 @@
 
 
 def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
-    """
-    Ensure that all parser engines preserve leading zeros when dtype=str is passed.
+    # GH#57666: pyarrow engine strips leading zeros when dtype=str is passed
+    # GH#61618: further discussion on ensuring string dtype preservation across engines
 
-    This test verifies that when dtype=str is specified, leading zeros in
-    numeric-looking strings are preserved across all available parser engines.
-    """
     parser = all_parsers
     engine_name = getattr(parser, "engine", "unknown")
 
-    data = """col1|col2|col3|col4
-AB|000388907|abc|0150
-CD|101044572|def|0150
-EF|000023607|ghi|0205
-GH|100102040|jkl|0205"""
+    data = """col1,col2,col3,col4
+AB,000388907,abc,0150
+CD,101044572,def,0150
+EF,000023607,ghi,0205
+GH,100102040,jkl,0205"""
 
     result = parser.read_csv(
         StringIO(data),
-        sep="|",
         dtype=str,
     )
 
@@ -36,7 +32,8 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
 
     except AssertionError as exc:
         if engine_name == "pyarrow":
-            # Known issue: pyarrow engine strips leading zeros even with dtype=str.
+            # Temporary workaround for GH#57666
+            # Remove once type preservation is fixed in pyarrow engine.
             request.node.add_marker(
                 pytest.mark.xfail(reason=f"failed assertions: {exc}", strict=False)
             )

From d9f6983142487cd221827393637a68b58b8eae52 Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Wed, 3 Sep 2025 14:48:59 -0500
Subject: [PATCH 09/20] draft column_types parsing

---
 pandas/io/parsers/arrow_parser_wrapper.py     | 25 +++++++++++++++
 .../io/parser/test_preserve_leading_zeros.py  | 31 +++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index ad39d0ebf4326..24c0c05cdbabd 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -15,6 +15,7 @@
 )
 
 from pandas.core.dtypes.common import (
+    is_string_dtype,
     pandas_dtype,
 )
 from pandas.core.dtypes.inference import is_integer
@@ -60,6 +61,8 @@ def _get_pyarrow_options(self) -> None:
         """
         Rename some arguments to pass to pyarrow
         """
+        pa = import_optional_dependency("pyarrow")
+
         mapping = {
             "usecols": "include_columns",
             "na_values": "null_values",
@@ -139,6 +142,28 @@ def handle_warning(invalid_row) -> str:
                 f"f{n}" for n in self.convert_options["include_columns"]
             ]
 
+        if self.dtype is not None:
+            if isinstance(self.dtype, dict):
+                column_types = {}
+                for col, col_dtype in self.dtype.items():
+                    if is_string_dtype(col_dtype):
+                        column_types[col] = pa.string()
+                    else:
+                        warnings.warn(
+                            f"Column '{col}' has dtype '{col_dtype}', "
+                            "which may not be handled correctly by the pyarrow engine.",
+                            ParserWarning,
+                            stacklevel=find_stack_level(),
+                        )
+                if column_types:
+                    self.convert_options["column_types"] = column_types
+            else:
+                warnings.warn(
+                    "The pyarrow engine expects a dict mapping columns to types.",
+                    ParserWarning,
+                    stacklevel=find_stack_level(),
+                )
+
         self.read_options = {
             "autogenerate_column_names": self.header is None,
             "skip_rows": self.header
diff --git a/pandas/tests/io/parser/test_preserve_leading_zeros.py b/pandas/tests/io/parser/test_preserve_leading_zeros.py
index 1bf2810e3834d..31cc3e82d1439 100644
--- a/pandas/tests/io/parser/test_preserve_leading_zeros.py
+++ b/pandas/tests/io/parser/test_preserve_leading_zeros.py
@@ -38,3 +38,34 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
                 pytest.mark.xfail(reason=f"failed assertions: {exc}", strict=False)
             )
         raise
+
+
+def test_leading_zeros_preserved_with_dtype_dict(all_parsers):
+    # GH#57666: pyarrow engine strips leading zeros when dtype=str is passed
+    # GH#61618: further discussion on ensuring string dtype preservation across engines
+
+    parser = all_parsers
+
+    data = """col1,col2,col3,col4
+AB,000388907,199,0150
+CD,101044572,200,0150
+EF,000023607,201,0205
+GH,100102040,202,0205"""
+
+    result = parser.read_csv(
+        StringIO(data),
+        dtype={"col2": str, "col4": str},
+    )
+
+    assert result.shape == (4, 4)
+    assert list(result.columns) == ["col1", "col2", "col3", "col4"]
+
+    assert result.loc[0, "col2"] == "000388907", "lost zeros in col2 row 0"
+    assert result.loc[2, "col2"] == "000023607", "lost zeros in col2 row 2"
+    assert result.loc[0, "col4"] == "0150", "lost zeros in col4 row 0"
+    assert result.loc[2, "col4"] == "0205", "lost zeros in col4 row 2"
+
+    assert result.loc[0, "col3"] == 199
+    assert result.loc[1, "col3"] == 200
+    assert result.loc[2, "col3"] == 201
+    assert result.loc[3, "col3"] == 202

From 0ebca38c26beb0857f7900e39d8ed99082c3d6b7 Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Wed, 3 Sep 2025 15:15:04 -0500
Subject: [PATCH 10/20] Expect pyarrow ParserWarning; add heterogeneous dtype test

---
 .../io/parser/test_preserve_leading_zeros.py  | 63 +++++++++++++++++--
 1 file changed, 58 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/io/parser/test_preserve_leading_zeros.py b/pandas/tests/io/parser/test_preserve_leading_zeros.py
index 31cc3e82d1439..7c2b6961df3b6 100644
--- a/pandas/tests/io/parser/test_preserve_leading_zeros.py
+++ b/pandas/tests/io/parser/test_preserve_leading_zeros.py
@@ -2,6 +2,9 @@
 
 import pytest
 
+import pandas._testing as tm
+from pandas.errors import ParserWarning
+
 
 def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
     # GH#57666: pyarrow engine strips leading zeros when dtype=str is passed
@@ -16,10 +19,19 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
 EF,000023607,ghi,0205
 GH,100102040,jkl,0205"""
 
-    result = parser.read_csv(
-        StringIO(data),
-        dtype=str,
-    )
+    if engine_name == "pyarrow":
+        with tm.assert_produces_warning(
+            ParserWarning, match="pyarrow engine expects a dict mapping"
+        ):
+            result = parser.read_csv(
+                StringIO(data),
+                dtype=str,
+            )
+    else:
+        result = parser.read_csv(
+            StringIO(data),
+            dtype=str,
+        )
 
     try:
         assert result.shape == (4, 4)
@@ -40,7 +52,7 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
         raise
 
 
-def test_leading_zeros_preserved_with_dtype_dict(all_parsers):
+def test_leading_zeros_preserved_with_dtype_dict_str_only(all_parsers):
     # GH#57666: pyarrow engine strips leading zeros when dtype=str is passed
     # GH#61618: further discussion on ensuring string dtype preservation across engines
 
@@ -69,3 +81,44 @@ def test_leading_zeros_preserved_with_dtype_dict(all_parsers):
     assert result.loc[1, "col3"] == 200
     assert result.loc[2, "col3"] == 201
     assert result.loc[3, "col3"] == 202
+
+
+def test_leading_zeros_preserved_with_heterogeneous_dtypes(all_parsers):
+    # GH#57666: pyarrow engine strips leading zeros when dtype=str is passed
+    # GH#61618: further discussion on ensuring string dtype preservation across engines
+
+    parser = all_parsers
+    engine_name = getattr(parser, "engine", "unknown")
+
+    data = """col1,col2,col3,col4
+AB,000388907,199,0150
+CD,101044572,200,0150
+EF,000023607,201,0205
+GH,100102040,202,0205"""
+
+    if engine_name == "pyarrow":
+        with tm.assert_produces_warning(
+            ParserWarning, match="may not be handled correctly by the pyarrow engine"
+        ):
+            result = parser.read_csv(
+                StringIO(data),
+                dtype={"col2": str, "col3": int, "col4": str},
+            )
+    else:
+        result = parser.read_csv(
+            StringIO(data),
+            dtype={"col2": str, "col3": int, "col4": str},
+        )
+
+    assert result.shape == (4, 4)
+    assert list(result.columns) == ["col1", "col2", "col3", "col4"]
+
+    assert result.loc[0, "col2"] == "000388907", "lost zeros in col2 row 0"
+    assert result.loc[2, "col2"] == "000023607", "lost zeros in col2 row 2"
+    assert result.loc[0, "col4"] == "0150", "lost zeros in col4 row 0"
+    assert result.loc[2, "col4"] == "0205", "lost zeros in col4 row 2"
+
+    assert result.loc[0, "col3"] == 199
+    assert result.loc[1, "col3"] == 200
+    assert result.loc[2, "col3"] == 201
+    assert result.loc[3, "col3"] == 202

From 22bd4e370c7f020bb131d1faddf47371532580a5 Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Wed, 3 Sep 2025 15:20:24 -0500
Subject: [PATCH 11/20] Add whatsnew entry for read_csv leading zeros (GH#57666)

---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index ffa65032e6aae..c3812b4e0a151 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -987,6 +987,7 @@ I/O
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
 - Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`)
 - Bug in :meth:`read_csv` with ``engine="pyarrow"`` and ``dtype="Int64"`` losing precision (:issue:`56136`)
+- Bug in :meth:`read_csv`` with dictionary-based dtype specifications not preserving leading zeros consistently across parser engines (:issue:`57666`)
 - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
 - Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`)
 - Bug in :meth:`read_json` ignoring the given ``dtype`` when ``engine="pyarrow"`` (:issue:`59516`)

From 22e41298dc0162d2571763659d3296cfd1aa1070 Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Wed, 3 Sep 2025 15:23:15 -0500
Subject: [PATCH 12/20] Reorder pandas imports in test_preserve_leading_zeros.py

---
 pandas/tests/io/parser/test_preserve_leading_zeros.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/test_preserve_leading_zeros.py b/pandas/tests/io/parser/test_preserve_leading_zeros.py
index 7c2b6961df3b6..3cc53a0218d80 100644
--- a/pandas/tests/io/parser/test_preserve_leading_zeros.py
+++ b/pandas/tests/io/parser/test_preserve_leading_zeros.py
@@ -2,9 +2,10 @@
 
 import pytest
 
-import pandas._testing as tm
 from pandas.errors import ParserWarning
 
+import pandas._testing as tm
+
 
 def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
     # GH#57666: pyarrow engine strips leading zeros when dtype=str is passed

From 74f01a7fde57076fa272ec0cba156a8a5ca5cf6e Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Wed, 3 Sep 2025 16:20:04 -0500
Subject: [PATCH 13/20] use numpy dtype mapping

---
 doc/source/whatsnew/v3.0.0.rst                |  2 +-
 pandas/io/parsers/arrow_parser_wrapper.py     | 18 ++++---
 .../io/parser/test_preserve_leading_zeros.py  | 49 ++-----------------
 3 files changed, 16 insertions(+), 53 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index c3812b4e0a151..ead60ad82f8cf 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -987,7 +987,7 @@ I/O
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
 - Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`)
 - Bug in :meth:`read_csv` with ``engine="pyarrow"`` and ``dtype="Int64"`` losing precision (:issue:`56136`)
-- Bug in :meth:`read_csv`` with dictionary-based dtype specifications not preserving leading zeros consistently across parser engines (:issue:`57666`)
+- Bug in :meth:`read_csv` with dictionary-based dtype specifications not preserving leading zeros consistently across parser engines (:issue:`57666`)
 - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
 - Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`)
 - Bug in :meth:`read_json` ignoring the given ``dtype`` when ``engine="pyarrow"`` (:issue:`59516`)
diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 24c0c05cdbabd..4651c27d9ddbf 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -6,6 +6,7 @@
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import (
+    DtypeWarning,
     Pandas4Warning,
     ParserError,
     ParserWarning,
@@ -15,7 +16,6 @@
 )
 
 from pandas.core.dtypes.common import (
-    is_string_dtype,
     pandas_dtype,
 )
 from pandas.core.dtypes.inference import is_integer
@@ -146,21 +146,25 @@ def handle_warning(invalid_row) -> str:
             if isinstance(self.dtype, dict):
                 column_types = {}
                 for col, col_dtype in self.dtype.items():
-                    if is_string_dtype(col_dtype):
-                        column_types[col] = pa.string()
-                    else:
+                    try:
+                        numpy_dtype = pandas_dtype(col_dtype).type
+                        pyarrow_dtype = pa.from_numpy_dtype(numpy_dtype)
+                        column_types[col] = pyarrow_dtype
+                    except (TypeError, ValueError, pa.ArrowNotImplementedError):
                         warnings.warn(
                             f"Column '{col}' has dtype '{col_dtype}', "
                             "which may not be handled correctly by the pyarrow engine.",
-                            ParserWarning,
+                            DtypeWarning,
                             stacklevel=find_stack_level(),
                         )
+
                 if column_types:
                     self.convert_options["column_types"] = column_types
             else:
                 warnings.warn(
-                    "The pyarrow engine expects a dict mapping columns to types.",
-                    ParserWarning,
+                    f"Global dtype '{self.dtype}' not supported with pyarrow engine. "
+                    "Use dtype dictionary instead.",
+                    DtypeWarning,
                     stacklevel=find_stack_level(),
                 )
 
diff --git a/pandas/tests/io/parser/test_preserve_leading_zeros.py b/pandas/tests/io/parser/test_preserve_leading_zeros.py
index 3cc53a0218d80..a1f460d7e47ee 100644
--- a/pandas/tests/io/parser/test_preserve_leading_zeros.py
+++ b/pandas/tests/io/parser/test_preserve_leading_zeros.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from pandas.errors import ParserWarning
+from pandas.errors import DtypeWarning
 
 import pandas._testing as tm
 
@@ -22,7 +22,7 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
 
     if engine_name == "pyarrow":
         with tm.assert_produces_warning(
-            ParserWarning, match="pyarrow engine expects a dict mapping"
+            DtypeWarning, match="not supported with pyarrow engine"
         ):
             result = parser.read_csv(
                 StringIO(data),
@@ -53,7 +53,7 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
         raise
 
 
-def test_leading_zeros_preserved_with_dtype_dict_str_only(all_parsers):
+def test_leading_zeros_preserved_with_dtype_dict(all_parsers):
     # GH#57666: pyarrow engine strips leading zeros when dtype=str is passed
     # GH#61618: further discussion on ensuring string dtype preservation across engines
 
@@ -67,7 +67,7 @@ def test_leading_zeros_preserved_with_dtype_dict_str_only(all_parsers):
 
     result = parser.read_csv(
         StringIO(data),
-        dtype={"col2": str, "col4": str},
+        dtype={"col2": str, "col3": int, "col4": str},
     )
 
     assert result.shape == (4, 4)
@@ -82,44 +82,3 @@ def test_leading_zeros_preserved_with_dtype_dict_str_only(all_parsers):
     assert result.loc[1, "col3"] == 200
     assert result.loc[2, "col3"] == 201
     assert result.loc[3, "col3"] == 202
-
-
-def test_leading_zeros_preserved_with_heterogeneous_dtypes(all_parsers):
-    # GH#57666: pyarrow engine strips leading zeros when dtype=str is passed
-    # GH#61618: further discussion on ensuring string dtype preservation across engines
-
-    parser = all_parsers
-    engine_name = getattr(parser, "engine", "unknown")
-
-    data = """col1,col2,col3,col4
-AB,000388907,199,0150
-CD,101044572,200,0150
-EF,000023607,201,0205
-GH,100102040,202,0205"""
-
-    if engine_name == "pyarrow":
-        with tm.assert_produces_warning(
-            ParserWarning, match="may not be handled correctly by the pyarrow engine"
-        ):
-            result = parser.read_csv(
-                StringIO(data),
-                dtype={"col2": str, "col3": int, "col4": str},
-            )
-    else:
-        result = parser.read_csv(
-            StringIO(data),
-            dtype={"col2": str, "col3": int, "col4": str},
-        )
-
-    assert result.shape == (4, 4)
-    assert list(result.columns) == ["col1", "col2", "col3", "col4"]
-
-    assert result.loc[0, "col2"] == "000388907", "lost zeros in col2 row 0"
-    assert result.loc[2, "col2"] == "000023607", "lost zeros in col2 row 2"
-    assert result.loc[0, "col4"] == "0150", "lost zeros in col4 row 0"
-    assert result.loc[2, "col4"] == "0205", "lost zeros in col4 row 2"
-
-    assert result.loc[0, "col3"] == 199
-    assert result.loc[1, "col3"] == 200
-    assert result.loc[2, "col3"] == 201
-    assert result.loc[3, "col3"] == 202

From 9d68f91e6366bc55c0cea6f267cbfd2953030752 Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Wed, 3 Sep 2025 17:27:58 -0500
Subject: [PATCH 14/20] Remove pyarrow dtype warnings; silently skip unsupported dtypes

---
 pandas/io/parsers/arrow_parser_wrapper.py     | 24 +++++++++----------
 .../io/parser/test_preserve_leading_zeros.py  | 21 ++++------------
 2 files changed, 15 insertions(+), 30 deletions(-)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 4651c27d9ddbf..8b1970ef86ceb 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -6,7 +6,6 @@
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import (
-    DtypeWarning,
     Pandas4Warning,
     ParserError,
     ParserWarning,
@@ -146,27 +145,26 @@ def handle_warning(invalid_row) -> str:
             if isinstance(self.dtype, dict):
                 column_types = {}
                 for col, col_dtype in self.dtype.items():
+                    # TODO: Category dtypes are not currently handled - may cause issues
+                    # with categorical data preservation in pyarrow engine
+                    if col_dtype == "category":
+                        continue
+
                     try:
                         numpy_dtype = pandas_dtype(col_dtype).type
                         pyarrow_dtype = pa.from_numpy_dtype(numpy_dtype)
                         column_types[col] = pyarrow_dtype
                     except (TypeError, ValueError, pa.ArrowNotImplementedError):
-                        warnings.warn(
-                            f"Column '{col}' has dtype '{col_dtype}', "
-                            "which may not be handled correctly by the pyarrow engine.",
-                            DtypeWarning,
-                            stacklevel=find_stack_level(),
-                        )
+                        # TODO: Unsupported dtypes silently ignored - may cause unexpected
+                        # behavior when pyarrow applies default inference instead of user's dtype
+                        continue
 
                 if column_types:
                     self.convert_options["column_types"] = column_types
             else:
-                warnings.warn(
-                    f"Global dtype '{self.dtype}' not supported with pyarrow engine. "
-                    "Use dtype dictionary instead.",
-                    DtypeWarning,
-                    stacklevel=find_stack_level(),
-                )
+                # TODO: Global dtypes not supported - may cause inconsistent behavior
+                # between engines, especially for leading zero preservation
+                pass
 
         self.read_options = {
             "autogenerate_column_names": self.header is None,
diff --git a/pandas/tests/io/parser/test_preserve_leading_zeros.py b/pandas/tests/io/parser/test_preserve_leading_zeros.py
index a1f460d7e47ee..757962d06817a 100644
--- a/pandas/tests/io/parser/test_preserve_leading_zeros.py
+++ b/pandas/tests/io/parser/test_preserve_leading_zeros.py
@@ -2,10 +2,6 @@
 
 import pytest
 
-from pandas.errors import DtypeWarning
-
-import pandas._testing as tm
-
 
 def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
     # GH#57666: pyarrow engine strips leading zeros when dtype=str is passed
@@ -20,19 +16,10 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
 EF,000023607,ghi,0205
 GH,100102040,jkl,0205"""
 
-    if engine_name == "pyarrow":
-        with tm.assert_produces_warning(
-            DtypeWarning, match="not supported with pyarrow engine"
-        ):
-            result = parser.read_csv(
-                StringIO(data),
-                dtype=str,
-            )
-    else:
-        result = parser.read_csv(
-            StringIO(data),
-            dtype=str,
-        )
+    result = parser.read_csv(
+        StringIO(data),
+        dtype=str,
+    )
 
     try:
         assert result.shape == (4, 4)

From a324f4aceaf9709b3adfbabc401b3ac33555228c Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Wed, 3 Sep 2025 17:32:53 -0500
Subject: [PATCH 15/20] Wrap overlong TODO comment in arrow_parser_wrapper.py

---
 pandas/io/parsers/arrow_parser_wrapper.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 8b1970ef86ceb..4fdeec95b5a2a 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -156,7 +156,8 @@ def handle_warning(invalid_row) -> str:
                         column_types[col] = pyarrow_dtype
                     except (TypeError, ValueError, pa.ArrowNotImplementedError):
                         # TODO: Unsupported dtypes silently ignored - may cause unexpected
-                        # behavior when pyarrow applies default inference instead of user's dtype
+                        # behavior when pyarrow applies default inference instead of
+                        # user's dtype
                         continue
 
                 if column_types:

From 8cda906fdbdedb9330e565d21110050721d6f8c1 Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Wed, 3 Sep 2025 17:40:03 -0500
Subject: [PATCH 16/20] Re-wrap TODO comment in arrow_parser_wrapper.py

---
 pandas/io/parsers/arrow_parser_wrapper.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 4fdeec95b5a2a..0147203b4ceab 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -155,9 +155,9 @@ def handle_warning(invalid_row) -> str:
                         pyarrow_dtype = pa.from_numpy_dtype(numpy_dtype)
                         column_types[col] = pyarrow_dtype
                     except (TypeError, ValueError, pa.ArrowNotImplementedError):
-                        # TODO: Unsupported dtypes silently ignored - may cause unexpected
-                        # behavior when pyarrow applies default inference instead of
-                        # user's dtype
+                        # TODO: Unsupported dtypes silently ignored - may cause
+                        # unexpected behavior when pyarrow applies default inference
+                        # instead of user's dtype
                         continue
 
                 if column_types:

From df9e96d23cb327276b5e70f713173a9793bf26af Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Wed, 3 Sep 2025 19:15:11 -0500
Subject: [PATCH 17/20] Stop catching TypeError in pyarrow dtype conversion

---
 pandas/io/parsers/arrow_parser_wrapper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 0147203b4ceab..26228607882d2 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -154,7 +154,7 @@ def handle_warning(invalid_row) -> str:
                         numpy_dtype = pandas_dtype(col_dtype).type
                         pyarrow_dtype = pa.from_numpy_dtype(numpy_dtype)
                         column_types[col] = pyarrow_dtype
-                    except (TypeError, ValueError, pa.ArrowNotImplementedError):
+                    except (ValueError, pa.ArrowNotImplementedError):
                         # TODO: Unsupported dtypes silently ignored - may cause
                         # unexpected behavior when pyarrow applies default inference
                         # instead of user's dtype

From d2925bcf51d7820f798691d20fc1d971739b6ea1 Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Thu, 4 Sep 2025 08:35:20 -0500
Subject: [PATCH 18/20] use native to_pyarrow_type

---
 pandas/io/parsers/arrow_parser_wrapper.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 26228607882d2..d361c0b8f6952 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -19,6 +19,8 @@
 )
 from pandas.core.dtypes.inference import is_integer
 
+from pandas.core.arrays.arrow.array import to_pyarrow_type
+
 from pandas.io._util import arrow_table_to_pandas
 from pandas.io.parsers.base_parser import ParserBase
 
@@ -145,20 +147,18 @@ def handle_warning(invalid_row) -> str:
             if isinstance(self.dtype, dict):
                 column_types = {}
                 for col, col_dtype in self.dtype.items():
-                    # TODO: Category dtypes are not currently handled - may cause issues
-                    # with categorical data preservation in pyarrow engine
-                    if col_dtype == "category":
-                        continue
+                    source_dtype = pandas_dtype(col_dtype)
 
                     try:
-                        numpy_dtype = pandas_dtype(col_dtype).type
-                        pyarrow_dtype = pa.from_numpy_dtype(numpy_dtype)
-                        column_types[col] = pyarrow_dtype
-                    except (ValueError, pa.ArrowNotImplementedError):
+                        target_dtype = to_pyarrow_type(source_dtype)
+                        if target_dtype:
+                            column_types[col] = target_dtype
+
+                    except TypeError:
                         # TODO: Unsupported dtypes silently ignored - may cause
                         # unexpected behavior when pyarrow applies default inference
                         # instead of user's dtype
-                        continue
+                        pass
 
                 if column_types:
                     self.convert_options["column_types"] = column_types

From d6f3a5c648a2ec720e6f1e1021a487465b38b6a5 Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Thu, 4 Sep 2025 08:50:55 -0500
Subject: [PATCH 19/20] Remove unused local pyarrow import in _get_pyarrow_options

---
 pandas/io/parsers/arrow_parser_wrapper.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index d361c0b8f6952..fcf50ded43fad 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -62,8 +62,6 @@ def _get_pyarrow_options(self) -> None:
         """
         Rename some arguments to pass to pyarrow
         """
-        pa = import_optional_dependency("pyarrow")
-
         mapping = {
             "usecols": "include_columns",
             "na_values": "null_values",

From 5df2f20681a6a648d494f4fabbf1afbac864c871 Mon Sep 17 00:00:00 2001
From: Daniel Caspi <dan@element26.net>
Date: Thu, 4 Sep 2025 10:07:22 -0500
Subject: [PATCH 20/20] Pass source_dtype.type to to_pyarrow_type

---
 pandas/io/parsers/arrow_parser_wrapper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index fcf50ded43fad..294cccea189ed 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -148,7 +148,7 @@ def handle_warning(invalid_row) -> str:
                     source_dtype = pandas_dtype(col_dtype)
 
                     try:
-                        target_dtype = to_pyarrow_type(source_dtype)
+                        target_dtype = to_pyarrow_type(source_dtype.type)
                         if target_dtype:
                             column_types[col] = target_dtype