1+ from __future__ import annotations
2+
13import json
24import webbrowser
35from pathlib import Path
@@ -22,7 +24,8 @@ def read_pyogrio(path: Path) -> Table:
2224 from pyogrio .raw import open_arrow
2325 except ImportError as e :
2426 raise ImportError (
25- "pyogrio is a required dependency for the CLI. "
27+ "pyogrio is a required dependency for the CLI for reading data sources \n "
28+ "other than GeoParquet.\n "
2629 "Install with `pip install pyogrio`."
2730 ) from e
2831
@@ -58,29 +61,55 @@ def read_pyogrio(path: Path) -> Table:
5861 return table .with_schema (new_schema )
5962
6063
61- def read_geoparquet (path : Path ) -> Table :
62- """Read GeoParquet file at path using pyarrow
64+ def read_parquet (path : Path ) -> tuple [Table , dict ]:
65+ """Read Parquet file using either pyarrow or arro3.
66+
67+ arro3.io.read_parquet is not multi-threaded (as of arro3 0.2.1), so pyarrow can be
68+ up to 4x faster on an 8-core machine. Because of this, we prefer pyarrow if it's
69+ installed, and fall back to arro3 otherwise.
6370
6471 Args:
65- path: Path to GeoParquet file
72+ path: path to Parquet file.
73+
74+ Raises:
75+ ValueError: if there's no GeoParquet metadata in the file
76+
77+ Returns:
78+ tuple of (arro3 Table, dict of GeoParquet metadata parsed from the file's "geo" key)
6679 """
6780 try :
6881 import pyarrow .parquet as pq
69- except ImportError as e :
70- raise ImportError (
71- "pyarrow currently required for reading GeoParquet files.\n "
72- "Run `pip install pyarrow`."
73- ) from e
7482
75- file = pq .ParquetFile (path )
76- geo_meta = file .metadata .metadata .get (b"geo" )
77- if not geo_meta :
78- raise ValueError ("Expected geo metadata in Parquet file" )
83+ file = pq .ParquetFile (path )
84+ if b"geo" not in file .metadata .metadata :
85+ raise ValueError ("Expected geo metadata in Parquet file" )
86+ geo_meta = json .loads (file .metadata .metadata .get (b"geo" ))
87+
88+ table = Table .from_arrow (file .read ())
89+
90+ return table , geo_meta
7991
80- pyarrow_table = file . read ()
81- table = Table . from_arrow ( pyarrow_table )
92+ except ImportError :
93+ from arro3 . io import read_parquet
8294
83- geo_meta = json .loads (geo_meta )
95+ reader = read_parquet (path )
96+
97+ if "geo" not in reader .schema .metadata_str .keys ():
98+ raise ValueError ("Expected geo metadata in Parquet file" )
99+
100+ table = reader .read_all ()
101+ geo_meta = json .loads (table .schema .metadata_str ["geo" ])
102+
103+ return table , geo_meta
104+
105+
106+ def read_geoparquet (path : Path ) -> Table :
107+ """Read GeoParquet file at path using pyarrow or arro3.io
108+
109+ Args:
110+ path: Path to GeoParquet file
111+ """
112+ table , geo_meta = read_parquet (path )
84113 geometry_column_name = geo_meta ["primary_column" ]
85114 geometry_column_index = [
86115 i for (i , name ) in enumerate (table .schema .names ) if name == geometry_column_name
0 commit comments