Skip to content

feat: add ArrowDBC module #2786

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 31 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
837046d
adding adbc-driver-postgresql
LeonLuttenberger Feb 20, 2024
374efdd
add ADBC module
LeonLuttenberger Feb 20, 2024
56d5b5b
add test_read_write_equality
LeonLuttenberger Feb 20, 2024
026bdc9
document connect
LeonLuttenberger Feb 20, 2024
7db4caa
Merge branch 'main' into feat/arrow-dbc
LeonLuttenberger Feb 21, 2024
0f7e982
Merge branch 'main' into feat/arrow-dbc
LeonLuttenberger Feb 27, 2024
8d98cba
update adbc-driver-postgresql
LeonLuttenberger Feb 27, 2024
d6051a3
Merge branch 'main' into feat/arrow-dbc
LeonLuttenberger Apr 3, 2024
4729923
Merge branch 'main' into feat/arrow-dbc
LeonLuttenberger Apr 9, 2024
a041a57
Merge branch 'main' into feat/arrow-dbc
LeonLuttenberger Apr 22, 2024
79cb387
update modin and pandas
LeonLuttenberger Apr 22, 2024
2416478
fix test_read_write_equality
LeonLuttenberger Apr 22, 2024
e8663db
update adbc-driver-postgresql
LeonLuttenberger Apr 22, 2024
480a423
add documentation
LeonLuttenberger Apr 22, 2024
5b9876c
remove params from read_sql_query
LeonLuttenberger Apr 22, 2024
8fa3a03
add tests
LeonLuttenberger Apr 22, 2024
856e2a0
add ADBC stubs to docs
LeonLuttenberger Apr 22, 2024
eb1b865
Merge branch 'main' into feat/arrow-dbc
LeonLuttenberger Apr 22, 2024
1f23ddf
fix static check for 3.8
LeonLuttenberger Apr 23, 2024
e8187d2
move adbc to optional argument
LeonLuttenberger Apr 23, 2024
6a4b273
clean up test_read_write_equality
LeonLuttenberger Apr 24, 2024
d02ca63
fix formatting
LeonLuttenberger Apr 24, 2024
802a710
fix modin version
LeonLuttenberger Apr 24, 2024
5c26733
Merge branch 'main' into feat/arrow-dbc
LeonLuttenberger Apr 29, 2024
24c37a6
Merge branch 'main' into feat/arrow-dbc
LeonLuttenberger May 1, 2024
04d7f2c
add xfail
LeonLuttenberger May 2, 2024
3570028
Merge branch 'main' into feat/arrow-dbc
LeonLuttenberger May 2, 2024
963c43e
Merge branch 'main' into feat/arrow-dbc
LeonLuttenberger May 6, 2024
b5f0610
Merge branch 'main' into feat/arrow-dbc
LeonLuttenberger May 22, 2024
464ef08
remove sql methods
LeonLuttenberger May 22, 2024
002a04e
update adbc
LeonLuttenberger May 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions awswrangler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import logging as _logging

from awswrangler import (
_arrow, # noqa: F401
adbc,
athena,
catalog,
chime,
Expand Down Expand Up @@ -40,6 +42,7 @@
engine.register()

__all__ = [
"adbc",
"athena",
"catalog",
"chime",
Expand Down
1 change: 1 addition & 0 deletions awswrangler/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
"gremlin_python": "gremlin",
"opensearchpy": "opensearch",
"oracledb": "oracle",
"pg_abapi": "adbc-driver-postgresql",
}


Expand Down
89 changes: 89 additions & 0 deletions awswrangler/adbc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""Amazon ADBC Module."""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING
from urllib.parse import urlencode

import boto3

from awswrangler import _databases as _db_utils
from awswrangler import _utils, exceptions

if TYPE_CHECKING:
try:
import adbc_driver_postgresql.dbapi as pg_dbapi
from adbc_driver_manager import dbapi
except ImportError:
pass
else:
pg_dbapi = _utils.import_optional_dependency("adbc_driver_postgresql.dbapi")
db_api = _utils.import_optional_dependency("adbc_driver_manager.dbapi")


_logger: logging.Logger = logging.getLogger(__name__)


def _validate_connection(con: "dbapi.Connection") -> None:
    """Ensure ``con`` is an ADBC PostgreSQL DB-API connection.

    Parameters
    ----------
    con
        Object to check against ``pg_dbapi.Connection``.

    Raises
    ------
    exceptions.InvalidConnection
        If ``con`` is not an instance of ``pg_dbapi.Connection``.
    """
    # Guard clause: a valid connection needs no further handling.
    if isinstance(con, pg_dbapi.Connection):
        return

    message = (
        "Invalid 'con' argument, please pass a "
        "adbc_driver_postgresql.dbapi.Connection object. "
        "Use adbc_driver_postgresql.dbapi.connect() to use "
        "credentials directly or wr.adbc.connect() to fetch it from the Glue Catalog."
    )
    raise exceptions.InvalidConnection(message)


@_utils.check_optional_dependency(pg_dbapi, "pg_abapi")
def connect(
    connection: str | None = None,
    secret_id: str | None = None,
    catalog_id: str | None = None,
    dbname: str | None = None,
    timeout: int | None = None,
    boto3_session: boto3.Session | None = None,
) -> "dbapi.Connection":
    """
    Connect to a PostgreSQL database using the ArrowDBC (ADBC) connector.

    The connection attributes are resolved through
    ``_db_utils.get_connection_attributes`` and then assembled into a
    ``postgresql://`` URI that is handed to ``adbc_driver_postgresql.dbapi.connect``.

    Parameters
    ----------
    connection: str, optional
        Glue Catalog Connection name.
    secret_id: str, optional
        Specifies the secret containing the credentials that are used to connect to the database.
        You can specify either the Amazon Resource Name (ARN) or the friendly name of the secret.
    catalog_id: str, optional
        The ID of the Data Catalog.
        If none is provided, the AWS account ID is used by default.
    dbname: str, optional
        The name of a database.
    timeout : int, optional
        Timeout in seconds. Only forwarded (as ``connect_timeout``) when truthy.
    boto3_session: boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session is None.

    Returns
    -------
    adbc_driver_manager.dbapi.Connection
        Connection object.

    Raises
    ------
    exceptions.InvalidDatabaseType
        If the resolved connection kind is neither ``"postgresql"`` nor ``"postgres"``.
    """
    # Local import keeps this block self-contained; `quote` emits %XX escapes only.
    from urllib.parse import quote

    attrs: _db_utils.ConnectionAttributes = _db_utils.get_connection_attributes(
        connection=connection, secret_id=secret_id, catalog_id=catalog_id, dbname=dbname, boto3_session=boto3_session
    )
    if attrs.kind not in ("postgresql", "postgres"):
        raise exceptions.InvalidDatabaseType(
            f"Invalid connection type ({attrs.kind}). It must be a postgresql connection."
        )

    connection_arguments = {
        "host": attrs.host,
        "port": attrs.port,
        "user": attrs.user,
        "password": attrs.password,
    }
    if timeout:
        connection_arguments["connect_timeout"] = timeout

    # Percent-encode every URI component. The default `quote_plus` used by
    # `urlencode` would turn spaces into '+', which a percent-decoding-only URI
    # parser reads back as a literal '+' (e.g. inside a password).
    query = urlencode(connection_arguments, quote_via=quote)
    # Escape the database name too, so characters such as '/', '?' or '#'
    # cannot be mistaken for URI delimiters.
    database = quote(str(attrs.database), safe="")

    return pg_dbapi.connect(uri=f"postgresql:///{database}?{query}")  # type: ignore[no-any-return]
6 changes: 6 additions & 0 deletions awswrangler/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
notna,
read_csv,
read_excel,
read_sql,
read_sql_table,
to_datetime,
)
elif memory_format.get() == MemoryFormatEnum.MODIN:
Expand All @@ -35,6 +37,8 @@
notna,
read_csv,
read_excel,
read_sql,
read_sql_table,
to_datetime,
)
else:
Expand All @@ -53,4 +57,6 @@
"read_csv",
"read_excel",
"to_datetime",
"read_sql",
"read_sql_table",
]
14 changes: 14 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ API Reference
* `MySQL`_
* `Microsoft SQL Server`_
* `Oracle`_
* `ArrowDBC`_
* `Data API Redshift`_
* `Data API RDS`_
* `AWS Glue Data Quality`_
Expand Down Expand Up @@ -213,6 +214,19 @@ ____________________

.. currentmodule:: awswrangler.oracle

.. autosummary::
:toctree: stubs

connect
read_sql_query
read_sql_table
to_sql

ArrowDBC
____________________

.. currentmodule:: awswrangler.adbc

.. autosummary::
:toctree: stubs

Expand Down
Loading
Loading