Skip to content

Commit 0f9062b

Browse files
authored
Add Azure SQL support to clouddb_extractor (#69)
1 parent a0935cb commit 0f9062b

File tree

5 files changed

+259
-7
lines changed

5 files changed

+259
-7
lines changed

Community-Supported/clouddb-extractor/README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ following methods:
2020
For a full list of methods and args see the docstrings in the BaseExtractor class.
2121

2222
## Contents
23+
* __azuresql_extractor.py__ - Azure SQL Database implementation of Base Hyper Extractor ABC
2324
* __base_extractor.py__ - provides an Abstract Base Class with some utility methods to extract from cloud databases to "live to hyper" Tableau Datasources. Database specific Extractor classes extend this to manage connections and schema discovery
2425
and may override the generic query processing methods based on DBAPIv2 standards with database specific optimizations.
2526
* __bigquery_extractor.py__ - Google BigQuery implementation of Base Hyper Extractor ABC
@@ -61,6 +62,18 @@ $ python3 extractor_cli.py --help
6162
- delete: Delete rows from a Tableau datasource that match key columns in a changeset from a query
6263
```
6364

65+
### Sample Usage
66+
67+
```console
68+
# Load a sample (default=1000 rows) from test_table to sample_extract in test_project
69+
python3 extractor_cli.py load_sample --tableau_token_name hyperapitest --tableau_token_secretfile hyperapitest.token --source_table_id test_table --tableau_project test_project --tableau_datasource sample_extract
70+
71+
# Load a full extract from test_table to full_extract in test_project
72+
python3 extractor_cli.py export_load --tableau_token_name hyperapitest --tableau_token_secretfile hyperapitest.token --source_table_id test_table --tableau_project test_project --tableau_datasource full_extract
73+
74+
# Execute updated_rows.sql to retrieve a changeset and update full_extract where ROW_ID in changeset matches
75+
python3 extractor_cli.py update --tableau_token_name hyperapitest --tableau_token_secretfile hyperapitest.token --sqlfile updated_rows.sql --tableau_project test_project --tableau_datasource full_extract --match_columns ROW_ID ROW_ID
76+
```
6477

6578
# Installation
6679

@@ -103,6 +116,10 @@ cd hyper-api-samples/Community-Supported/clouddb-extractor
103116
pip install -r requirements.txt
104117
```
105118

119+
## Azure SQL Database Configuration
120+
The following steps are required if using azuresql_extractor.
121+
- Install ODBC Drivers and Azure utilities for your platform using the following instructions: https://github.com/Azure-Samples/AzureSqlGettingStartedSamples/tree/master/python/Unix-based
122+
106123
## Google BigQuery Configuration
107124
The following steps are required if using bigquery_extractor.
108125

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
"""Azure SQL Database implementation of Base Hyper Extractor ABC
2+
3+
Tableau Community supported Hyper API sample
4+
5+
-----------------------------------------------------------------------------
6+
7+
This file is the copyrighted property of Tableau Software and is protected
8+
by registered patents and other applicable U.S. and international laws and
9+
regulations.
10+
11+
You may adapt this file and modify it to fit into your context and use it
12+
as a template to start your own projects.
13+
14+
-----------------------------------------------------------------------------
15+
"""
16+
import logging
17+
from typing import Any, Optional, Dict
18+
19+
import pyodbc
20+
from tableauhyperapi import Nullability, SqlType, TableDefinition, TableName
21+
22+
from base_extractor import DEFAULT_SITE_ID, BaseExtractor, HyperSQLTypeMappingError
23+
24+
# Module logger. Named after this extractor's key in extractor_cli.EXTRACTORS
# ("azuresql") so its records are not filed under the MySQL extractor's logger.
logger = logging.getLogger("hyper_samples.extractor.azuresql")
25+
26+
class QuerySizeLimitError(Exception):
    """Exception type reserved for query size limit violations.

    NOTE(review): nothing visible in this module raises it - presumably kept
    for parity with the other extractor modules; confirm before removing.
    """
28+
29+
class AzureSQLExtractor(BaseExtractor):
    """Azure SQL Database Implementation of Extractor Interface

    Connects to the source database through pyodbc and maps the DBAPI cursor
    description onto a Hyper table definition.

    Constructor Args:
    - source_database_config (dict): Source database parameters
    - tableau_hostname (string): URL for Tableau Server, e.g. "http://localhost"
    - tableau_site_id (string): Tableau site identifier - if default use ""
    - tableau_project (string): Tableau project identifier
    - tableau_token_name (string): PAT name
    - tableau_token_secret (string): PAT secret
    - tableau_username (string): Tableau username
    - tableau_password (string): Tableau password
    NOTE: Authentication to Tableau Server can be either by Personal Access Token or
    Username and Password. If both are specified then token takes precedence.
    """

    def __init__(
        self,
        source_database_config: dict,
        tableau_hostname: str,
        tableau_project: str,
        tableau_site_id: str = DEFAULT_SITE_ID,
        tableau_token_name: Optional[str] = None,
        tableau_token_secret: Optional[str] = None,
        tableau_username: Optional[str] = None,
        tableau_password: Optional[str] = None,
    ) -> None:
        super().__init__(
            source_database_config=source_database_config,
            tableau_hostname=tableau_hostname,
            tableau_project=tableau_project,
            tableau_site_id=tableau_site_id,
            tableau_token_name=tableau_token_name,
            tableau_token_secret=tableau_token_secret,
            tableau_username=tableau_username,
            tableau_password=tableau_password,
        )
        # The pyodbc connection is opened lazily by source_database_cursor()
        self._source_database_connection = None
        self.sql_identifier_quote = ""

    def _source_database_password(self, db_connection_args: dict) -> Any:
        """
        Returns the database password for the given "connection" settings

        If "key_vault_url" is set the password is read from the Azure Key Vault
        secret named by "secret_name", otherwise the plain text "password"
        entry is used.

        Raises ValueError if "key_vault_url" is set without "secret_name"
        Raises KeyError if neither "key_vault_url" nor "password" is configured
        """
        key_vault_url = db_connection_args.get("key_vault_url")
        if key_vault_url is None:
            # Password is stored as plain text
            return db_connection_args["password"]

        secret_name = db_connection_args.get("secret_name")
        if secret_name is None:
            raise ValueError("source database config: key_vault_url is set but secret_name is missing")

        # Recommended: Read password from keyvault
        # Imported on demand so the azure client libraries are only required
        # when a key vault is actually configured
        from azure.identity import DefaultAzureCredential
        from azure.keyvault.secrets import SecretClient

        credential = DefaultAzureCredential()
        secret_client = SecretClient(vault_url=key_vault_url, credential=credential)
        return secret_client.get_secret(secret_name).value

    def source_database_cursor(self) -> Any:
        """
        Returns a DBAPI Cursor to the source database

        The connection is opened on the first call and reused afterwards.
        Settings are read from source_database_config["connection"]:
        host, port, database, username, a password source (see
        _source_database_password) and an optional connect_str_suffix that is
        appended verbatim to the ODBC connection string.

        Raises ValueError if the "connection" section is missing or not a dict
        """
        assert self.source_database_config is not None
        if self._source_database_connection is None:
            logger.info("Connecting to source Azure SQL Database Instance...")

            db_connection_args = self.source_database_config.get("connection")
            # Explicit check rather than assert: this validates user supplied
            # configuration and must not disappear under "python -O"
            if not isinstance(db_connection_args, dict):
                raise ValueError('source database config must contain a "connection" mapping')

            this_password = self._source_database_password(db_connection_args)

            # NOTE(review): values are interpolated verbatim - a password containing
            # ";" or "}" would need ODBC brace escaping; confirm whether that is required.
            connection_str = "Driver={{ODBC Driver 17 for SQL Server}};Server={host},{port};Database={database};Uid={username};Pwd={password};{connect_str_suffix}".format(
                host=db_connection_args["host"],
                port=db_connection_args["port"],
                database=db_connection_args["database"],
                username=db_connection_args["username"],
                password=this_password,
                connect_str_suffix=db_connection_args.get("connect_str_suffix", ""),
            )
            self._source_database_connection = pyodbc.connect(connection_str)

        return self._source_database_connection.cursor()

    def hyper_sql_type(self, source_column: Any) -> SqlType:
        """
        Finds the corresponding Hyper column type for source_column

        source_column (obj): Instance of DBAPI Column description tuple

        Returns a tableauhyperapi.SqlType Object
        Raises HyperSQLTypeMappingError if the source type has no mapping
        """
        # Note: pyodbc returns a description which contains a tuple per column with the following fields
        #   0 column name (or alias, if specified in the SQL)
        #   1 type object
        #   2 display size (pyodbc does not set this value)
        #   3 internal size (in bytes)
        #   4 precision
        #   5 scale
        #   6 nullable (True/False)
        # e.g. ('schema_id', <class 'int'>, None, 10, 10, 0, False)
        # The mapping from SQL types to python types is defined in pyodbc.SQL_data_type_dict
        source_column_type = source_column[1].__name__

        if source_column_type == "Decimal":
            # numeric needs precision and scale and already yields a SqlType
            # instance, so it must not go through the factory call below
            return_sql_type = SqlType.numeric(source_column[4], source_column[5])
        else:
            # Python type name -> zero-argument SqlType factory
            type_lookup = {
                "str": SqlType.text,
                "unicode": SqlType.text,
                "bytearray": SqlType.text,
                "bool": SqlType.bool,
                "int": SqlType.int,
                "float": SqlType.double,
                "long": SqlType.big_int,
                "date": SqlType.date,
                "time": SqlType.time,
                "datetime": SqlType.timestamp_tz,
            }
            sql_type_factory = type_lookup.get(source_column_type)
            if sql_type_factory is None:
                error_message = "No Hyper SqlType defined for Azure SQL source type: {}".format(source_column_type)
                logger.error(error_message)
                raise HyperSQLTypeMappingError(error_message)
            return_sql_type = sql_type_factory()

        logger.debug("Translated source column type {} to Hyper SqlType {}".format(source_column_type, return_sql_type))
        return return_sql_type

    def hyper_table_definition(self, source_table: Any, hyper_table_name: str = "Extract") -> TableDefinition:
        """
        Build a hyper table definition from source_schema

        source_table (obj): Source table (Instance of DBAPI Cursor Description)
        hyper_table_name (string): Name of the target Hyper table, default="Extract"

        Returns a tableauhyperapi.TableDefinition Object
        """
        logger.debug("Building Hyper TableDefinition for table {}".format(source_table))
        target_cols = []
        for source_column in source_table:
            this_name = source_column[0]
            this_type = self.hyper_sql_type(source_column)
            # Only an explicit False marks the column NOT NULL; anything else
            # keeps the Column default nullability
            if source_column[6] is False:
                this_col = TableDefinition.Column(this_name, this_type, Nullability.NOT_NULLABLE)
            else:
                this_col = TableDefinition.Column(name=this_name, type=this_type)
            target_cols.append(this_col)
            logger.info("..Column {} - Type {}".format(this_name, this_type))

        # Create the target schema for our Hyper File
        target_schema = TableDefinition(table_name=TableName("Extract", hyper_table_name), columns=target_cols)
        return target_schema

    def load_sample(
        self,
        tab_ds_name: str,
        source_table: Optional[str] = None,
        sql_query: Optional[str] = None,
        sample_rows: int = 0,
        publish_mode: Any = None,
    ) -> None:
        """
        Not supported by this extractor - always raises NotImplementedError

        The parameters are accepted but never used.
        """
        error_message = "METHOD load_sample is not implemented for SQL Server (Transact-SQL does not support the LIMIT statement)"
        logger.error(error_message)
        raise NotImplementedError(error_message)
203+
204+
def main():
    """Script entry point; intentionally does nothing."""
    return None


if __name__ == "__main__":
    main()

Community-Supported/clouddb-extractor/base_extractor.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ class BaseExtractor(ABC):
215215

216216
def __init__(
217217
self,
218-
source_database_config: Dict,
218+
source_database_config: Dict[str, Any],
219219
tableau_hostname: str,
220220
tableau_project: str,
221221
tableau_site_id: str = DEFAULT_SITE_ID,
@@ -272,7 +272,7 @@ def quoted_sql_identifier(self, sql_identifier: str) -> str:
272272
raise Exception("Invalid SQL identifier: {} - exceeded max allowed length: {}".format(sql_identifier, maxlength))
273273

274274
# char_whitelist = re.compile("^[A-Za-z0-9_-.]*$")
275-
char_whitelist = re.compile("\A[\w\.\-]*\Z")
275+
char_whitelist = re.compile(r"\A[\w\.\-]*\Z")
276276
if char_whitelist.match(sql_identifier) is None:
277277
raise Exception("Invalid SQL identifier: {} - found invalid characters".format(sql_identifier))
278278

@@ -346,7 +346,7 @@ def query_result_to_hyper_file(
346346
self,
347347
target_table_def: Optional[TableDefinition] = None,
348348
cursor: Any = None,
349-
query_result_iter: Iterable[Iterable[object]] = None,
349+
query_result_iter: Optional[Iterable[Iterable[object]]] = None,
350350
hyper_table_name: str = "Extract",
351351
) -> Path:
352352
"""
@@ -379,9 +379,9 @@ def query_result_to_hyper_file(
379379
if cursor.description is None:
380380
raise Exception("DBAPI Cursor did not return any schema description for query:{}".format(cursor.query))
381381
target_table_def = self.hyper_table_definition(source_table=cursor.description, hyper_table_name=hyper_table_name)
382+
assert target_table_def is not None
382383

383384
path_to_database = Path(tempfile_name(prefix="temp_", suffix=".hyper"))
384-
385385
with HyperProcess(telemetry=TELEMETRY) as hyper:
386386

387387
# Creates new Hyper extract file

Community-Supported/clouddb-extractor/config.yml

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,23 @@ mysql: #Mysql configuration defaults
2020
database : "dev"
2121
port : 3306
2222
username : "test"
23+
#Recommended: use key vault instead of password
24+
#key_vault_url : "https://<your_keyvault_name>.vault.azure.net"
25+
#secret_name : my-password-secret
2326
password : "password"
2427
raise_on_warnings : True
28+
azuresql: #Azure SQL Database configuration defaults
29+
connection:
30+
host : "mydbserver.test"
31+
port : 1433
32+
database : "test"
33+
username : "test"
34+
password : "password"
35+
connect_str_suffix : "Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30"
36+
raise_on_warnings : True
2537
redshift: #Redshift configuration defaults
2638
connection:
2739
host : 'redshift-cluster-1.XXX.eu-west-1.redshift.amazonaws.com'
2840
database : 'dev'
2941
user : 'test'
30-
password : 'password'
42+
password : 'password'

Community-Supported/clouddb-extractor/extractor_cli.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,15 @@
2424
import json
2525
import yaml
2626

27+
import tableauserverclient as TSC
28+
2729
# Globals
2830
EXTRACTORS = {
2931
"bigquery": "bigquery_extractor.BigQueryExtractor",
3032
"redshift": "redshift_extractor.RedshiftExtractor",
3133
"mysql": "mysql_extractor.MySQLExtractor",
3234
"postgres": "postgres_extractor.PostgresExtractor",
35+
"azuresql": "azuresql_extractor.AzureSQLExtractor",
3336
}
3437
CONFIGURATION_FILE = "config.yml"
3538

@@ -60,7 +63,7 @@ def _exclusive_args(args, *arg_names, required=True, message=None):
6063
if required:
6164
if count_args != 1:
6265
if message is None:
63-
raise IllegalArgumentError(message="Must specify one of {}".format(",".join(arg_names)))
66+
raise IllegalArgumentError("Must specify one of {}".format(",".join(arg_names)))
6467
else:
6568
raise IllegalArgumentError(message)
6669
else:
@@ -185,7 +188,11 @@ def main():
185188
"--match_conditions_json",
186189
help="Json file defining conditions for matching rows when command=[update|delete].",
187190
)
188-
191+
parser.add_argument(
192+
"--overwrite",
193+
action='store_true',
194+
help="Overwrite published datasource when command=[load_sample|export_load] - default behaviour returns error if target datasource exists",
195+
)
189196
# Tableau Server / Tableau Online options
190197
_add_arg_with_default(parser, config, "tableau_env.server_address", "Tableau connection string", True, "--tableau_hostname", "-H")
191198
_add_arg_with_default(parser, config, "tableau_env.site_id", "Tableau site id", True, "--tableau_site_id", "-S")
@@ -229,6 +236,7 @@ def main():
229236
# These are loaded on demand so that you don't have to install
230237
# client libraries for all source database implementations
231238
extractor_class_str = EXTRACTORS.get(selected_extractor)
239+
assert extractor_class_str is not None
232240
extractor_module_str = extractor_class_str.split(".")[0]
233241
extractor_class_str = extractor_class_str.split(".")[1]
234242
extractor_module = importlib.import_module(extractor_module_str)
@@ -263,6 +271,10 @@ def main():
263271
tableau_username=args.tableau_username,
264272
tableau_password=tableau_password,
265273
)
274+
275+
publishmode=TSC.Server.PublishMode.CreateNew
276+
if args.overwrite:
277+
publishmode=TSC.Server.PublishMode.Overwrite
266278

267279
if selected_command == "load_sample":
268280
_required_arg(
@@ -275,13 +287,15 @@ def main():
275287
source_table=args.source_table_id,
276288
tab_ds_name=args.tableau_datasource,
277289
sample_rows=_get_int_from_arg(args.sample_rows, "sample_rows", True),
290+
publish_mode=publishmode,
278291
)
279292

280293
if selected_command == "export_load":
281294
extractor.export_load(
282295
sql_query=sql_string,
283296
source_table=args.source_table_id,
284297
tab_ds_name=args.tableau_datasource,
298+
publish_mode=publishmode,
285299
)
286300

287301
if selected_command == "append":

0 commit comments

Comments
 (0)