88from modules .data_sources .ChangeTrackingInfo import ChangeTrackingInfo
99from sqlalchemy .sql import text
1010
11+
1112class MsSqlDataSource (object ):
1213
1314 def __init__ (self , connection_string , logger = None ):
@@ -31,32 +32,40 @@ def prefix_column(column_name, full_refresh, primary_key_column_name):
3132 else :
3233 return "t.{0}" .format (column_name )
3334
34- def build_select_statement (self , table_configuration , columns , batch_configuration , previous_batch_key , full_refresh , change_tracking_info ):
35- column_array = list (map (lambda cfg : self .prefix_column (cfg ['source_name' ], full_refresh , table_configuration ['primary_key' ]), columns ))
35+ def build_select_statement (self , table_configuration , columns , batch_configuration , batch_key_tracker , full_refresh ,
36+ change_tracking_info ):
37+ column_array = list (
38+ map (lambda cfg : self .prefix_column (cfg ['source_name' ], full_refresh , table_configuration ['primary_keys' ]),
39+ columns ))
3640 column_names = ", " .join (column_array )
41+
3742 if full_refresh :
38- return "SELECT TOP ({0}) {1} FROM {2}.{3} t WHERE t.{4} > {5} ORDER BY t.{4}" .format (batch_configuration ['size' ],
39- column_names ,
40- table_configuration [
41- 'schema' ],
42- table_configuration [
43- 'name' ],
44- table_configuration [
45- 'primary_key' ],
46- previous_batch_key )
43+ order_by = ", t." .join (table_configuration ['primary_keys' ])
44+
45+ return "SELECT TOP ({0}) {1} FROM {2}.{3} t WHERE {4} ORDER BY {5}" .format (batch_configuration ['size' ],
46+ column_names ,
47+ table_configuration [
48+ 'schema' ],
49+ table_configuration [
50+ 'name' ],
51+ self .build_where_clause (batch_key_tracker , "t" ),
52+ order_by )
4753 else :
54+ order_by = ", chg." .join (table_configuration ['primary_keys' ])
55+
4856 sql_builder = io .StringIO ()
4957 sql_builder .write ("SELECT TOP ({0}) {1}, " .format (batch_configuration ['size' ], column_names ))
50- sql_builder .write ("chg.SYS_CHANGE_VERSION as data_pipeline_change_version, CASE chg.SYS_CHANGE_OPERATION WHEN 'D' THEN 1 ELSE 0 END as data_pipeline_is_deleted \n " )
58+ sql_builder .write (
59+ "chg.SYS_CHANGE_VERSION as data_pipeline_change_version, CASE chg.SYS_CHANGE_OPERATION WHEN 'D' THEN 1 ELSE 0 END as data_pipeline_is_deleted \n " )
5160 sql_builder .write ("FROM CHANGETABLE(CHANGES {0}.{1}, {2}) chg " .format (table_configuration ['schema' ],
5261 table_configuration ['name' ],
5362 change_tracking_info .this_sync_version ))
54- sql_builder .write (" LEFT JOIN {0}.{1} t on chg. {2} = t.{2} " .format ( table_configuration ['schema' ],
55- table_configuration ['name' ],
56- table_configuration [ 'primary_key' ], ))
63+ sql_builder .write (" LEFT JOIN {0}.{1} t on {2} " .format (table_configuration ['schema' ],
64+ table_configuration ['name' ],
65+ self . build_change_table_on_clause ( batch_key_tracker ) ))
5766
58- sql_builder .write ("WHERE chg. {0} > {1} ORDER BY chg.{0} " .format (table_configuration [ 'primary_key' ],
59- previous_batch_key ))
67+ sql_builder .write ("WHERE {0}" .format (self . build_where_clause ( batch_key_tracker , "t" )))
68+ sql_builder . write ( "ORDER BY {0}" . format ( order_by ))
6069
6170 return sql_builder .getvalue ()
6271
@@ -65,7 +74,8 @@ def assert_data_source_is_valid(self, table_configuration, configured_columns):
6574 columns_in_database = self .get_table_columns (table_configuration )
6675
6776 for column in configured_columns :
68- self .assert_column_exists (column ['source_name' ], columns_in_database , "{0}.{1}" .format (table_configuration ['schema' ], table_configuration ['name' ]))
77+ self .assert_column_exists (column ['source_name' ], columns_in_database ,
78+ "{0}.{1}" .format (table_configuration ['schema' ], table_configuration ['name' ]))
6979
7080 def assert_column_exists (self , column_name , columns_in_database , table_name ):
7181 if column_name in columns_in_database :
@@ -82,9 +92,10 @@ def get_table_columns(self, table_configuration):
8292 autoload_with = self .database_engine )
8393 return list (map (lambda column : column .name , table .columns ))
8494
85-
86- def get_next_data_frame (self , table_configuration , columns , batch_configuration , batch_tracker , previous_batch_key , full_refresh , change_tracking_info ):
87- sql = self .build_select_statement (table_configuration , columns , batch_configuration , previous_batch_key , full_refresh , change_tracking_info ,)
95+ def get_next_data_frame (self , table_configuration , columns , batch_configuration , batch_tracker , batch_key_tracker ,
96+ full_refresh , change_tracking_info ):
97+ sql = self .build_select_statement (table_configuration , columns , batch_configuration , batch_key_tracker ,
98+ full_refresh , change_tracking_info , )
8899 self .logger .debug ("Starting read of SQL Statement: {0}" .format (sql ))
89100 data_frame = pandas .read_sql_query (sql , self .database_engine )
90101
@@ -111,9 +122,11 @@ def init_change_tracking(self, table_configuration, last_sync_version):
111122 sql_builder .write ("DECLARE @last_sync_version bigint = {0}; \n " .format (last_sync_version ))
112123 sql_builder .write ("DECLARE @this_sync_version bigint = 0; \n " )
113124 sql_builder .write ("DECLARE @next_sync_version bigint = CHANGE_TRACKING_CURRENT_VERSION(); \n " )
114- sql_builder .write ("IF @last_sync_version >= CHANGE_TRACKING_MIN_VALID_VERSION(OBJECT_ID('{0}.{1}'))\n " .format (table_configuration ['schema' ],table_configuration ['name' ]))
125+ sql_builder .write ("IF @last_sync_version >= CHANGE_TRACKING_MIN_VALID_VERSION(OBJECT_ID('{0}.{1}'))\n " .format (
126+ table_configuration ['schema' ], table_configuration ['name' ]))
115127 sql_builder .write (" SET @this_sync_version = @last_sync_version; \n " )
116- sql_builder .write (" SELECT @next_sync_version as next_sync_version, @this_sync_version as this_sync_version; \n " )
128+ sql_builder .write (
129+ " SELECT @next_sync_version as next_sync_version, @this_sync_version as this_sync_version; \n " )
117130
118131 self .logger .debug ("Getting ChangeTrackingInformation for {0}.{1}. {2}" .format (table_configuration ['schema' ],
119132 table_configuration ['name' ],
@@ -125,3 +138,38 @@ def init_change_tracking(self, table_configuration, last_sync_version):
125138
126139 force_full_load = bool (row ["this_sync_version" ] == 0 or row ["next_sync_version" ] == 0 )
127140 return ChangeTrackingInfo (row ["this_sync_version" ], row ["next_sync_version" ], force_full_load )
141+
142+ @staticmethod
143+ def build_where_clause (batch_key_tracker , table_alias ):
144+ has_value = False
145+
146+ try :
147+ sql_builder = io .StringIO ()
148+ for primary_key in batch_key_tracker .bookmarks :
149+ if has_value :
150+ sql_builder .write (" AND " )
151+
152+ sql_builder .write (
153+ " {0}.{1} > {2}" .format (table_alias , primary_key , batch_key_tracker .bookmarks [primary_key ]))
154+ has_value = True
155+
156+ return sql_builder .getvalue ()
157+ finally :
158+ sql_builder .close ()
159+
160+ @staticmethod
161+ def build_change_table_on_clause (batch_key_tracker ):
162+ has_value = False
163+
164+ try :
165+ sql_builder = io .StringIO ()
166+ for primary_key in batch_key_tracker .bookmarks :
167+ if has_value :
168+ sql_builder .write (" AND " )
169+
170+ sql_builder .write (" chg.{0} = t.{0}" .format (primary_key ))
171+ has_value = True
172+
173+ return sql_builder .getvalue ()
174+ finally :
175+ sql_builder .close ()
0 commit comments