Skip to content

Commit 30ec927

Browse files
Replaced regular expressions for parsing SQL with regular expressions
that perform better (#172).
1 parent 801fe07 commit 30ec927

File tree

2 files changed

+25
-14
lines changed

2 files changed

+25
-14
lines changed

doc/src/release_notes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ Thin Mode Changes
1515

1616
#) Fixed bug when SQL is executed after first being parsed with Oracle
1717
Database 23c.
18+
#) Replaced regular expressions for parsing SQL with regular expressions that
19+
perform better
20+
(`issue 172 <https://github.com/oracle/python-oracledb/issues/172>`__).
1821
#) Fixed bug when a query is re-executed with the setting
1922
``oracledb.defaults.fetch_lobs = False`` after a table underlying the query
2023
is dropped and recreated.

src/oracledb/impl/thin/statement.pyx

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,18 @@
3838
# the dollar sign and the pound sign.
3939
# 5. Non-quoted binds cannot be Oracle Database Reserved Names (Server handles
4040
# this case and returns an appropriate error)
41-
BIND_PATTERN = r'(?<!"\:)(?<=\:)\s*("[^\"]*"|[^\W\d_][\w\$#]*|\d+)'
41+
BIND_PATTERN = r':\s*((?:".*?")|(?:[^\W\d_][\w\$#]*)|\d+)'
4242

4343
# pattern used for detecting a DML returning clause; bind variables in the
44-
# first group are input variables; bind variables in the second group are
45-
# output only variables
46-
DML_RETURNING_PATTERN = r'(?si)([)\s]RETURNING[(\s][\s\S]+[)\s]INTO\s+)(.*?$)'
44+
# SQL prior to the INTO keyword are input variables; bind variables in the SQL
45+
# after the INTO keyword are output variables
46+
DML_RETURNING_PATTERN = r'(?si)(?<=\bRETURNING\b)(.*?)(?=\bINTO\b)'
47+
48+
# patterns for identifying comments and quoted strings
49+
SINGLE_LINE_COMMENT_PATTERN = r'--.*'
50+
MULTI_LINE_COMMENT_PATTERN = r'(?s)/\*.*?\*/'
51+
CONSTANT_STRING_PATTERN = r"(?s)'.*?'"
52+
QUOTED_NAME_PATTERN = r'(:\s*)?(".*?")'
4753

4854
cdef class BindInfo:
4955

@@ -178,14 +184,16 @@ cdef class Statement:
178184
self._bind_info_dict = collections.OrderedDict()
179185
self._bind_info_list = []
180186

181-
# Strip single/multiline comments and strings from the sql statement to
182-
# ease searching for bind variables.
183-
sql = re.sub(r"/\*[\S\n ]+?\*/", "", sql)
184-
sql = re.sub(r"\--.*(\n|$)", "", sql)
185-
sql = re.sub(r"""'[^']*'(?=(?:[^']*[^']*')*[^']*$)*""", "", sql,
186-
flags=re.MULTILINE)
187-
sql = re.sub(r'(:\s*)?("([^"]*)")',
188-
lambda m: m.group(0) if sql[m.start(0)] == ":" else "",
187+
# Strip single/multiline comments and replace constant strings and
188+
# quoted names with single characters in order to facilitate detection
189+
# of bind variables; note that bind variables can be quoted so a check
190+
# must be made to ensure that a quoted string doesn't refer to a bind
191+
# variable first before it can be replaced
192+
sql = re.sub(MULTI_LINE_COMMENT_PATTERN, "", sql)
193+
sql = re.sub(SINGLE_LINE_COMMENT_PATTERN, "", sql)
194+
sql = re.sub(CONSTANT_STRING_PATTERN, "S", sql)
195+
sql = re.sub(QUOTED_NAME_PATTERN,
196+
lambda m: m.group(0) if sql[m.start(0)] == ":" else "Q",
189197
sql)
190198

191199
# determine statement type
@@ -197,9 +205,9 @@ cdef class Statement:
197205
if not self._is_plsql:
198206
match = re.search(DML_RETURNING_PATTERN, sql)
199207
if match is not None:
200-
pos = match.start(2)
208+
pos = match.end()
201209
input_sql = sql[:pos]
202-
returning_sql = sql[pos:]
210+
returning_sql = sql[pos + 4:]
203211
self._add_binds(input_sql, is_return_bind=False)
204212
if returning_sql is not None:
205213
self._is_returning = True

0 commit comments

Comments
 (0)