Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions daffodil/__init__.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .parser import Daffodil, TimeStamp
from .predicate import DictionaryPredicateDelegate
from .hstore_predicate import HStoreQueryDelegate
from .clickhouse_query_delegate import ClickHouseQueryDelegate
from .pretty_print import PrettyPrintDelegate
from .key_expectation_delegate import KeyExpectationDelegate
from .simulation_delegate import SimulationMatchingDelegate
88 changes: 88 additions & 0 deletions daffodil/clickhouse_query_delegate.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from .parser cimport Token, BaseDaffodilDelegate
import re


cdef class ClickHouseQueryDelegate(BaseDaffodilDelegate):
    """Daffodil delegate that renders a filter as ClickHouse SQL text.

    Every ``mk_*`` method returns a string holding a boolean SQL
    expression. Column references are qualified with ``table``.
    """
    cdef public str table

    def __cinit__(self, table_name="hs_data"):
        self.table = table_name

    def _join_children(self, children, joiner, empty):
        """Parenthesise the non-empty children and join them with ``joiner``.

        Empty children (for example the "" produced by ``mk_comment``) are
        dropped regardless of the container type, and the input is iterated
        exactly once so generators are safe. Returns ``empty`` when nothing
        is left to join.
        """
        parts = [f"({child})" for child in (children or ()) if child]
        if not parts:
            return empty
        return joiner.join(parts)

    def mk_any(self, children):
        """Render an OR group; a group with no usable children is "0"."""
        return self._join_children(children, " OR ", "0")

    def mk_all(self, children):
        """Render an AND group; a group with no usable children is "1"."""
        return self._join_children(children, " AND ", "1")

    def mk_not_any(self, children):
        """Render the negation of an OR group."""
        return "NOT ({0})".format(self.mk_any(children))

    def mk_not_all(self, children):
        """Render the negation of an AND group."""
        return "NOT ({0})".format(self.mk_all(children))

    def mk_comment(self, comment, is_inline):
        """Comments produce no SQL; the empty string is dropped by groups."""
        return ""

    def _escape(self, s):
        """Escape ``s`` for use inside a single-quoted SQL string literal.

        Backslashes are doubled first: ClickHouse treats a backslash as an
        escape character inside string literals, so an unescaped trailing
        backslash would swallow the closing quote. Single quotes are then
        doubled.
        """
        return s.replace("\\", "\\\\").replace("'", "''")

    def _quote_identifier(self, name):
        """Wrap ``name`` in backticks, escaping backslashes and backticks."""
        return "`" + name.replace("\\", "\\\\").replace("`", "\\`") + "`"

    def _format_value(self, val):
        """Render a Python value as a SQL literal.

        Lists become a parenthesised, comma-separated tuple; strings are
        quoted and escaped; booleans become 1/0; anything else is rendered
        with ``str()``.
        """
        if isinstance(val, list):
            return "(" + ", ".join(self._format_value(v) for v in val) + ")"
        if isinstance(val, str):
            return "'{}'".format(self._escape(val))
        if isinstance(val, bool):
            return "1" if val else "0"
        return str(val)

    cdef mk_cmp(self, Token key, Token test, Token val):
        """Render one ``key <op> value`` comparison.

        ``?=`` becomes isNull/isNotNull, ``in``/``!in`` become IN/NOT IN
        with the column cast to match a homogeneous list, ``!=`` also
        matches NULL columns, and every other operator is emitted verbatim.
        """
        cdef str key_str = key.content
        cdef object val_obj = val.content
        cdef str op = test.content
        cdef str key_expr
        cdef str cast_expr
        cdef str val_expr

        # fullmatch, not match with "$": "$" also matches just before a
        # trailing newline, which would let "name\n" through unquoted.
        if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", key_str):
            key_expr = f"{self.table}.{key_str}"
        else:
            key_expr = f"{self.table}.{self._quote_identifier(key_str)}"

        if op == "?=":
            # Only a literal False asks for absence; anything else asks for
            # presence.
            if val_obj is False:
                return f"isNull({key_expr})"
            return f"isNotNull({key_expr})"

        if op in ("in", "!in"):
            cast_expr = key_expr
            # Cast the column so it compares against a uniformly typed list.
            if isinstance(val_obj, list) and val_obj:
                if all(isinstance(v, int) for v in val_obj):
                    cast_expr = f"toUInt64({key_expr})"
                elif all(isinstance(v, str) for v in val_obj):
                    cast_expr = f"toString({key_expr})"
            val_expr = self._format_value(val_obj)
            if op == "in":
                return f"{cast_expr} IN {val_expr}"
            return f"{cast_expr} NOT IN {val_expr}"

        val_expr = self._format_value(val_obj)
        if op == "!=":
            # A NULL column never satisfies "!=", so include it explicitly.
            return f"({key_expr} != {val_expr}) OR ({key_expr} IS NULL)"
        return f"{key_expr} {op} {val_expr}"

    def call(self, predicate, query=None):
        """Return the rendered predicate unchanged; ``query`` is ignored."""
        return predicate
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from distutils.core import setup
from setuptools import setup
from Cython.Build import cythonize
import Cython.Compiler.Options

Expand Down
62 changes: 62 additions & 0 deletions test/test_clickhouse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import unittest

from daffodil import Daffodil, ClickHouseQueryDelegate


class ClickHouseDelegateTests(unittest.TestCase):
    """Checks the SQL text ClickHouseQueryDelegate emits for Daffodil filters."""

    def _render(self, fltr):
        # Build a delegate bound to the "hs_data" table, parse the filter
        # with it, and call the resulting Daffodil object to get the output.
        clickhouse = ClickHouseQueryDelegate("hs_data")
        compiled = Daffodil(fltr, delegate=clickhouse)
        return compiled()

    def _assert_renders(self, cases):
        # Render and compare each (filter, expected SQL) pair in order,
        # stopping at the first mismatch.
        for fltr, expected in cases:
            self.assertEqual(self._render(fltr), expected)

    def test_simple(self):
        self._assert_renders([
            ('zip_code = 8002', "(hs_data.zip_code = 8002)"),
        ])

    def test_medium(self):
        # Square brackets denote an "any" group.
        self._assert_renders([
            (
                '[ dbn = "01M292"\n dbn = "01M448" ]',
                "((hs_data.dbn = '01M292') OR (hs_data.dbn = '01M448'))",
            ),
        ])

    def test_advanced(self):
        # An "all" group that contains a negated "any" group.
        self._assert_renders([
            (
                '{\n tag_with_null_value ?= true\n sat_math_avg_score >= 500\n ![\n zip_code = 10004\n zip_code = 10002\n ]\n}',
                "((isNotNull(hs_data.tag_with_null_value)) AND (hs_data.sat_math_avg_score >= 500) AND (NOT ((hs_data.zip_code = 10004) OR (hs_data.zip_code = 10002))))",
            ),
        ])

    def test_timestamp(self):
        self._assert_renders([
            (
                'created >= timestamp(2017-06-01)',
                "(hs_data.created >= 1496275200.0)",
            ),
        ])

    def test_in_operators(self):
        # Integer lists cast the column with toUInt64.
        self._assert_renders([
            (
                'num_of_sat_test_takers in (50, 60)',
                "(toUInt64(hs_data.num_of_sat_test_takers) IN (50, 60))",
            ),
            (
                'num_of_sat_test_takers !in (50)',
                "(toUInt64(hs_data.num_of_sat_test_takers) NOT IN (50))",
            ),
        ])

    def test_in_string_operators(self):
        # String lists cast the column with toString.
        self._assert_renders([
            (
                'dbn in ("01M292", "01M448")',
                "(toString(hs_data.dbn) IN ('01M292', '01M448'))",
            ),
        ])

    def test_key_with_special_characters(self):
        # Keys that are not plain identifiers are wrapped in backticks.
        self._assert_renders([
            (
                '"$calculated_pct" = "85"',
                "(hs_data.`$calculated_pct` = '85')",
            ),
            (
                '"$calculated_pct" ?= true',
                "(isNotNull(hs_data.`$calculated_pct`))",
            ),
            (
                '"$calculated_pct" != "85"',
                "((hs_data.`$calculated_pct` != '85') OR (hs_data.`$calculated_pct` IS NULL))",
            ),
        ])

    def test_existence_false(self):
        self._assert_renders([
            ('zip_code ?= false', "(isNull(hs_data.zip_code))"),
        ])

    def test_not_equal_with_null(self):
        # "!=" must also match rows where the column is NULL.
        self._assert_renders([
            (
                '{\n tag_with_null_value ?= true\n zip_code != "10004"\n}',
                "((isNotNull(hs_data.tag_with_null_value)) AND ((hs_data.zip_code != '10004') OR (hs_data.zip_code IS NULL)))",
            ),
        ])


# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()