diff --git a/daffodil/__init__.py b/daffodil/__init__.py
new file mode 100644
index 0000000..c8fd34d
--- /dev/null
+++ b/daffodil/__init__.py
@@ -0,0 +1,10 @@
+import pkgutil
+__path__ = pkgutil.extend_path(__path__, __name__)
+
+from .parser import Daffodil, TimeStamp
+from .predicate import DictionaryPredicateDelegate
+from .hstore_predicate import HStoreQueryDelegate
+from .clickhouse_query_delegate import ClickHouseQueryDelegate
+from .pretty_print import PrettyPrintDelegate
+from .key_expectation_delegate import KeyExpectationDelegate
+from .simulation_delegate import SimulationMatchingDelegate
diff --git a/daffodil/__init__.pyx b/daffodil/__init__.pyx
index 8c22511..dc2ccf1 100644
--- a/daffodil/__init__.pyx
+++ b/daffodil/__init__.pyx
@@ -1,6 +1,7 @@
 from .parser import Daffodil, TimeStamp
 from .predicate import DictionaryPredicateDelegate
 from .hstore_predicate import HStoreQueryDelegate
+from .clickhouse_query_delegate import ClickHouseQueryDelegate
 from .pretty_print import PrettyPrintDelegate
 from .key_expectation_delegate import KeyExpectationDelegate
 from .simulation_delegate import SimulationMatchingDelegate
diff --git a/daffodil/clickhouse_query_delegate.py b/daffodil/clickhouse_query_delegate.py
new file mode 100644
index 0000000..866a167
--- /dev/null
+++ b/daffodil/clickhouse_query_delegate.py
@@ -0,0 +1,93 @@
+from .parser import BaseDaffodilDelegate, TimeStamp
+
+
+class ClickHouseQueryDelegate(BaseDaffodilDelegate):
+    """Render ClickHouse SQL for Daffodil expressions.
+
+    Each ``mk_*`` hook returns a SQL fragment as a string; keys are rendered
+    as ``<map_field_name>.<key>``.
+    """
+
+    def __init__(self, map_field_name: str = "hs_data") -> None:
+        """Store the column name used as the prefix of every key."""
+        self.field = map_field_name
+
+    def mk_any(self, children):
+        """OR together the non-empty child fragments; ``0`` if none remain."""
+        children = [c for c in children if c]
+        if not children:
+            return "0"
+        return " OR ".join(f"({child})" for child in children)
+
+    def mk_all(self, children):
+        """AND together the non-empty child fragments; ``1`` if none remain."""
+        children = [c for c in children if c]
+        if not children:
+            return "1"
+        return " AND ".join(f"({child})" for child in children)
+
+    def mk_not_any(self, children):
+        """Negate the ``mk_any`` rendering of ``children``."""
+        return f"NOT ({self.mk_any(children)})"
+
+    def mk_not_all(self, children):
+        """Negate the ``mk_all`` rendering of ``children``."""
+        return f"NOT ({self.mk_all(children)})"
+
+    def mk_comment(self, comment, is_inline: bool):
+        """Drop comments: the empty string is filtered out by mk_any/mk_all."""
+        return ""
+
+    def mk_test(self, test_str: str):
+        """Return ``test_str`` unchanged."""
+        return test_str
+
+    def mk_cmp(self, key, test, val):
+        """Render one condition, unwrapping ``key`` and ``test`` via ``.content``."""
+        return self._mk_cmp(key.content, val, test.content)
+
+    def _mk_cmp(self, key: str, val, test: str):
+        """Build the SQL for a single comparison against ``self.field``."""
+        val = val.content
+        field_expr = f"{self.field}.{key}"
+
+        # "?=" is an existence test: a truthy value asks for presence.
+        if test == "?=":
+            return f"isNotNull({field_expr})" if val else f"isNull({field_expr})"
+
+        value = self.format_value(val)
+
+        if test == "in":
+            return f"{field_expr} IN {value}"
+        elif test == "!in":
+            return f"{field_expr} NOT IN {value}"
+        else:
+            return f"{field_expr} {test} {value}"
+
+    def format_value(self, val):
+        """Render a Python value as a ClickHouse SQL literal.
+
+        Lists become parenthesised, comma-separated tuples; strings are
+        single-quoted and escaped; booleans become ``1``/``0``; anything
+        else is passed through ``str``.
+        """
+        if isinstance(val, list):
+            formatted = ", ".join(self.format_value(v) for v in val)
+            return f"({formatted})"
+        elif isinstance(val, str):
+            # ClickHouse treats backslash as an escape character inside
+            # string literals, so it must be doubled before quotes are
+            # escaped; otherwise input such as \' would end the literal.
+            escaped = val.replace("\\", "\\\\").replace("'", "''")
+            return f"'{escaped}'"
+        elif isinstance(val, TimeStamp):
+            return str(val.content)
+        elif isinstance(val, bool):
+            return "1" if val else "0"
+        else:
+            return str(val)
+
+    def call(self, predicate, *args):
+        """Return ``predicate`` as is; extra arguments are ignored."""
+        return predicate
+
diff --git a/test/test_clickhouse_delegate.py b/test/test_clickhouse_delegate.py
new file mode 100644
index 0000000..ec16538
--- /dev/null
+++ b/test/test_clickhouse_delegate.py
@@ -0,0 +1,49 @@
+import unittest
+import os
+import sys
+
+ROOT = os.path.join(os.path.dirname(__file__), '..')
+sys.path.insert(0, ROOT)
+
+from daffodil import Daffodil
+from daffodil.clickhouse_query_delegate import ClickHouseQueryDelegate
+
+class ClickHouseDelegateTests(unittest.TestCase):
+    def _render(self, fltr):
+        delegate = ClickHouseQueryDelegate()
+        return Daffodil(fltr, delegate=delegate)()
+
+    def test_simple(self):
+        sql = self._render('zip_code = 8002')
+        self.assertEqual(sql, "(hs_data.zip_code = 8002)")
+
+    def test_medium(self):
+        fltr = '[ gender = "female"\n gender = "male" ]'
+        expected = "((hs_data.gender = 'female') OR (hs_data.gender = 'male'))"
+        self.assertEqual(self._render(fltr), expected)
+
+    def test_advanced(self):
+        fltr = '{\n gender ?= true\n sat_math_avg_score >= 500\n ![\n zip_code = 10001\n zip_code = 10002\n ]\n}'
+        expected = "((isNotNull(hs_data.gender)) AND (hs_data.sat_math_avg_score >= 500) AND (NOT ((hs_data.zip_code = 10001) OR (hs_data.zip_code = 10002))))"
+        self.assertEqual(self._render(fltr), expected)
+
+    def test_timestamp(self):
+        sql = self._render('created >= timestamp(2017-06-01)')
+        self.assertEqual(sql, "(hs_data.created >= 1496275200.0)")
+
+    def test_in_operators(self):
+        sql = self._render('num_of_sat_test_takers in (50, 60)')
+        self.assertEqual(sql, "(hs_data.num_of_sat_test_takers IN (50, 60))")
+        sql = self._render('num_of_sat_test_takers !in (50)')
+        self.assertEqual(sql, "(hs_data.num_of_sat_test_takers NOT IN (50))")
+
+    def test_existence_false(self):
+        sql = self._render('zip_code ?= false')
+        self.assertEqual(sql, "(isNull(hs_data.zip_code))")
+
+    def test_string_escaping(self):
+        delegate = ClickHouseQueryDelegate()
+        self.assertEqual(delegate.format_value("it's\\"), "'it''s\\\\'")
+
+if __name__ == '__main__':
+    unittest.main()