Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions daffodil/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import pkgutil
__path__ = pkgutil.extend_path(__path__, __name__)

from .parser import Daffodil, TimeStamp
from .predicate import DictionaryPredicateDelegate
from .hstore_predicate import HStoreQueryDelegate
from .clickhouse_query_delegate import ClickHouseQueryDelegate
from .pretty_print import PrettyPrintDelegate
from .key_expectation_delegate import KeyExpectationDelegate
from .simulation_delegate import SimulationMatchingDelegate
1 change: 1 addition & 0 deletions daffodil/__init__.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .parser import Daffodil, TimeStamp
from .predicate import DictionaryPredicateDelegate
from .hstore_predicate import HStoreQueryDelegate
from .clickhouse_query_delegate import ClickHouseQueryDelegate
from .pretty_print import PrettyPrintDelegate
from .key_expectation_delegate import KeyExpectationDelegate
from .simulation_delegate import SimulationMatchingDelegate
68 changes: 68 additions & 0 deletions daffodil/clickhouse_query_delegate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from .parser import BaseDaffodilDelegate, TimeStamp


class ClickHouseQueryDelegate(BaseDaffodilDelegate):
"""Render ClickHouse SQL for Daffodil expressions."""

def __init__(self, map_field_name: str = "hs_data") -> None:
self.field = map_field_name

def mk_any(self, children):
children = [c for c in children if c]
if not children:
return "0"
return " OR ".join(f"({child})" for child in children)

def mk_all(self, children):
children = [c for c in children if c]
if not children:
return "1"
return " AND ".join(f"({child})" for child in children)

def mk_not_any(self, children):
return f"NOT ({self.mk_any(children)})"

def mk_not_all(self, children):
return f"NOT ({self.mk_all(children)})"

def mk_comment(self, comment, is_inline: bool):
return ""

def mk_test(self, test_str: str):
return test_str

def mk_cmp(self, key, test, val):
return self._mk_cmp(key.content, val, test.content)

def _mk_cmp(self, key: str, val, test: str):
val = val.content
field_expr = f"{self.field}.{key}"

if test == "?=":
return f"isNotNull({field_expr})" if val else f"isNull({field_expr})"

value = self.format_value(val)

if test == "in":
return f"{field_expr} IN {value}"
elif test == "!in":
return f"{field_expr} NOT IN {value}"
else:
return f"{field_expr} {test} {value}"

def format_value(self, val):
if isinstance(val, list):
formatted = ", ".join(self.format_value(v) for v in val)
return f"({formatted})"
elif isinstance(val, str):
return "'{}'".format(val.replace("'", "''"))
elif isinstance(val, TimeStamp):
return str(val.content)
elif isinstance(val, bool):
return "1" if val else "0"
else:
return str(val)

def call(self, predicate, *args):
return predicate

45 changes: 45 additions & 0 deletions test/test_clickhouse_delegate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import unittest
import os
import sys

# Put the parent of this file's directory at the front of sys.path so the
# `daffodil` imports that follow are looked up there first.
ROOT = os.path.join(os.path.dirname(__file__), '..')
sys.path.insert(0, ROOT)

from daffodil import Daffodil
from daffodil.clickhouse_query_delegate import ClickHouseQueryDelegate

class ClickHouseDelegateTests(unittest.TestCase):
    """Check the SQL text rendered by ClickHouseQueryDelegate for sample filters."""

    def _render(self, fltr):
        """Parse ``fltr`` with a default delegate and return the result of calling it."""
        parsed = Daffodil(fltr, delegate=ClickHouseQueryDelegate())
        return parsed()

    def test_simple(self):
        """A single numeric equality."""
        self.assertEqual(
            self._render('zip_code = 8002'),
            "(hs_data.zip_code = 8002)",
        )

    def test_medium(self):
        """An "any" group of two string comparisons."""
        rendered = self._render('[ gender = "female"\n gender = "male" ]')
        self.assertEqual(
            rendered,
            "((hs_data.gender = 'female') OR (hs_data.gender = 'male'))",
        )

    def test_advanced(self):
        """An "all" group mixing existence, ordering and a negated "any" group."""
        filter_lines = [
            '{',
            ' gender ?= true',
            ' sat_math_avg_score >= 500',
            ' ![',
            ' zip_code = 10001',
            ' zip_code = 10002',
            ' ]',
            '}',
        ]
        rendered = self._render('\n'.join(filter_lines))
        self.assertEqual(
            rendered,
            "((isNotNull(hs_data.gender)) AND (hs_data.sat_math_avg_score >= 500) AND (NOT ((hs_data.zip_code = 10001) OR (hs_data.zip_code = 10002))))",
        )

    def test_timestamp(self):
        """A timestamp literal is rendered as a number."""
        self.assertEqual(
            self._render('created >= timestamp(2017-06-01)'),
            "(hs_data.created >= 1496275200.0)",
        )

    def test_in_operators(self):
        """``in`` and ``!in`` map to ``IN`` and ``NOT IN``."""
        included = self._render('num_of_sat_test_takers in (50, 60)')
        self.assertEqual(included, "(hs_data.num_of_sat_test_takers IN (50, 60))")

        excluded = self._render('num_of_sat_test_takers !in (50)')
        self.assertEqual(excluded, "(hs_data.num_of_sat_test_takers NOT IN (50))")

    def test_existence_false(self):
        """``?= false`` renders as an ``isNull`` check."""
        self.assertEqual(
            self._render('zip_code ?= false'),
            "(isNull(hs_data.zip_code))",
        )

# Run the test cases with unittest's runner when this file is executed directly.
if __name__ == '__main__':
    unittest.main()