From 9ce137e59f45601839371541e3308100f73b0766 Mon Sep 17 00:00:00 2001 From: semyonsinchenko Date: Thu, 17 Jul 2025 07:00:46 +0200 Subject: [PATCH] Fix schema mismatch between native and python Add basic tests that assert that functionality works at least. --- python/falsa/utils.py | 26 +++++++++++++------------- tests/tests_main.py | 17 +++++++++++++++++ 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/python/falsa/utils.py b/python/falsa/utils.py index ce3b1e8..714ddd7 100644 --- a/python/falsa/utils.py +++ b/python/falsa/utils.py @@ -76,26 +76,26 @@ class Schemas(Enum): ("id1", pa.int64(), False), ("id2", pa.int64(), False), ("id3", pa.int64(), False), - ("id4", pa.utf8()), - ("id5", pa.utf8()), - ("id6", pa.utf8()), - ("v1", pa.float64()), + ("id4", pa.utf8(), False), + ("id5", pa.utf8(), False), + ("id6", pa.utf8(), False), + ("v1", pa.float64(), False), ] ) JOIN_RHS_SMALL = pa.schema( [ ("id1", pa.int64(), False), - ("id4", pa.utf8()), - ("v2", pa.float64()), + ("id4", pa.utf8(), False), + ("v2", pa.float64(), False), ] ) JOIN_RHS_MEDIUM = pa.schema( [ ("id1", pa.int64(), False), ("id2", pa.int64(), False), - ("id4", pa.utf8()), - ("id5", pa.utf8()), - ("v2", pa.float64()), + ("id4", pa.utf8(), False), + ("id5", pa.utf8(), False), + ("v2", pa.float64(), False), ] ) JOIN_RHS_BIG = pa.schema( @@ -103,10 +103,10 @@ class Schemas(Enum): ("id1", pa.int64(), False), ("id2", pa.int64(), False), ("id3", pa.int64(), False), - ("id4", pa.utf8()), - ("id5", pa.utf8()), - ("id6", pa.utf8()), - ("v2", pa.float64()), + ("id4", pa.utf8(), False), + ("id5", pa.utf8(), False), + ("id6", pa.utf8(), False), + ("v2", pa.float64(), False), ] ) diff --git a/tests/tests_main.py b/tests/tests_main.py index ee37403..f859dbc 100644 --- a/tests/tests_main.py +++ b/tests/tests_main.py @@ -27,4 +27,21 @@ sys.stdout.flush() sys.exit(1) + # Test main functionality with parquet files + command_gb = ["falsa", "groupby", "--path-prefix", "./", "--data-format", "PARQUET"] + proc_gb = Popen(command_gb, stdout=PIPE, stderr=STDOUT) + res_gb = proc_gb.communicate() + if proc_gb.returncode != 0: + sys.stdout.write("Error in groupby parquet") + sys.stdout.flush() + sys.exit(1) + + command_join = ["falsa", "join", "--path-prefix", "./", "--data-format", "PARQUET"] + proc_join = Popen(command_join, stdout=PIPE, stderr=STDOUT) + res_join = proc_join.communicate() + if proc_join.returncode != 0: + sys.stdout.write("Error in join parquet") + sys.stdout.flush() + sys.exit(1) + sys.exit(0)