From 9d830b7e5f5d49ba25cbf9338c9c0c78a77b5a70 Mon Sep 17 00:00:00 2001 From: darshanime-d11 Date: Tue, 18 Mar 2025 19:04:49 +0530 Subject: [PATCH 1/4] add encoder for list, dict when using native types --- snowfakery/data_generator_runtime_object_model.py | 2 +- snowfakery/output_streams.py | 2 ++ tests/test_json.yml | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 tests/test_json.yml diff --git a/snowfakery/data_generator_runtime_object_model.py b/snowfakery/data_generator_runtime_object_model.py index 6183fcdd..ce97d4df 100644 --- a/snowfakery/data_generator_runtime_object_model.py +++ b/snowfakery/data_generator_runtime_object_model.py @@ -19,7 +19,7 @@ # objects that represent the hierarchy of a data generator. # roughly similar to the YAML structure but with domain-specific objects Definition = Union["ObjectTemplate", "SimpleValue", "StructuredValue"] -FieldValue = Union[None, Scalar, ObjectRow, tuple, PluginResult, ObjectReference] +FieldValue = Union[None, Scalar, ObjectRow, tuple, PluginResult, ObjectReference, list, dict] class FieldDefinition(ABC): diff --git a/snowfakery/output_streams.py b/snowfakery/output_streams.py index d10b50b9..5db8dfb8 100644 --- a/snowfakery/output_streams.py +++ b/snowfakery/output_streams.py @@ -61,6 +61,8 @@ class OutputStream(ABC): type(None): noop, bool: int, Decimal: str, + list: lambda data_list: json.dumps(data_list), + dict: lambda data_list: json.dumps(data_list), } uses_folder = False uses_path = False diff --git a/tests/test_json.yml b/tests/test_json.yml new file mode 100644 index 00000000..0ef80394 --- /dev/null +++ b/tests/test_json.yml @@ -0,0 +1,4 @@ +- snowfakery_version: 3 +- object: SomeObject + fields: + formula_field_example: ${{fake.json(data_columns={'Spec':'@1.0.1', 'ID':'pyint','Details':{'Name':'name', 'Address':'address'}}, num_rows=10)}} From eb57cafa016283ac269e4fe08541c7b333017768 Mon Sep 17 00:00:00 2001 From: darshanime-d11 Date: Tue, 18 Mar 2025 19:32:59 +0530 Subject: [PATCH 2/4] add test for list/dict --- snowfakery/output_streams.py | 4 ++-- tests/test_json.yml | 4 ---- tests/test_output_streams.py | 9 ++++++++- 3 files changed, 10 insertions(+), 7 deletions(-) delete mode 100644 tests/test_json.yml diff --git a/snowfakery/output_streams.py b/snowfakery/output_streams.py index 5db8dfb8..948fb967 100644 --- a/snowfakery/output_streams.py +++ b/snowfakery/output_streams.py @@ -61,8 +61,8 @@ class OutputStream(ABC): type(None): noop, bool: int, Decimal: str, - list: lambda data_list: json.dumps(data_list), - dict: lambda data_list: json.dumps(data_list), + list: lambda data: json.dumps(data), + dict: lambda data: json.dumps(data), } uses_folder = False uses_path = False diff --git a/tests/test_json.yml b/tests/test_json.yml deleted file mode 100644 index 0ef80394..00000000 --- a/tests/test_json.yml +++ /dev/null @@ -1,4 +0,0 @@ -- snowfakery_version: 3 -- object: SomeObject - fields: - formula_field_example: ${{fake.json(data_columns={'Spec':'@1.0.1', 'ID':'pyint','Details':{'Name':'name', 'Address':'address'}}, num_rows=10)}} diff --git a/tests/test_output_streams.py b/tests/test_output_streams.py index 25de7ae2..79fd2e7f 100644 --- a/tests/test_output_streams.py +++ b/tests/test_output_streams.py @@ -353,6 +353,7 @@ def do_output(self, yaml): def test_csv_output(self): yaml = """ + - snowfakery_version: 3 - object: foo fields: a: 1 @@ -365,6 +366,10 @@ def test_csv_output(self): fields: barb: 2 bard: 4 + - object: faz + fields: + list_t: ${{fake.json(data_columns={'Spec':'@1.0.1', 'ID':'pyint','Details':{'Name':'name', 'Address':'address'}}, num_rows=2)}} + dict_t: ${{fake.json(data_columns={'Spec':'@1.0.1', 'ID':'pyint','Details':{'Name':'name', 'Address':'address'}}, num_rows=1)}} """ with TemporaryDirectory() as t: output_stream = CSVOutputStream(Path(t) / "csvoutput") @@ -373,13 +378,15 @@ def test_csv_output(self): assert messages assert "foo.csv" in messages[0] assert "bar.csv" in messages[1] - assert "csvw" in messages[2] + assert "faz.csv" in messages[2] + assert "csvw" in messages[3] assert (Path(t) / "csvoutput" / "foo.csv").exists() with open(Path(t) / "csvoutput" / "csvw_metadata.json") as f: metadata = json.load(f) assert {table["url"] for table in metadata["tables"]} == { "foo.csv", "bar.csv", + "faz.csv", } def test_null(self): From 517385fb2f0456ed546e426c17f6c10cd1e57701 Mon Sep 17 00:00:00 2001 From: darshanime-d11 Date: Mon, 5 May 2025 12:26:46 +0530 Subject: [PATCH 3/4] Update snowfakery/data_generator_runtime_object_model.py Co-authored-by: James Estevez --- snowfakery/data_generator_runtime_object_model.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/snowfakery/data_generator_runtime_object_model.py b/snowfakery/data_generator_runtime_object_model.py index ce97d4df..4bedcfa1 100644 --- a/snowfakery/data_generator_runtime_object_model.py +++ b/snowfakery/data_generator_runtime_object_model.py @@ -19,7 +19,10 @@ # objects that represent the hierarchy of a data generator. # roughly similar to the YAML structure but with domain-specific objects Definition = Union["ObjectTemplate", "SimpleValue", "StructuredValue"] -FieldValue = Union[None, Scalar, ObjectRow, tuple, PluginResult, ObjectReference, list, dict] +FieldValue = Union[ + None, bool, int, float, str, bytes, list, dict, tuple, set, + Scalar, ObjectRow, PluginResult, ObjectReference +] class FieldDefinition(ABC): From 7854e29cc750014ad70e58bbff550367956a4ae8 Mon Sep 17 00:00:00 2001 From: darshanime-d11 Date: Mon, 5 May 2025 16:18:30 +0530 Subject: [PATCH 4/4] add encoders for more types --- snowfakery/output_streams.py | 3 +++ tests/test_output_streams.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/snowfakery/output_streams.py b/snowfakery/output_streams.py index 948fb967..ce2c597a 100644 --- a/snowfakery/output_streams.py +++ b/snowfakery/output_streams.py @@ -63,6 +63,9 @@ class OutputStream(ABC): Decimal: str, list: lambda data: json.dumps(data), dict: lambda data: json.dumps(data), + bytes: str, + tuple: lambda data: data, + set: lambda data: data, } uses_folder = False uses_path = False diff --git a/tests/test_output_streams.py b/tests/test_output_streams.py index 79fd2e7f..b6b08494 100644 --- a/tests/test_output_streams.py +++ b/tests/test_output_streams.py @@ -370,6 +370,9 @@ def test_csv_output(self): fields: list_t: ${{fake.json(data_columns={'Spec':'@1.0.1', 'ID':'pyint','Details':{'Name':'name', 'Address':'address'}}, num_rows=2)}} dict_t: ${{fake.json(data_columns={'Spec':'@1.0.1', 'ID':'pyint','Details':{'Name':'name', 'Address':'address'}}, num_rows=1)}} + bool_t: ${{fake.pybool()}} + tuple_t: ${{(1, 2)}} + bytes_t: ${{fake.binary(length=64)}} """ with TemporaryDirectory() as t: output_stream = CSVOutputStream(Path(t) / "csvoutput")