From 82e7a4d7215faa97fc3b4c6a840a53fc46345384 Mon Sep 17 00:00:00 2001 From: Paul Prescod Date: Tue, 20 Dec 2022 21:09:04 -0800 Subject: [PATCH 1/4] Add a summation feature. --- docs/extending.md | 3 +- docs/index.md | 102 +++++++++++++++++ examples/math_partition_simple.recipe.yml | 17 +++ examples/sum_pennies.yml | 13 +++ examples/sum_pennies_param.yml | 15 +++ examples/sum_plugin_example.yml | 25 +++++ examples/sum_simple_example.yml | 13 +++ schema/snowfakery_recipe.jsonschema.json | 8 ++ snowfakery/standard_plugins/_math.py | 95 +++++++++++++++- tests/test_summation.py | 129 ++++++++++++++++++++++ 10 files changed, 414 insertions(+), 6 deletions(-) create mode 100644 examples/math_partition_simple.recipe.yml create mode 100644 examples/sum_pennies.yml create mode 100644 examples/sum_pennies_param.yml create mode 100644 examples/sum_plugin_example.yml create mode 100644 examples/sum_simple_example.yml create mode 100644 tests/test_summation.py diff --git a/docs/extending.md b/docs/extending.md index 48798da5..5f1dd94b 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -413,7 +413,8 @@ use `context.evaluate_raw()` instead of `context.evaluate()`. Plugins that require "memory" or "state" are possible using `PluginResult` objects or subclasses. Consider a plugin that generates child objects -that include values that sum up values on child objects to a value specified on a parent: +that include values that sum up values on child objects to a value specified on a parent (similar to a simple version +of `Math.random_partition`): ```yaml # examples/sum_child_values.yml diff --git a/docs/index.md b/docs/index.md index c2186b71..8a72aeaf 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1861,6 +1861,108 @@ Or: twelve: ${Math.sqrt} ``` +#### Rolling up numbers: `Math.random_partition` + +Sometimes you want a parent object to have a field value which +is the sum of many child values. 
Snowfakery allows you to +specify or randomly generate the parent sum value and then +it will generate an appropriate number of children with +values that sum up to match it, using `Math.random_partition`: + +```yaml +# examples/math_partition_simple.recipe.yml +- plugin: snowfakery.standard_plugins.Math +- object: ParentObject__c + count: 2 + fields: + TotalAmount__c: + random_number: + min: 30 + max: 90 + friends: + - object: ChildObject__c + for_each: + var: child_value + value: + Math.random_partition: + total: ${{ParentObject__c.TotalAmount__c}} + fields: + Amount__c: ${{child_value}} +``` + +The `Math.random_partition` function splits up a number. +So this recipe might spit out the following +set of parents and children: + +```json +ParentObject__c(id=1, TotalAmount__c=40) +ChildObject__c(id=1, Amount__c=3) +ChildObject__c(id=2, Amount__c=1) +ChildObject__c(id=3, Amount__c=24) +ChildObject__c(id=4, Amount__c=12) +ParentObject__c(id=2, TotalAmount__c=83) +ChildObject__c(id=5, Amount__c=2) +ChildObject__c(id=6, Amount__c=81) +``` + +There are 2 Parent objects created and a random number of +children per parent. + +The `Math.random_partition` function takes the arguments +`min`, which is the smallest +value each part can have, `max`, which is the largest +possible value, `total` which is what all of the values +sum up to and `step` which is a number that each value +must have as a factor. E.g. if `step` is `4` then +values of `4`, `8`, `12` are valid. + +For example: + +```yaml +# examples/sum_simple_example.yml +- plugin: snowfakery.standard_plugins.Math + +- object: Values + for_each: + var: current_value + value: + Math.random_partition: + total: 100 + min: 10 + max: 50 + step: 5 + fields: + Amount: ${{current_value}} +``` + +Which might generate `15,15,25,20,15,10` or `50,50` or `25,50,25`. + +If `step` is a number smaller than `1`, then you can generate +pennies for numeric calculations.
Valid values are `0.01` (penny +granularity), `0.05` (nickel), `0.10` (dime), `0.25` (quarter) and +`0.50` (half dollars). Other values are not supported. + +```yaml +# examples/sum_pennies.yml +- plugin: snowfakery.standard_plugins.Math + +- object: Values + for_each: + var: current_value + value: + Math.random_partition: + total: 100 + min: 10 + max: 50 + step: 0.1 + fields: + Amount: ${{current_value}} +``` + +It is possible to specify values which are inconsistent. +When that happens one of the constraints will be +violated. + ### Advanced Unique IDs with the UniqueId plugin There is a plugin which gives you more control over the generation of diff --git a/examples/math_partition_simple.recipe.yml b/examples/math_partition_simple.recipe.yml new file mode 100644 index 00000000..29f4b59c --- /dev/null +++ b/examples/math_partition_simple.recipe.yml @@ -0,0 +1,17 @@ +- plugin: snowfakery.standard_plugins.Math +- object: ParentObject__c + count: 2 + fields: + TotalAmount__c: + random_number: + min: 30 + max: 90 + friends: + - object: ChildObject__c + for_each: + var: child_value + value: + Math.random_partition: + total: ${{ParentObject__c.TotalAmount__c}} + fields: + Amount__c: ${{child_value}} diff --git a/examples/sum_pennies.yml b/examples/sum_pennies.yml new file mode 100644 index 00000000..d58cf72a --- /dev/null +++ b/examples/sum_pennies.yml @@ -0,0 +1,13 @@ +- plugin: snowfakery.standard_plugins.Math + +- object: Values + for_each: + var: current_value + value: + Math.random_partition: + total: 100 + min: 10 + max: 50 + step: 0.1 + fields: + Amount: ${{current_value}} diff --git a/examples/sum_pennies_param.yml b/examples/sum_pennies_param.yml new file mode 100644 index 00000000..e9498037 --- /dev/null +++ b/examples/sum_pennies_param.yml @@ -0,0 +1,15 @@ +- plugin: snowfakery.standard_plugins.Math +- option: step + default: 0.01 + +- object: Values + for_each: + var: current_value + value: + Math.random_partition: + total: 100 + min: 10 + max: 50 + step:
${{step}} + fields: + Amount: ${{current_value}} diff --git a/examples/sum_plugin_example.yml b/examples/sum_plugin_example.yml new file mode 100644 index 00000000..303d6a59 --- /dev/null +++ b/examples/sum_plugin_example.yml @@ -0,0 +1,25 @@ +# This shows how you could create a plugin or feature where +# a parent object generates child objects which sum up +# to any particular value. + +- plugin: examples.sum_totals.SummationPlugin +- var: summation_helper + value: + SummationPlugin.summer: + total: 100 + step: 10 + +- object: ParentObject__c + count: 10 + fields: + MinimumChildObjectAmount__c: 10 + MinimumStep: 5 + TotalAmount__c: ${{summation_helper.total}} + friends: + - object: ChildObject__c + count: ${{summation_helper.count}} + fields: + Parent__c: + reference: ParentObject__c + Amount__c: ${{summation_helper.next_amount}} + RunningTotal__c: ${{summation_helper.running_total}} diff --git a/examples/sum_simple_example.yml b/examples/sum_simple_example.yml new file mode 100644 index 00000000..2301b66f --- /dev/null +++ b/examples/sum_simple_example.yml @@ -0,0 +1,13 @@ +- plugin: snowfakery.standard_plugins.Math + +- object: Values + for_each: + var: current_value + value: + Math.random_partition: + total: 100 + min: 10 + max: 50 + step: 5 + fields: + Amount: ${{current_value}} diff --git a/schema/snowfakery_recipe.jsonschema.json b/schema/snowfakery_recipe.jsonschema.json index acc5902d..e81c6fee 100644 --- a/schema/snowfakery_recipe.jsonschema.json +++ b/schema/snowfakery_recipe.jsonschema.json @@ -61,6 +61,14 @@ } ] }, + "for_each": { + "type": "object", + "anyOf": [ + { + "$ref": "#/$defs/var" + } + ] + }, "fields": { "type": "object", "additionalProperties": true diff --git a/snowfakery/standard_plugins/_math.py b/snowfakery/standard_plugins/_math.py index 9af57125..58edcc6b 100644 --- a/snowfakery/standard_plugins/_math.py +++ b/snowfakery/standard_plugins/_math.py @@ -1,20 +1,105 @@ import math -from snowfakery.plugins import SnowfakeryPlugin +from 
random import randint, shuffle +from types import SimpleNamespace +from typing import List, Optional, Union +from snowfakery.plugins import SnowfakeryPlugin, memorable, PluginResultIterator class Math(SnowfakeryPlugin): def custom_functions(self, *args, **kwargs): "Expose math functions to Snowfakery" - class MathNamespace: - pass + class MathNamespace(SimpleNamespace): + @memorable + def random_partition( + self, + total: int, + *, + min: int = 1, + max: Optional[int] = None, + step: int = 1, + ): + return GenericPluginResultIterator(False, parts(total, min, max, step)) mathns = MathNamespace() - mathns.__dict__ = math.__dict__.copy() + mathns.__dict__.update(math.__dict__.copy()) mathns.pi = math.pi mathns.round = round mathns.min = min mathns.max = max - + mathns.context = self.context return mathns + + +class GenericPluginResultIterator(PluginResultIterator): + def __init__(self, repeat, iterable): + super().__init__(repeat) + self.next = iter(iterable).__next__ + + +def parts(total: int, min_: int = 1, max_=None, step=1) -> List[Union[int, float]]: + """Split a number into a randomized set of 'pieces'. + The pieces add up to the `total`. E.g. + + parts(12) -> [3, 6, 3] + parts(16) -> [8, 4, 2, 2] + + The numbers generated will never be less than `min_`, if provided. + + The numbers generated will never be greater than `max_`, if provided. + + The numbers generated will always be a multiple of `step`, if provided. + + But...if you provide inconsistent constraints then your values + will be inconsistent with them. e.g. if `total` is not a multiple + of `step`.
+ """ + max_ = max_ or total + factor = 0 + + if step < 1: + assert step in [0.01, 0.5, 0.1, 0.20, 0.25, 0.50], step + factor = step + total = int(total / factor) + step = int(total / factor) + min_ = int(total / factor) + max_ = int(total / factor) + + pieces = [] + + while sum(pieces) < total: + remaining = total - sum(pieces) + smallest = max(min_, step) + if remaining < smallest: + # try to add it to a random other piece + for i, val in enumerate(pieces): + if val + remaining <= max_: + pieces[i] += remaining + remaining = 0 + break + + # just tack it on the end despite + # it being too small...our + # constraints must have been impossible + # to fulfil + if remaining: + pieces.append(remaining) + + else: + part = randint(smallest, min(remaining, max_)) + round_up = part + step - (part % step) + if round_up <= min(remaining, max_) and randint(0, 1): + part = round_up + else: + part -= part % step + + pieces.append(part) + + assert sum(pieces) == total, pieces + assert 0 not in pieces, pieces + + shuffle(pieces) + if factor: + pieces = [round(p * factor, 2) for p in pieces] + return pieces diff --git a/tests/test_summation.py b/tests/test_summation.py new file mode 100644 index 00000000..9cedf311 --- /dev/null +++ b/tests/test_summation.py @@ -0,0 +1,129 @@ +import pytest +from io import StringIO +from snowfakery import generate_data + +REPS = 1 + + +@pytest.mark.parametrize("_", range(REPS)) +class TestSummation: + def test_example(self, generated_rows, _): + generate_data("examples/math_partition_simple.recipe.yml") + parents = generated_rows.table_values("ParentObject__c") + children = generated_rows.table_values("ChildObject__c") + assert sum(p["TotalAmount__c"] for p in parents) == sum( + c["Amount__c"] for c in children + ), (parents, children) + + def test_example_pennies(self, generated_rows, _): + generate_data("examples/sum_pennies.yml") + objs = generated_rows.table_values("Values") + assert round(sum(p["Amount"] for p in objs)) == 100, sum( + 
p["Amount"] for p in objs + ) + + @pytest.mark.parametrize("step", [0.01, 0.5, 0.1, 0.20, 0.25, 0.50]) + def test_example_pennies_param(self, generated_rows, _, step: int): + generate_data("examples/sum_pennies_param.yml", user_options={"step": step}) + objs = generated_rows.table_values("Values") + assert round(sum(p["Amount"] for p in objs)) == 100, sum( + p["Amount"] for p in objs + ) + + def test_step(self, generated_rows, _): + yaml = """ + - plugin: snowfakery.standard_plugins.Math + - object: Obj + for_each: + var: child_value + value: + Math.random_partition: + total: 60 + step: 10 + fields: + Amount: ${{child_value}} + """ + generate_data(StringIO(yaml)) + values = generated_rows.table_values("Obj") + assert 1 <= len(values) <= 6 + amounts = [r["Amount"] for r in values] + assert sum(amounts) == 60, amounts + assert sum([r % 10 for r in amounts]) == 0, amounts + + def test_min(self, generated_rows, _): + yaml = """ + - plugin: snowfakery.standard_plugins.Math + - object: Obj + for_each: + var: child_value + value: + Math.random_partition: + total: 60 + min: 5 + fields: + Amount: ${{child_value}} + """ + generate_data(StringIO(yaml)) + values = generated_rows.table_values("Obj") + results = [r["Amount"] for r in values] + assert sum(results) == 60, results + assert not [r for r in results if r < 5], results + + def test_min_not_factor_of_total(self, generated_rows, _): + yaml = """ + - plugin: snowfakery.standard_plugins.Math + - object: Obj + for_each: + var: child_value + value: + Math.random_partition: + total: 63 + min: 5 + fields: + Amount: ${{child_value}} + """ + generate_data(StringIO(yaml)) + values = generated_rows.table_values("Obj") + results = [r["Amount"] for r in values] + assert sum(results) == 63 + assert not [r for r in results if r < 5], results + + def test_step_not_factor_of_total(self, generated_rows, _): + yaml = """ + - plugin: snowfakery.standard_plugins.Math + - object: Obj + for_each: + var: child_value + value: + 
Math.random_partition: + total: 63 + step: 5 + fields: + Amount: ${{child_value}} + """ + generate_data(StringIO(yaml)) + values = generated_rows.table_values("Obj") + results = [r["Amount"] for r in values] + assert sum(results) == 63, results + assert len([r for r in results if r < 5]) <= 1, results + + def test_max(self, generated_rows, _): + yaml = """ + - plugin: snowfakery.standard_plugins.Math + - object: Obj + for_each: + var: child_value + value: + Math.random_partition: + total: 28 + step: 2 + max: 6 + fields: + Amount: ${{child_value}} + """ + generate_data(StringIO(yaml)) + values = generated_rows.table_values("Obj") + results = [r["Amount"] for r in values] + assert sum(results) == 28, results + assert not [r for r in results if r % 2], results + assert not [r for r in results if r > 6], results From f3f639a0056a5fce943b964ae0ad3051edcef0a6 Mon Sep 17 00:00:00 2001 From: Paul Prescod Date: Fri, 23 Dec 2022 12:53:18 -0800 Subject: [PATCH 2/4] Semi-deterministic testing --- docs/index.md | 2 +- examples/sum_pennies.yml | 2 +- snowfakery/api.py | 2 + snowfakery/data_generator.py | 2 + snowfakery/data_generator_runtime.py | 3 + snowfakery/plugins.py | 5 +- snowfakery/standard_plugins/_math.py | 108 ++++++++++++++++++--------- tests/test_bad_step.recipe.yml | 10 +++ tests/test_summation.py | 54 +++++++++----- 9 files changed, 130 insertions(+), 58 deletions(-) create mode 100644 tests/test_bad_step.recipe.yml diff --git a/docs/index.md b/docs/index.md index 8a72aeaf..669a0dda 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1954,7 +1954,7 @@ granularity), `0.05` (nickle), `0.10` (dime), `0.25` (quarter) and total: 100 min: 10 max: 50 - step: 0.1 + step: 0.01 fields: Amount: ${{current_value}} ``` diff --git a/examples/sum_pennies.yml b/examples/sum_pennies.yml index d58cf72a..6b61e792 100644 --- a/examples/sum_pennies.yml +++ b/examples/sum_pennies.yml @@ -8,6 +8,6 @@ total: 100 min: 10 max: 50 - step: 0.1 + step: 0.01 fields: Amount: ${{current_value}} 
diff --git a/snowfakery/api.py b/snowfakery/api.py index 7b76ac82..442d7294 100644 --- a/snowfakery/api.py +++ b/snowfakery/api.py @@ -151,6 +151,7 @@ def generate_data( update_passthrough_fields: T.Sequence[ str ] = (), # pass through these fields from input to output + seed: T.Optional[int] = None, ) -> None: stopping_criteria = stopping_criteria_from_target_number(target_number) dburls = dburls or ([dburl] if dburl else []) @@ -193,6 +194,7 @@ def open_with_cleanup(file, mode, **kwargs): plugin_options=plugin_options, update_input_file=open_update_input_file, update_passthrough_fields=update_passthrough_fields, + seed=seed, ) if open_cci_mapping_file: diff --git a/snowfakery/data_generator.py b/snowfakery/data_generator.py index 53c1b6a6..864572b5 100644 --- a/snowfakery/data_generator.py +++ b/snowfakery/data_generator.py @@ -131,6 +131,7 @@ def generate( plugin_options: dict = None, update_input_file: OpenFileLike = None, update_passthrough_fields: T.Sequence[str] = (), + seed: T.Optional[int] = None, ) -> ExecutionSummary: """The main entry point to the package for Python applications.""" from .api import SnowfakeryApplication @@ -188,6 +189,7 @@ def generate( parse_result=parse_result, globals=globls, continuing=bool(continuation_data), + seed=seed, ) as interpreter: runtime_context = interpreter.execute() diff --git a/snowfakery/data_generator_runtime.py b/snowfakery/data_generator_runtime.py index aa8ea9f0..618f4daa 100644 --- a/snowfakery/data_generator_runtime.py +++ b/snowfakery/data_generator_runtime.py @@ -3,6 +3,7 @@ from collections import defaultdict, ChainMap from datetime import date, datetime, timezone from contextlib import contextmanager +from random import Random from typing import Optional, Dict, Sequence, Mapping, NamedTuple, Set import typing as T @@ -300,6 +301,7 @@ def __init__( snowfakery_plugins: Optional[Mapping[str, callable]] = None, faker_providers: Sequence[object] = (), continuing=False, + seed: Optional[int] = None, ): 
self.output_stream = output_stream self.options = options or {} @@ -354,6 +356,7 @@ def __init__( self.globals.nicknames_and_tables, ) self.resave_objects_from_continuation(globals, self.tables_to_keep_history_for) + self.random_number_generator = Random(seed) def resave_objects_from_continuation( self, globals: Globals, tables_to_keep_history_for: T.Iterable[str] diff --git a/snowfakery/plugins.py b/snowfakery/plugins.py index 29766d94..89e33543 100644 --- a/snowfakery/plugins.py +++ b/snowfakery/plugins.py @@ -1,3 +1,4 @@ +from random import Random import sys from typing import Any, Callable, Mapping, Union, NamedTuple, List, Tuple @@ -141,8 +142,8 @@ def current_filename(self): return self.interpreter.current_context.current_template.filename @property - def current_filename(self): - return self.interpreter.current_context.current_template.filename + def random_number_generator(self) -> Random: + return self.interpreter.random_number_generator def lazy(func: Any) -> Callable: diff --git a/snowfakery/standard_plugins/_math.py b/snowfakery/standard_plugins/_math.py index 58edcc6b..66c1922d 100644 --- a/snowfakery/standard_plugins/_math.py +++ b/snowfakery/standard_plugins/_math.py @@ -1,5 +1,5 @@ import math -from random import randint, shuffle +from random import Random from types import SimpleNamespace from typing import List, Optional, Union from snowfakery.plugins import SnowfakeryPlugin, memorable, PluginResultIterator @@ -17,9 +17,12 @@ def random_partition( *, min: int = 1, max: Optional[int] = None, - step: int = 1, + step: float = 1, ): - return GenericPluginResultIterator(False, parts(total, min, max, step)) + random = self.context.random_number_generator + return GenericPluginResultIterator( + False, parts(total, min, max, step, random) + ) mathns = MathNamespace() mathns.__dict__.update(math.__dict__.copy()) @@ -38,7 +41,13 @@ def __init__(self, repeat, iterable): self.next = iter(iterable).__next__ -def parts(total: int, min_: int = 1, max_=None, 
step=1) -> List[Union[int, float]]: +def parts( + total: int, + min_: int = 1, + max_: Optional[int] = None, + requested_step: float = 1, + rand: Optional[Random] = None, +) -> List[Union[int, float]]: """Split a number into a randomized set of 'pieces'. The pieces add up to the `total`. E.g. @@ -56,15 +65,21 @@ def parts(total: int, min_: int = 1, max_=None, step=1) -> List[Union[int, float of `step`. """ max_ = max_ or total - factor = 0 - - if step < 1: - assert step in [0.01, 0.5, 0.1, 0.20, 0.25, 0.50], step - factor = step - total = int(total / factor) - step = int(total / factor) - min_ = int(total / factor) - max_ = int(total / factor) + rand = rand or Random() + + if requested_step < 1: + allowed_steps = [0.01, 0.5, 0.1, 0.20, 0.25, 0.50] + assert ( + requested_step in allowed_steps + ), f"`step` must be one of {', '.join(str(f) for f in allowed_steps)}, not {requested_step}" + # multiply up into the integer range so we don't need to do float math + total = int(total / requested_step) + step = 1 + min_ = int(min_ / requested_step) + max_ = int(max_ / requested_step) + else: + step = int(requested_step) + assert step == requested_step, f"`step` should be an integer, not {step}" pieces = [] @@ -72,34 +87,53 @@ def parts(total: int, min_: int = 1, max_=None, step=1) -> List[Union[int, float remaining = total - sum(pieces) smallest = max(min_, step) if remaining < smallest: - # try to add it to a random other piece - for i, val in enumerate(pieces): - if val + remaining <= max_: - pieces[i] += remaining - remaining = 0 - break - - # just tack it on the end despite - # it being too small...our - # constraints must have been impossible - # to fulfil - if remaining: - pieces.append(remaining) + # mutates pieces + handle_last_bit(pieces, rand, remaining, min_, max_) else: - part = randint(smallest, min(remaining, max_)) - round_up = part + step - (part % step) - if round_up <= min(remaining, max_) and randint(0, 1): - part = round_up - else: - part -= part % step 
- - pieces.append(part) + pieces.append(generate_piece(pieces, rand, smallest, remaining, max_, step)) assert sum(pieces) == total, pieces assert 0 not in pieces, pieces - shuffle(pieces) - if factor: - pieces = [round(p * factor, 2) for p in pieces] + if requested_step != step: + pieces = [round(p * requested_step, 2) for p in pieces] return pieces + + +def handle_last_bit( + pieces: List[int], rand: Random, remaining: int, min_: int, max_: int +): + """If the piece is big enough, add it. + Otherwise, try to add it to another piece.""" + + if remaining > min_: + pos = rand.randint(0, len(pieces)) + pieces.insert(pos, remaining) + return + + # try to add it to some other piece + for i, val in enumerate(pieces): + if val + remaining <= max_: + pieces[i] += remaining + remaining = 0 + return + + # just insert it despite it being too small...our + # constraints must have been impossible to fulfill + if remaining: + pos = rand.randint(0, len(pieces)) + pieces.insert(pos, remaining) + + +def generate_piece( + pieces: List[int], rand: Random, smallest: int, remaining: int, max_: int, step: int +): + part = rand.randint(smallest, min(remaining, max_)) + round_up = part + step - (part % step) + if round_up <= min(remaining, max_) and rand.randint(0, 1): + part = round_up + else: + part -= part % step + + return part diff --git a/tests/test_bad_step.recipe.yml b/tests/test_bad_step.recipe.yml new file mode 100644 index 00000000..42465eb8 --- /dev/null +++ b/tests/test_bad_step.recipe.yml @@ -0,0 +1,10 @@ +- plugin: snowfakery.standard_plugins.Math +- object: Obj + for_each: + var: child_value + value: + Math.random_partition: + total: 28 + step: 0.3 + fields: + Amount: ${{child_value}} diff --git a/tests/test_summation.py b/tests/test_summation.py index 9cedf311..682f2377 100644 --- a/tests/test_summation.py +++ b/tests/test_summation.py @@ -1,36 +1,40 @@ import pytest +from random import randint from io import StringIO from snowfakery import generate_data +from 
snowfakery.data_gen_exceptions import DataGenError REPS = 1 -@pytest.mark.parametrize("_", range(REPS)) +@pytest.mark.parametrize("seed", [randint(0, 2 ** 32) for r in range(REPS)]) class TestSummation: - def test_example(self, generated_rows, _): - generate_data("examples/math_partition_simple.recipe.yml") + def test_example(self, generated_rows, seed): + generate_data("examples/math_partition_simple.recipe.yml", seed=seed) parents = generated_rows.table_values("ParentObject__c") children = generated_rows.table_values("ChildObject__c") assert sum(p["TotalAmount__c"] for p in parents) == sum( c["Amount__c"] for c in children ), (parents, children) - def test_example_pennies(self, generated_rows, _): - generate_data("examples/sum_pennies.yml") + def test_example_pennies(self, generated_rows, seed): + generate_data("examples/sum_pennies.yml", seed=seed) objs = generated_rows.table_values("Values") assert round(sum(p["Amount"] for p in objs)) == 100, sum( p["Amount"] for p in objs ) @pytest.mark.parametrize("step", [0.01, 0.5, 0.1, 0.20, 0.25, 0.50]) - def test_example_pennies_param(self, generated_rows, _, step: int): - generate_data("examples/sum_pennies_param.yml", user_options={"step": step}) + def test_example_pennies_param(self, generated_rows, seed, step: int): + generate_data( + "examples/sum_pennies_param.yml", user_options={"step": step}, seed=1 + ) objs = generated_rows.table_values("Values") assert round(sum(p["Amount"] for p in objs)) == 100, sum( p["Amount"] for p in objs ) - def test_step(self, generated_rows, _): + def test_step(self, generated_rows, seed): yaml = """ - plugin: snowfakery.standard_plugins.Math - object: Obj @@ -43,14 +47,14 @@ def test_step(self, generated_rows, _): fields: Amount: ${{child_value}} """ - generate_data(StringIO(yaml)) + generate_data(StringIO(yaml), seed=seed) values = generated_rows.table_values("Obj") assert 1 <= len(values) <= 6 amounts = [r["Amount"] for r in values] assert sum(amounts) == 60, amounts assert sum([r 
% 10 for r in amounts]) == 0, amounts - def test_min(self, generated_rows, _): + def test_min(self, generated_rows, seed): yaml = """ - plugin: snowfakery.standard_plugins.Math - object: Obj @@ -63,13 +67,13 @@ def test_min(self, generated_rows, _): fields: Amount: ${{child_value}} """ - generate_data(StringIO(yaml)) + generate_data(StringIO(yaml), seed=seed) values = generated_rows.table_values("Obj") results = [r["Amount"] for r in values] assert sum(results) == 60, results assert not [r for r in results if r < 5], results - def test_min_not_factor_of_total(self, generated_rows, _): + def test_min_not_factor_of_total(self, generated_rows, seed): yaml = """ - plugin: snowfakery.standard_plugins.Math - object: Obj @@ -82,13 +86,13 @@ def test_min_not_factor_of_total(self, generated_rows, _): fields: Amount: ${{child_value}} """ - generate_data(StringIO(yaml)) + generate_data(StringIO(yaml), seed=seed) values = generated_rows.table_values("Obj") results = [r["Amount"] for r in values] assert sum(results) == 63 assert not [r for r in results if r < 5], results - def test_step_not_factor_of_total(self, generated_rows, _): + def test_step_not_factor_of_total(self, generated_rows, seed): yaml = """ - plugin: snowfakery.standard_plugins.Math - object: Obj @@ -101,13 +105,13 @@ def test_step_not_factor_of_total(self, generated_rows, _): fields: Amount: ${{child_value}} """ - generate_data(StringIO(yaml)) + generate_data(StringIO(yaml), seed=seed) values = generated_rows.table_values("Obj") results = [r["Amount"] for r in values] assert sum(results) == 63, results assert len([r for r in results if r < 5]) <= 1, results - def test_max(self, generated_rows, _): + def test_max(self, generated_rows, seed): yaml = """ - plugin: snowfakery.standard_plugins.Math - object: Obj @@ -121,9 +125,25 @@ def test_max(self, generated_rows, _): fields: Amount: ${{child_value}} """ - generate_data(StringIO(yaml)) + generate_data(StringIO(yaml), seed=seed) values = 
generated_rows.table_values("Obj") results = [r["Amount"] for r in values] assert sum(results) == 28, results assert not [r for r in results if r % 2], results assert not [r for r in results if r > 6], results + + def test_bad_step(self, generated_rows, seed): + yaml = """ + - plugin: snowfakery.standard_plugins.Math + - object: Obj + for_each: + var: child_value + value: + Math.random_partition: + total: 28 + step: 0.3 + fields: + Amount: ${{child_value}} + """ + with pytest.raises(DataGenError, match="step.*0.3"): + generate_data(StringIO(yaml), seed=seed) From 68fbd48d6e5ef8b3a4169b41d70e217f13e7f695 Mon Sep 17 00:00:00 2001 From: Paul Prescod Date: Fri, 23 Dec 2022 15:49:35 -0800 Subject: [PATCH 3/4] Rename some files --- docs/index.md | 6 +++--- .../math_partition_simple.recipe.yml | 0 .../sum_pennies.recipe.yml} | 0 .../sum_pennies_param.recipe.yml} | 0 .../sum_simple_example.recipe.yml} | 0 .../math_partition}/test_bad_step.recipe.yml | 0 tests/{test_summation.py => tests_math_partition.py} | 10 +++++++--- 7 files changed, 10 insertions(+), 6 deletions(-) rename examples/{ => math_partition}/math_partition_simple.recipe.yml (100%) rename examples/{sum_pennies.yml => math_partition/sum_pennies.recipe.yml} (100%) rename examples/{sum_pennies_param.yml => math_partition/sum_pennies_param.recipe.yml} (100%) rename examples/{sum_simple_example.yml => math_partition/sum_simple_example.recipe.yml} (100%) rename {tests => examples/math_partition}/test_bad_step.recipe.yml (100%) rename tests/{test_summation.py => tests_math_partition.py} (93%) diff --git a/docs/index.md b/docs/index.md index 669a0dda..af778e06 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1870,7 +1870,7 @@ it will generate an appropriate number of children with values that sum up to match it, using `Math.random_partition`: ```yaml -# examples/math_partition_simple.recipe.yml +# examples/math_partition/math_partition_simple.recipe.yml - plugin: snowfakery.standard_plugins.Math - object: 
ParentObject__c count: 2 @@ -1919,7 +1919,7 @@ values of `4`, `8`, `12` are valid. For example: ```yaml -# examples/sum_simple_example.yml +# examples/math_partition/sum_simple_example.recipe.yml - plugin: snowfakery.standard_plugins.Math - object: Values @@ -1943,7 +1943,7 @@ granularity), `0.05` (nickle), `0.10` (dime), `0.25` (quarter) and `0.50` (half dollars). Other values are not supported. ```yaml -# examples/sum_pennies.yml +# examples/math_partition/sum_pennies.recipe.yml - plugin: snowfakery.standard_plugins.Math - object: Values diff --git a/examples/math_partition_simple.recipe.yml b/examples/math_partition/math_partition_simple.recipe.yml similarity index 100% rename from examples/math_partition_simple.recipe.yml rename to examples/math_partition/math_partition_simple.recipe.yml diff --git a/examples/sum_pennies.yml b/examples/math_partition/sum_pennies.recipe.yml similarity index 100% rename from examples/sum_pennies.yml rename to examples/math_partition/sum_pennies.recipe.yml diff --git a/examples/sum_pennies_param.yml b/examples/math_partition/sum_pennies_param.recipe.yml similarity index 100% rename from examples/sum_pennies_param.yml rename to examples/math_partition/sum_pennies_param.recipe.yml diff --git a/examples/sum_simple_example.yml b/examples/math_partition/sum_simple_example.recipe.yml similarity index 100% rename from examples/sum_simple_example.yml rename to examples/math_partition/sum_simple_example.recipe.yml diff --git a/tests/test_bad_step.recipe.yml b/examples/math_partition/test_bad_step.recipe.yml similarity index 100% rename from tests/test_bad_step.recipe.yml rename to examples/math_partition/test_bad_step.recipe.yml diff --git a/tests/test_summation.py b/tests/tests_math_partition.py similarity index 93% rename from tests/test_summation.py rename to tests/tests_math_partition.py index 682f2377..4eb14ab4 100644 --- a/tests/test_summation.py +++ b/tests/tests_math_partition.py @@ -10,7 +10,9 @@ @pytest.mark.parametrize("seed", 
[randint(0, 2 ** 32) for r in range(REPS)]) class TestSummation: def test_example(self, generated_rows, seed): - generate_data("examples/math_partition_simple.recipe.yml", seed=seed) + generate_data( + "examples/math_partition/math_partition_simple.recipe.yml", seed=seed + ) parents = generated_rows.table_values("ParentObject__c") children = generated_rows.table_values("ChildObject__c") assert sum(p["TotalAmount__c"] for p in parents) == sum( @@ -18,7 +20,7 @@ def test_example(self, generated_rows, seed): ), (parents, children) def test_example_pennies(self, generated_rows, seed): - generate_data("examples/sum_pennies.yml", seed=seed) + generate_data("examples/math_partition/sum_pennies.recipe.yml", seed=seed) objs = generated_rows.table_values("Values") assert round(sum(p["Amount"] for p in objs)) == 100, sum( p["Amount"] for p in objs @@ -27,7 +29,9 @@ def test_example_pennies(self, generated_rows, seed): @pytest.mark.parametrize("step", [0.01, 0.5, 0.1, 0.20, 0.25, 0.50]) def test_example_pennies_param(self, generated_rows, seed, step: int): generate_data( - "examples/sum_pennies_param.yml", user_options={"step": step}, seed=1 + "examples/math_partition/sum_pennies_param.recipe.yml", + user_options={"step": step}, + seed=1, ) objs = generated_rows.table_values("Values") assert round(sum(p["Amount"] for p in objs)) == 100, sum( From a4b5bad3959ddbe8c8b4c8c323efe18a4e8b1db3 Mon Sep 17 00:00:00 2001 From: Paul Prescod Date: Fri, 23 Dec 2022 19:39:57 -0800 Subject: [PATCH 4/4] Handle a corner case. 
--- snowfakery/standard_plugins/_math.py | 66 +++++++++++-------- ...th_partition.py => test_math_partition.py} | 32 +++++++-- 2 files changed, 65 insertions(+), 33 deletions(-) rename tests/{tests_math_partition.py => test_math_partition.py} (85%) diff --git a/snowfakery/standard_plugins/_math.py b/snowfakery/standard_plugins/_math.py index 66c1922d..0359144c 100644 --- a/snowfakery/standard_plugins/_math.py +++ b/snowfakery/standard_plugins/_math.py @@ -42,10 +42,10 @@ def __init__(self, repeat, iterable): def parts( - total: int, - min_: int = 1, - max_: Optional[int] = None, - requested_step: float = 1, + user_total: int, + user_min: int = 1, + user_max: Optional[int] = None, + user_step: float = 1, rand: Optional[Random] = None, ) -> List[Union[int, float]]: """Split a number into a randomized set of 'pieces'. @@ -64,22 +64,24 @@ def parts( will be inconsistent with them. e.g. if `total` is not a multiple of `step`. """ - max_ = max_ or total + max_ = user_max or user_total rand = rand or Random() - if requested_step < 1: + if user_step < 1: allowed_steps = [0.01, 0.5, 0.1, 0.20, 0.25, 0.50] assert ( - requested_step in allowed_steps - ), f"`step` must be one of {', '.join(str(f) for f in allowed_steps)}, not {requested_step}" + user_step in allowed_steps + ), f"`step` must be one of {', '.join(str(f) for f in allowed_steps)}, not {user_step}" # multiply up into the integer range so we don't need to do float math - total = int(total / requested_step) + total = int(user_total / user_step) step = 1 - min_ = int(min_ / requested_step) - max_ = int(max_ / requested_step) + min_ = int(user_min / user_step) + max_ = int(max_ / user_step) else: - step = int(requested_step) - assert step == requested_step, f"`step` should be an integer, not {step}" + step = int(user_step) + min_ = user_min + total = user_total + assert step == user_step, f"`step` should be an integer, not {step}" pieces = [] @@ -88,47 +90,55 @@ def parts( smallest = max(min_, step) if remaining < 
smallest: # mutates pieces - handle_last_bit(pieces, rand, remaining, min_, max_) + success = handle_last_bit(pieces, rand, remaining, min_, max_) + # our constraints must have been impossible to fulfill + assert ( + success + ), f"No way to match all constraints: total: {user_total}, min: {user_min}, max: {user_max}, step: {user_step}" else: - pieces.append(generate_piece(pieces, rand, smallest, remaining, max_, step)) + pieces.append(generate_piece(rand, smallest, remaining, max_, step)) assert sum(pieces) == total, pieces assert 0 not in pieces, pieces - if requested_step != step: - pieces = [round(p * requested_step, 2) for p in pieces] + if user_step != step: + pieces = [round(p * user_step, 2) for p in pieces] return pieces def handle_last_bit( pieces: List[int], rand: Random, remaining: int, min_: int, max_: int -): +) -> bool: """If the piece is big enough, add it. Otherwise, try to add it to another piece.""" if remaining > min_: pos = rand.randint(0, len(pieces)) pieces.insert(pos, remaining) - return + return True # try to add it to some other piece for i, val in enumerate(pieces): if val + remaining <= max_: pieces[i] += remaining remaining = 0 - return + return True - # just insert it despite it being too small...our - # constraints must have been impossible to fulfill - if remaining: - pos = rand.randint(0, len(pieces)) - pieces.insert(pos, remaining) + # No other piece has enough room...so + # split it up among several other pieces + for i, val in enumerate(pieces): + chunk = min(max_ - pieces[i], remaining) + remaining -= chunk + pieces[i] = max_ + assert remaining >= 0 + if remaining == 0: + return True + + return False -def generate_piece( - pieces: List[int], rand: Random, smallest: int, remaining: int, max_: int, step: int -): +def generate_piece(rand: Random, smallest: int, remaining: int, max_: int, step: int): part = rand.randint(smallest, min(remaining, max_)) round_up = part + step - (part % step) if round_up <= min(remaining, max_) and 
rand.randint(0, 1): diff --git a/tests/tests_math_partition.py b/tests/test_math_partition.py similarity index 85% rename from tests/tests_math_partition.py rename to tests/test_math_partition.py index 4eb14ab4..c83c1f27 100644 --- a/tests/tests_math_partition.py +++ b/tests/test_math_partition.py @@ -5,10 +5,11 @@ from snowfakery.data_gen_exceptions import DataGenError REPS = 1 +SEEDS = [randint(0, 2 ** 32) for r in range(REPS)] -@pytest.mark.parametrize("seed", [randint(0, 2 ** 32) for r in range(REPS)]) -class TestSummation: +@pytest.mark.parametrize("seed", SEEDS) +class TestMathPartition: def test_example(self, generated_rows, seed): generate_data( "examples/math_partition/math_partition_simple.recipe.yml", seed=seed @@ -19,8 +20,11 @@ def test_example(self, generated_rows, seed): c["Amount__c"] for c in children ), (parents, children) - def test_example_pennies(self, generated_rows, seed): - generate_data("examples/math_partition/sum_pennies.recipe.yml", seed=seed) + regression_seeds = [824956277] + + @pytest.mark.parametrize("seed2", regression_seeds + SEEDS) + def test_example_pennies(self, generated_rows, seed, seed2): + generate_data("examples/math_partition/sum_pennies.recipe.yml", seed=seed2) objs = generated_rows.table_values("Values") assert round(sum(p["Amount"] for p in objs)) == 100, sum( p["Amount"] for p in objs @@ -31,7 +35,7 @@ def test_example_pennies_param(self, generated_rows, seed, step: int): generate_data( "examples/math_partition/sum_pennies_param.recipe.yml", user_options={"step": step}, - seed=1, + seed=seed, ) objs = generated_rows.table_values("Values") assert round(sum(p["Amount"] for p in objs)) == 100, sum( @@ -151,3 +155,21 @@ def test_bad_step(self, generated_rows, seed): """ with pytest.raises(DataGenError, match="step.*0.3"): generate_data(StringIO(yaml), seed=seed) + + def test_inconsistent_constraints(self, generated_rows, seed): + yaml = """ + - plugin: snowfakery.standard_plugins.Math + - object: Obj + for_each: + var: 
child_value + value: + Math.random_partition: + total: 10 + min: 8 + max: 8 + step: 5 + fields: + Amount: ${{child_value}} + """ + with pytest.raises(DataGenError, match="constraints"): + generate_data(StringIO(yaml), seed=seed)