diff --git a/docs/index.md b/docs/index.md index bac438aa..dbc18bbb 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1912,6 +1912,58 @@ Other encodings of binary data are not currently supported, and output streams g Snowfakery can be extended with custom plugins and fake data providers as described in [Extending Snowfakery with Python Code](./extending.md). +### Update mode + +If your goal is to loop over every row of an input, update it, and then output +the result, Snowfakery has a special mode for you that does that with minimal +recipe code. For example, if you have a CSV of contacts, and you want to add +an address for every one, you do it like this: + +``` +# examples/update_contexts.recipe.yml +- object: Contact + fields: + BillingStreet: + fake: StreetAddress + BillingCity: + fake: City + BillingState: + fake: State + BillingPostalCode: + fake: PostalCode + BillingCountry: + fake: CurrentCountry +``` + +Given an input file like this: + +``` +id,FirstName,LastName +0032D00000V6UvUQAV,Michael,Bluth +032D00000V6UvVQAV,Isabella,Wright +032D00000V6UvfQAF,Desiree,Shelton +032D00000V6UvkQAF,Deanna,Mcdaniel +``` + +You can run: + +``` +$ snowfakery examples/update_contexts.recipe.yml --update-input-file examples/contacts.csv +``` + +This will generate output like this: + +``` +id,FirstName,LastName,BillingStreet,BillingCity,BillingState,BillingPostalCode,BillingCountry +0032D00000V6UvUQAV,Michael,Bluth,32252 Marc Mall Suite 349,South Kristenview,Massachusetts,65450,United States +032D00000V6UvVQAV,Isabella,Wright,41497 Henson Motorway,West Marisaland,Alaska,10293,United States +032D00000V6UvfQAF,Desiree,Shelton,84504 Darren Knolls Suite 023,Port Sandra,Pennsylvania,68863,United States +032D00000V6UvkQAF,Deanna,Mcdaniel,838 Montoya Circle Apt. 857,West Robert,Louisiana,03074,United States +``` + +An update recipe should have a single top-level object with no `count` on it. +It will generate the same number of output rows as input rows. 
+ ## Use Snowfakery with Salesforce Snowfakery recipes that generate Salesforce records are like any other Snowfakery recipes, but instead use `SObject` names for the `objects`. There are several examples [in the Snowfakery repository](https://github.com/SFDO-Tooling/Snowfakery/tree/main/examples/salesforce). diff --git a/docs/salesforce.md b/docs/salesforce.md index 58f110b2..35ed3b35 100644 --- a/docs/salesforce.md +++ b/docs/salesforce.md @@ -21,7 +21,6 @@ creates Snowfakery. The easiest way to learn about CumulusCI (and to learn how to install it) is with its [Trailhead Trail](https://trailhead.salesforce.com/en/content/learn/trails/build-applications-with-cumulusci). - ```s $ cci task run generate_and_load_from_yaml -o generator_yaml examples/salesforce/Contact.recipe.yml -o num_records 300 -o num_records_tablename Contact --org qa ... @@ -51,6 +50,11 @@ $ cci flow run test_everything ... ``` +The order in which sobjects are loaded is inferred by Snowfakery and CumulusCI +based primarily on which tables have fields that depend on which other +tables. Tables declared earlier in the recipe are also generally loaded +before tables declared later. + ## Incorporating Information from Salesforce There are various cases where it might be helpful to relate newly created synthetic @@ -323,7 +327,8 @@ in the `PersonContactId` field. There is also an alternate syntax which allows nicknaming: ```yaml -... 
+ +--- - object: Account fields: PersonContactId: diff --git a/examples/contacts.csv b/examples/contacts.csv new file mode 100644 index 00000000..9c4c9d83 --- /dev/null +++ b/examples/contacts.csv @@ -0,0 +1,5 @@ +id,FirstName,LastName +0032D00000V6UvUQAV,Michael,Bluth,32252 Marc Mall Suite 349,South Kristenview,Massachusetts,65450,United States +032D00000V6UvVQAV,Isabella,Wright,41497 Henson Motorway,West Marisaland,Alaska,10293,United States +032D00000V6UvfQAF,Desiree,Shelton,84504 Darren Knolls Suite 023,Port Sandra,Pennsylvania,68863,United States +032D00000V6UvkQAF,Deanna,Mcdaniel,838 Montoya Circle Apt. 857,West Robert,Louisiana,03074,United States diff --git a/examples/update_contexts.recipe.yml b/examples/update_contexts.recipe.yml new file mode 100644 index 00000000..0027ba91 --- /dev/null +++ b/examples/update_contexts.recipe.yml @@ -0,0 +1,12 @@ +- object: Contact + fields: + BillingStreet: + fake: StreetAddress + BillingCity: + fake: City + BillingState: + fake: State + BillingPostalCode: + fake: PostalCode + BillingCountry: + fake: CurrentCountry diff --git a/snowfakery/api.py b/snowfakery/api.py index 7524a0b2..2182a7ef 100644 --- a/snowfakery/api.py +++ b/snowfakery/api.py @@ -4,7 +4,6 @@ import sys import yaml -from click.utils import LazyFile from snowfakery.data_generator import generate @@ -23,9 +22,7 @@ from snowfakery.data_generator_runtime import ( StoppingCriteria, ) - -OpenFileLike = T.Union[T.TextIO, LazyFile] -FileLike = T.Union[OpenFileLike, Path, str] +from snowfakery.utils.files import FileLike, open_file_like OUTPUT_FORMATS = { "png": "snowfakery.output_streams.ImageOutputStream", @@ -150,6 +147,7 @@ def generate_data( FileLike ] = None, # read these load declarations for CCI plugin_options: T.Mapping = None, + update_input_file: FileLike = None, # use this input file in update mode ) -> None: stopping_criteria = stopping_criteria_from_target_number(target_number) dburls = dburls or ([dburl] if dburl else []) @@ -186,15 +184,9 @@ def 
open_with_cleanup(file, mode): continuation_file=open_continuation_file, stopping_criteria=stopping_criteria, plugin_options=plugin_options, + update_input_file=update_input_file, ) - # This feature seems seldom useful. Delete it if it isn't missed - # by fall 2021: - - # if debug_internals: - # debuginfo = yaml.dump(summary.summarize_for_debugging(), sort_keys=False) - # sys.stderr.write(debuginfo) - if open_cci_mapping_file: declarations = gather_declarations(yaml_path or "", load_declarations) yaml.safe_dump( @@ -319,23 +311,3 @@ def infer_load_file_path(yaml_file: T.Union[str, Path]): return Path(yaml_file.replace(suffixes, ".load.yml")) else: return Path("") - - -@contextmanager -def open_file_like( - file_like: T.Optional[FileLike], mode -) -> T.ContextManager[T.Tuple[str, OpenFileLike]]: - if not file_like: - yield None, None - if isinstance(file_like, str): - file_like = Path(file_like) - - if isinstance(file_like, Path): - with file_like.open(mode) as f: - yield file_like, f - - elif hasattr(file_like, "name"): - yield file_like.name, file_like - - elif hasattr(file_like, "read"): - yield None, file_like diff --git a/snowfakery/cli.py b/snowfakery/cli.py index 09156e62..63d902ca 100755 --- a/snowfakery/cli.py +++ b/snowfakery/cli.py @@ -155,6 +155,11 @@ def __mod__(self, vals) -> str: help="Declarations to mix into the generated mapping file", multiple=True, ) +@click.option( + "--update-input-file", + type=click.Path(exists=True, readable=True, dir_okay=False), + help="Run an update-style recipe on this input CSV", +) @click.version_option(version=version, prog_name="snowfakery", message=VersionMessage()) def generate_cli( yaml_file, @@ -172,6 +177,7 @@ def generate_cli( plugin_option=(), should_create_cci_record_type_tables=False, load_declarations=None, + update_input_file=None, ): """ Generates records from a YAML file @@ -192,16 +198,16 @@ def generate_cli( """ output_files = list(output_files) if output_files else [] validate_options( - yaml_file, - 
option, - dburls, - debug_internals, - generate_cci_mapping_file, - output_format, - output_files, - output_folder, - target_number, - reps, + yaml_file=yaml_file, + option=option, + dburl=dburls, + debug_internals=debug_internals, + generate_cci_mapping_file=generate_cci_mapping_file, + output_format=output_format, + output_files=output_files, + output_folder=output_folder, + target_number=target_number, + reps=reps, ) try: user_options = dict(option) @@ -224,6 +230,7 @@ def generate_cli( should_create_cci_record_type_tables=should_create_cci_record_type_tables, load_declarations=load_declarations, plugin_options=plugin_options, + update_input_file=update_input_file, ) except DataGenError as e: if debug_internals: diff --git a/snowfakery/data_generator.py b/snowfakery/data_generator.py index 0e909529..0a097885 100644 --- a/snowfakery/data_generator.py +++ b/snowfakery/data_generator.py @@ -123,6 +123,7 @@ def generate( generate_continuation_file: FileLike = None, continuation_file: TextIO = None, plugin_options: dict = None, + update_input_file: FileLike = None, ) -> ExecutionSummary: """The main entry point to the package for Python applications.""" from .api import SnowfakeryApplication @@ -133,7 +134,7 @@ def generate( output_stream = output_stream or DebugOutputStream() # parse the YAML and any it refers to - parse_result = parse_recipe(open_yaml_file) + parse_result = parse_recipe(open_yaml_file, update_input_file) faker_providers, snowfakery_plugins = process_plugins(parse_result.plugins) diff --git a/snowfakery/parse_recipe_yaml.py b/snowfakery/parse_recipe_yaml.py index cde1b3bc..911a5a02 100644 --- a/snowfakery/parse_recipe_yaml.py +++ b/snowfakery/parse_recipe_yaml.py @@ -24,6 +24,7 @@ from snowfakery.plugins import resolve_plugins, LineTracker, ParserMacroPlugin import snowfakery.data_gen_exceptions as exc +from snowfakery.utils.files import FileLike SHARED_OBJECT = "#SHARED_OBJECT" @@ -633,9 +634,15 @@ def parse_file(stream: IO[str], context: 
ParseContext) -> List[Dict]: return statements -def parse_recipe(stream: IO[str]) -> ParseResult: +def build_update_recipe( + statements: List[Statement], update_input_file: FileLike = None +) -> List[Statement]: + raise NotImplementedError() + + +def parse_recipe(stream: IO[str], update_input_file: FileLike = None) -> ParseResult: context = ParseContext() - objects = parse_file(stream, context) + objects = parse_file(stream, context) # parse the yaml without semantics statements = parse_statement_list(objects, context) tables = context.table_infos tables = { @@ -643,5 +650,7 @@ def parse_recipe(stream: IO[str]) -> ParseResult: for name, value in context.table_infos.items() if not name.startswith("__") } + if update_input_file: + statements = build_update_recipe(statements, update_input_file) return ParseResult(context.options, tables, statements, plugins=context.plugins)