From 7e0e5a8e5ec865a588dabc63ac6eded37d95c64e Mon Sep 17 00:00:00 2001 From: CM Lubinski Date: Fri, 2 Oct 2015 21:21:40 +0000 Subject: [PATCH 1/8] Eregs command line runner + watch_node Beginnings of a single command line interface for accessing parser functionality. As a first sub command, converts the "watch_node" script --- eregs.py | 29 +++++++++++++++++++++++ regparser/commands/__init__.py | 0 regparser/commands/watch_node.py | 34 +++++++++++++++++++++++++++ watch_node.py | 40 -------------------------------- 4 files changed, 63 insertions(+), 40 deletions(-) create mode 100644 eregs.py create mode 100644 regparser/commands/__init__.py create mode 100644 regparser/commands/watch_node.py delete mode 100644 watch_node.py diff --git a/eregs.py b/eregs.py new file mode 100644 index 0000000..464e06d --- /dev/null +++ b/eregs.py @@ -0,0 +1,29 @@ +from importlib import import_module +import pkgutil + +import click + +from regparser import commands + +try: + import requests_cache # @todo - replace with cache control + requests_cache.install_cache('fr_cache') +except ImportError: + # If the cache library isn't present, do nothing -- we'll just make full + # HTTP requests rather than looking it up from the cache + pass + + +@click.group() +def cli(): + pass + + +for _, command_name, _ in pkgutil.iter_modules(commands.__path__): + module = import_module('regparser.commands.{}'.format(command_name)) + command = getattr(module, command_name) + cli.add_command(command) + + +if __name__ == '__main__': + cli() diff --git a/regparser/commands/__init__.py b/regparser/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/regparser/commands/watch_node.py b/regparser/commands/watch_node.py new file mode 100644 index 0000000..ce0ff3f --- /dev/null +++ b/regparser/commands/watch_node.py @@ -0,0 +1,34 @@ +# @todo - this should be combined with build_from.py +import click + +from regparser.builder import tree_and_builder +from regparser.notice.changes import node_to_dict, pretty_change +from regparser.tree.struct import find + + +@click.command() +@click.argument('node_label') +@click.argument('filename', + type=click.Path(exists=True, dir_okay=False, readable=True)) +@click.argument('title', type=int) +def watch_node(node_label, filename, title): + """Follow changes to a particular label. + + \b + NODE_LABEL: Label for the node you wish to watch. e.g. 1026-5-a + FILENAME: XML file containing the regulation + TITLE: Title number""" + + initial_tree, builder = tree_and_builder(filename, title) + initial_node = find(initial_tree, node_label) + if initial_node: + click.echo("> " + builder.doc_number) + click.echo("\t" + pretty_change( + {'action': 'POST', 'node': node_to_dict(initial_node)})) + + # search for label + for version, changes in builder.changes_in_sequence(): + if node_label in changes: + click.echo("> " + version) + for change in changes[node_label]: + click.echo("\t" + pretty_change(change)) diff --git a/watch_node.py b/watch_node.py deleted file mode 100644 index b48e5ac..0000000 --- a/watch_node.py +++ /dev/null @@ -1,40 +0,0 @@ -# @todo - this should be combined with build_from.py -import argparse - - -try: - import requests_cache - requests_cache.install_cache('fr_cache') -except ImportError: - # If the cache library isn't present, do nothing -- we'll just make full - # HTTP requests rather than looking it up from the cache - pass - -from regparser.builder import tree_and_builder -from regparser.notice.changes import node_to_dict, pretty_change -from regparser.tree.struct import find - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Node Watcher") - parser.add_argument( - 'node_label', - help='Label for the node you wish to watch. e.g. 1026-5-a') - parser.add_argument('filename', - help='XML file containing the regulation') - parser.add_argument('title', type=int, help='Title number') - args = parser.parse_args() - - initial_tree, builder = tree_and_builder(args.filename, args.title) - initial_node = find(initial_tree, args.node_label) - if initial_node: - print("> " + builder.doc_number) - print("\t" + pretty_change( - {'action': 'POST', 'node': node_to_dict(initial_node)})) - - # search for label - for version, changes in builder.changes_in_sequence(): - if args.node_label in changes: - print("> " + version) - for change in changes[args.node_label]: - print("\t" + pretty_change(change)) From 7d52a59a8f40ea2f223d8af82e0d42a85beb259b Mon Sep 17 00:00:00 2001 From: CM Lubinski Date: Fri, 2 Oct 2015 21:26:29 +0000 Subject: [PATCH 2/8] Move several scripts into a 'legacy_scripts' directory --- build_tree.py => legacy_scripts/build_tree.py | 0 generate_layers.py => legacy_scripts/generate_layers.py | 0 generate_notice.py => legacy_scripts/generate_notice.py | 0 generate_tree.py => legacy_scripts/generate_tree.py | 0 plaintext_keyterms.py => legacy_scripts/plaintext_keyterms.py | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename build_tree.py => legacy_scripts/build_tree.py (100%) rename generate_layers.py => legacy_scripts/generate_layers.py (100%) rename generate_notice.py => legacy_scripts/generate_notice.py (100%) rename generate_tree.py => legacy_scripts/generate_tree.py (100%) rename plaintext_keyterms.py => legacy_scripts/plaintext_keyterms.py (100%) diff --git a/build_tree.py b/legacy_scripts/build_tree.py similarity index 100% rename from build_tree.py rename to legacy_scripts/build_tree.py diff --git a/generate_layers.py b/legacy_scripts/generate_layers.py similarity index 100% rename from generate_layers.py rename to legacy_scripts/generate_layers.py diff --git a/generate_notice.py b/legacy_scripts/generate_notice.py similarity index 100% rename from generate_notice.py rename to legacy_scripts/generate_notice.py diff --git a/generate_tree.py b/legacy_scripts/generate_tree.py similarity index 100% rename from generate_tree.py rename to legacy_scripts/generate_tree.py diff --git a/plaintext_keyterms.py b/legacy_scripts/plaintext_keyterms.py similarity index 100% rename from plaintext_keyterms.py rename to legacy_scripts/plaintext_keyterms.py From c1ca4480c32e6b780c93be5594ed16b6badd80f4 Mon Sep 17 00:00:00 2001 From: CM Lubinski Date: Fri, 2 Oct 2015 21:35:16 +0000 Subject: [PATCH 3/8] Migrate notice_order to the command system --- notice_order.py | 30 ------------------------------ regparser/commands/notice_order.py | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 30 deletions(-) delete mode 100644 notice_order.py create mode 100644 regparser/commands/notice_order.py diff --git a/notice_order.py b/notice_order.py deleted file mode 100644 index 770bc76..0000000 --- a/notice_order.py +++ /dev/null @@ -1,30 +0,0 @@ -# @todo - this should be combined with build_from.py -import argparse - -from regparser.builder import notices_for_cfr_part - -try: - import requests_cache - requests_cache.install_cache('fr_cache') -except ImportError: - # If the cache library isn't present, do nothing -- we'll just make full - # HTTP requests rather than looking it up from the cache - pass - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Notice Orderer") - parser.add_argument('cfr_title', help='CFR_TITLE') - parser.add_argument('cfr_part', help='CFR_PART') - parser.add_argument('--include-notices-without-changes', const=True, - default=False, action='store_const', - help=('Include notices which do not change the ' - 'regulation (default: false)')) - args = parser.parse_args() - - notices_by_date = notices_for_cfr_part(args.cfr_title, args.cfr_part) - for date in sorted(notices_by_date.keys()): - print(date) - for notice in notices_by_date[date]: - if 'changes' in notice or args.include_notices_without_changes: - print("\t" + notice['document_number']) diff --git a/regparser/commands/notice_order.py b/regparser/commands/notice_order.py new file mode 100644 index 0000000..7acf18b --- /dev/null +++ b/regparser/commands/notice_order.py @@ -0,0 +1,19 @@ +# @todo - this should be combined with build_from.py +import click + +from regparser.builder import notices_for_cfr_part + + +@click.command() +@click.argument('cfr_title', type=int) +@click.argument('cfr_part', type=int) +@click.option('--include-notices-without-changes', is_flag=True, + help='Include notices which do not change the regulation') +def notice_order(cfr_title, cfr_part, include_notices_without_changes): + """Order notices associated with a reg.""" + notices_by_date = notices_for_cfr_part(str(cfr_title), str(cfr_part)) + for date in sorted(notices_by_date.keys()): + click.echo(date) + for notice in notices_by_date[date]: + if 'changes' in notice or include_notices_without_changes: + click.echo("\t" + notice['document_number']) From 204efc0f5db0b2bb8429af1d2351c71835173e7f Mon Sep 17 00:00:00 2001 From: CM Lubinski Date: Fri, 2 Oct 2015 22:08:38 +0000 Subject: [PATCH 4/8] Convert build_from into a command --- .../commands/build_from.py | 92 ++++++++++--------- 1 file changed, 47 insertions(+), 45 deletions(-) rename build_from.py => regparser/commands/build_from.py (66%) diff --git a/build_from.py b/regparser/commands/build_from.py similarity index 66% rename from build_from.py rename to regparser/commands/build_from.py index 5f04842..d583646 100755 --- a/build_from.py +++ b/regparser/commands/build_from.py @@ -1,11 +1,8 @@ -#!/usr/bin/env python - - -import argparse -import logging -import hashlib import codecs +import hashlib +import logging +import click try: import requests_cache requests_cache.install_cache('fr_cache') @@ -26,16 +23,17 @@ # @profile -def parse_regulation(args): +def parse_regulation(filename, title, act_title, act_section, checkpoint_dir, + doc_number): """ Run the parser on the specified command-line arguments. Broken out into separate function to assist in profiling. """ - act_title_and_section = [args.act_title, args.act_section] + act_title_and_section = [act_title, act_section] # First, the regulation tree - reg_tree, builder = tree_and_builder(args.filename, args.title, - args.checkpoint_dir, args.doc_number) + reg_tree, builder = tree_and_builder(filename, title, checkpoint_dir, + doc_number) builder.write_notices() @@ -47,7 +45,7 @@ def parse_regulation(args): builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache) layer_cache.replace_using(reg_tree) - if args.generate_diffs: + if generate_diffs: generate_diffs(reg_tree, act_title_and_section, builder, layer_cache) @@ -133,38 +131,42 @@ def build_by_notice(filename, title, act_title, act_section, if args.generate_diffs: generate_diffs(reg_tree, act_title_and_section, builder, layer_cache) -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Regulation parser') - parser.add_argument('filename', - help='XML file containing the regulation') - parser.add_argument('title', type=int, help='Title number') - parser.add_argument('act_title', type=int, help='Act title', - action='store') - parser.add_argument('act_section', type=int, help='Act section') - diffs = parser.add_mutually_exclusive_group(required=False) - diffs.add_argument('--generate-diffs', dest='generate_diffs', - action='store_true', help='Generate diffs') - diffs.add_argument('--no-generate-diffs', dest='generate_diffs', - action='store_false', help="Don't generate diffs") - diffs.set_defaults(generate_diffs=True) - parser.add_argument('--checkpoint', dest='checkpoint_dir', required=False, - help='Directory to save checkpoint data') - parser.add_argument( - '--version-identifier', dest='doc_number', required=False, - help=('Do not try to derive the version information. (Only use if ' - 'the regulation has no electronic final rules on ' - 'federalregister.gov, i.e. has not changed since before ~2000)')) - - parser.add_argument('--last-notice', type=str, - help='the last notice to be used') - parser.add_argument('--operation', action='store') - parser.add_argument('--notices-to-apply', nargs='*', action='store') - - args = parser.parse_args() - - if args.operation == 'build_by_notice': - build_by_notice(args.filename, args.title, args.act_title, - args.act_section, args.notices_to_apply, - args.last_notice, args.checkpoint) + +@click.command() +@click.argument('filename', + type=click.Path(exists=True, dir_okay=False, readable=True)) +@click.argument('title', type=int) +@click.option('--act_title', type=int, default=0, + help=('Title of the act of congress providing authority for ' + 'this regulation')) +@click.option('--act_section', type=int, default=0, + help=('Section of the act of congress providing authority for ' + 'this regulation')) +@click.option('--generate-diffs/--no-generate-diffs', default=True) +@click.option('--checkpoint', help='Directory to save checkpoint data', + type=click.Path(file_okay=False, readable=True, writable=True)) +@click.option('--version-identifier', + help=('Do not try to derive the version information. (Only use ' + 'if the regulation has no electronic final rules on ' + 'federalregister.gov, i.e. has not changed since before ' + '~2000)')) +@click.option('--last-notice', help='the last notice to be used') +@click.option('--operation') +@click.option('--notices-to-apply', nargs=-1) +# @profile +def build_from(filename, title, act_title, act_section, generate_diffs, + checkpoint, version_identifier, last_notice, operation, + notices_to_apply): + """Build all data from provided xml. Reads the provided file and builds + all versions of the regulation, its layers, etc. that follow. + + \b + FILENAME: XML file containing the regulation + TITLE: CFR title + """ + if operation == 'build_by_notice': + build_by_notice(filename, title, act_title, act_section, + notices_to_apply, last_notice, checkpoint) else: - parse_regulation(args) + parse_regulation(filename, title, act_title, act_section, checkpoint, + version_identifier) From 82193f6d15e2312d081ecd83aebb43194379c3aa Mon Sep 17 00:00:00 2001 From: CM Lubinski Date: Fri, 2 Oct 2015 22:23:23 +0000 Subject: [PATCH 5/8] Include Click in requirements. Make an eregs script when installed --- .gitignore | 2 ++ requirements.txt | 2 ++ setup.py | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 setup.py diff --git a/.gitignore b/.gitignore index bca5bf6..00cf950 100644 --- a/.gitignore +++ b/.gitignore @@ -85,3 +85,5 @@ regulations-configs # docs output docs/_build *.p + +regparser.egg-info/ diff --git a/requirements.txt b/requirements.txt index d577910..1a8eb3f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ +click==5.1 lxml==3.2.0 pyparsing==1.5.7 inflection==0.1.2 requests==1.2.3 GitPython==0.3.2.RC1 python-constraint==1.2 +-e . diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..668c417 --- /dev/null +++ b/setup.py @@ -0,0 +1,21 @@ +from setuptools import setup, find_packages + +setup( + name="regparser", + version="2.0.0", + packages=find_packages(), + classifiers=[ + 'License :: Public Domain', + 'License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication' + ], + install_requires=[ + "click", + "GitPython", + "inflection", + "lxml", + "pyparsing", + "python-constraint", + "requests" + ], + entry_points={"console_scripts": ["eregs=eregs:cli"]} +) From 95745f3d5df5958c82c7724410a5f70868c27379 Mon Sep 17 00:00:00 2001 From: CM Lubinski Date: Fri, 2 Oct 2015 22:28:43 +0000 Subject: [PATCH 6/8] Update README to refer to the eregs commands --- README.md | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index c054efb..1b4f16f 100644 --- a/README.md +++ b/README.md @@ -20,10 +20,8 @@ Here's an example, using CFPB's regulation H. 1. `git clone https://github.com/cfpb/regulations-parser.git` 1. `cd regulations-parser` 1. `pip install -r requirements.txt` -1. `wget - http://www.gpo.gov/fdsys/pkg/CFR-2012-title12-vol8/xml/CFR-2012-title12-vol8-part1004.xml` -1. `python build_from.py CFR-2012-title12-vol8-part1004.xml 12 2011-18676 15 - 1693` +1. `wget http://www.gpo.gov/fdsys/pkg/CFR-2012-title12-vol8/xml/CFR-2012-title12-vol8-part1004.xml` +1. `eregs build_from CFR-2012-title12-vol8-part1004.xml 12` At the end, you will have new directories for `regulation`, `layer`, `diff`, and `notice` which would mirror the JSON files sent to the API. @@ -38,7 +36,7 @@ tweaked to pass the parser. 1. `git clone https://github.com/cfpb/fr-notices.git` 1. `pip install -r requirements.txt` 1. `echo "LOCAL_XML_PATHS = ['fr-notices/']" >> local_settings.py` -1. `python build_from.py fr-notices/articles/xml/201/131/725.xml 12 2011-31725 15 1693` +1. `eregs build_from fr-notices/articles/xml/201/131/725.xml 12 2011-31725 15 1693` If you review the history of the `fr-notices` repo, you'll see some of the types of changes that need to be made. @@ -152,18 +150,15 @@ regulation E). The syntax is ```bash -$ python build_from.py regulation.xml title act_title act_section +$ eregs build_from regulation.xml title ``` For example, to match the reissuance above: ```bash -$ python build_from.py 725.xml 12 15 1693 +$ eregs build_from 725.xml 12 15 1693 ``` -Here ```12``` is the CFR title number (in our case, for "Banks and Banking"), -```15``` is the title of "the Act" and ```1693``` is the relevant section. -Wherever the phrase "the Act" is used in the regulation, the external link -parser will treat it as "15 U.S.C. 1693". +Here ```12``` is the CFR title number (in our case, for "Banks and Banking"). Running the command will generate four folders, ```regulation```, ```notice```, ``layer`` and possibly ``diff`` in the ```OUTPUT_DIR``` @@ -242,30 +237,30 @@ configuration. ### Notice Order When debugging, it can be helpful to know how notices will be grouped and -sequenced when compiling the regulation. The `notice_order.py` utility tells +sequenced when compiling the regulation. The `notice_order` utility tells you exactly that information, once it is given a CFR title and part. ``` -$ python notice_order.py 12 1026 +$ eregs notice_order 12 1026 ``` By default, this only includes notices which explicitly change the text of the regulation. To include all final notices, add this flag: ``` -$ python notice_order.py 12 1005 --include-notices-without-changes +$ eregs notice_order 12 1005 --include-notices-without-changes ``` ### Watch Node Tracing how a specific node changes over the life of a regulation can help -track down why the parser is failing (or exploding). The `watch_node.py` +track down why the parser is failing (or exploding). The `watch_node` utility does exactly that, stepping through the initial tree and all subsequent notices. Whenever a node is changed (created, modified, deleted, etc.) this utility will log some output. ``` -$ python watch_node.py 1005-16-c path/to/regulation.xml 12 +$ eregs watch_node 1005-16-c path/to/regulation.xml 12 ``` The first parameter is the label of the node you want to watch, the second is @@ -470,13 +465,13 @@ requires several hours. There are a few methods to speed up this process. Installing `requests-cache` will cache API-read calls (such as those made when calling the Federal Register). The cache lives in an sqlite database (`fr_cache.sqlite`), which -can be safely removed without error. The `build_from.py` pipeline can also +can be safely removed without error. The `build_from` pipeline can also include checkpoints -- that is, saving the state of the process up until some point in time. To activate this feature, pass in a directory name to the `--checkpoint` flag, e.g. ```bash -$ python build_from.py CFR-2012-title12-vol8-part1004.xml 12 15 1693 --checkpoint my-checkpoint-dir +$ eregs build_from CFR-2012-title12-vol8-part1004.xml 12 --checkpoint my-checkpoint-dir ``` ### Parsing Error Example @@ -577,8 +572,7 @@ Let's set up [regulations-core](https://github.com/cfpb/regulations-core) first. 1. `git clone https://github.com/cfpb/regulations-core.git` 1. `cd regulations-core` - 1. `pip install zc.buildout` - 1. `buildout # pulls in python dependencies` + 1. `pip install -r requirements.txt # pulls in python dependencies` 1. `./bin/django syncdb --migrate` 1. `./bin/django runserver 127.0.0.1:8888 & # Starts the API` @@ -587,14 +581,13 @@ the regulation H example above 1. `cd /path/to/regulations-parser` 1. `echo "API_BASE = 'http://127.0.0.1:8888/'" >> local_settings.py` - 1. `python build_from.py CFR-2012-title12-vol8-part1004.xml 12 2011-18676 15 - 1693` + 1. `eregs build_from CFR-2012-title12-vol8-part1004.xml 12 2011-18676 15 1693` Next up, we set up [regulations-site](https://github.com/cfpb/regulations-site) to provide a webapp. 1. `git clone https://github.com/cfpb/regulations-site.git` 1. `cd regulations-site` - 1. `buildout` + 1. `pip install -r requirements.txt` 1. `echo "API_BASE = 'http://127.0.0.1:8888/'" >> regulations/settings/local_settings.py` 1. `./run_server.sh` From dc259241c9e55d44ef5e02034aa660dc65543ed0 Mon Sep 17 00:00:00 2001 From: CM Lubinski Date: Mon, 21 Dec 2015 17:53:27 -0500 Subject: [PATCH 7/8] Remove build_by_notice --- regparser/commands/build_from.py | 120 ++++++------------------------- 1 file changed, 20 insertions(+), 100 deletions(-) diff --git a/regparser/commands/build_from.py b/regparser/commands/build_from.py index d583646..bd6414a 100755 --- a/regparser/commands/build_from.py +++ b/regparser/commands/build_from.py @@ -1,55 +1,18 @@ -import codecs -import hashlib import logging import click -try: - import requests_cache - requests_cache.install_cache('fr_cache') -except ImportError: - # If the cache library isn't present, do nothing -- we'll just make full - # HTTP requests rather than looking it up from the cache - pass -from regparser.builder import ( - LayerCacheAggregator, tree_and_builder, Checkpointer, NullCheckpointer, - Builder) +from regparser.builder import LayerCacheAggregator, tree_and_builder from regparser.diff.tree import changes_between from regparser.tree.struct import FrozenNode + logger = logging.getLogger('build_from') logger.setLevel(logging.INFO) logger.addHandler(logging.StreamHandler()) -# @profile -def parse_regulation(filename, title, act_title, act_section, checkpoint_dir, - doc_number): - """ Run the parser on the specified command-line arguments. Broken out - into separate function to assist in profiling. - """ - - act_title_and_section = [act_title, act_section] - # First, the regulation tree - - reg_tree, builder = tree_and_builder(filename, title, checkpoint_dir, - doc_number) - - builder.write_notices() - - # Always do at least the first reg - logger.info("Version %s", builder.doc_number) - builder.write_regulation(reg_tree) - layer_cache = LayerCacheAggregator() - - builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache) - layer_cache.replace_using(reg_tree) - - if generate_diffs: - generate_diffs(reg_tree, act_title_and_section, builder, layer_cache) - - -def generate_diffs(reg_tree, act_title_and_section, builder, layer_cache): +def gen_diffs(reg_tree, act_title_and_section, builder, layer_cache): """ Generate all the diffs for the given regulation. Broken out into separate function to assist with profiling so it's easier to determine which parts of the parser take the most time """ @@ -84,54 +47,6 @@ def generate_diffs(reg_tree, act_title_and_section, builder, layer_cache): ).write(changes) -def build_by_notice(filename, title, act_title, act_section, - notice_doc_numbers, doc_number=None, checkpoint=None): - - with codecs.open(filename, 'r', 'utf-8') as f: - reg = f.read() - file_digest = hashlib.sha256(reg.encode('utf-8')).hexdigest() - - if checkpoint: - checkpointer = Checkpointer(checkpoint) - else: - checkpointer = NullCheckpointer() - - # build the initial tree - reg_tree = checkpointer.checkpoint( - "init-tree-" + file_digest, - lambda: Builder.reg_tree(reg)) - - title_part = reg_tree.label_id() - - if doc_number is None: - doc_number = Builder.determine_doc_number(reg, title, title_part) - - checkpointer.suffix = ":".join( - ["", title_part, str(args.title), doc_number]) - - # create the builder - builder = Builder(cfr_title=title, - cfr_part=title_part, - doc_number=doc_number, - checkpointer=checkpointer) - - builder.fetch_notices_json() - - for notice in notice_doc_numbers: - builder.build_notice_from_doc_number(notice) - - builder.write_regulation(reg_tree) - layer_cache = LayerCacheAggregator() - - act_title_and_section = [act_title, act_section] - - builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache) - layer_cache.replace_using(reg_tree) - - if args.generate_diffs: - generate_diffs(reg_tree, act_title_and_section, builder, layer_cache) - - @click.command() @click.argument('filename', type=click.Path(exists=True, dir_okay=False, readable=True)) @@ -150,13 +65,8 @@ def build_by_notice(filename, title, act_title, act_section, 'if the regulation has no electronic final rules on ' 'federalregister.gov, i.e. has not changed since before ' '~2000)')) -@click.option('--last-notice', help='the last notice to be used') -@click.option('--operation') -@click.option('--notices-to-apply', nargs=-1) -# @profile def build_from(filename, title, act_title, act_section, generate_diffs, - checkpoint, version_identifier, last_notice, operation, - notices_to_apply): + checkpoint, version_identifier): """Build all data from provided xml. Reads the provided file and builds all versions of the regulation, its layers, etc. that follow. @@ -164,9 +74,19 @@ def build_from(filename, title, act_title, act_section, generate_diffs, FILENAME: XML file containing the regulation TITLE: CFR title """ - if operation == 'build_by_notice': - build_by_notice(filename, title, act_title, act_section, - notices_to_apply, last_notice, checkpoint) - else: - parse_regulation(filename, title, act_title, act_section, checkpoint, - version_identifier) + act_title_and_section = [act_title, act_section] + # First, the regulation tree + reg_tree, builder = tree_and_builder(filename, title, checkpoint, + version_identifier) + builder.write_notices() + + # Always do at least the first reg + logger.info("Version %s", builder.doc_number) + builder.write_regulation(reg_tree) + layer_cache = LayerCacheAggregator() + + builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache) + layer_cache.replace_using(reg_tree) + + if generate_diffs: + gen_diffs(reg_tree, act_title_and_section, builder, layer_cache) From ad475541a215ba89b1dd3060c7ff30c34433807f Mon Sep 17 00:00:00 2001 From: CM Lubinski Date: Mon, 21 Dec 2015 18:00:56 -0500 Subject: [PATCH 8/8] Update README to match --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1b4f16f..44edef3 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ tweaked to pass the parser. 1. `git clone https://github.com/cfpb/fr-notices.git` 1. `pip install -r requirements.txt` 1. `echo "LOCAL_XML_PATHS = ['fr-notices/']" >> local_settings.py` -1. `eregs build_from fr-notices/articles/xml/201/131/725.xml 12 2011-31725 15 1693` +1. `eregs build_from fr-notices/articles/xml/201/131/725.xml 12` If you review the history of the `fr-notices` repo, you'll see some of the types of changes that need to be made. @@ -155,7 +155,7 @@ $ eregs build_from regulation.xml title For example, to match the reissuance above: ```bash -$ eregs build_from 725.xml 12 15 1693 +$ eregs build_from 725.xml 12 ``` Here ```12``` is the CFR title number (in our case, for "Banks and Banking"). @@ -581,7 +581,7 @@ the regulation H example above 1. `cd /path/to/regulations-parser` 1. `echo "API_BASE = 'http://127.0.0.1:8888/'" >> local_settings.py` - 1. `eregs build_from CFR-2012-title12-vol8-part1004.xml 12 2011-18676 15 1693` + 1. `eregs build_from CFR-2012-title12-vol8-part1004.xml 12` Next up, we set up [regulations-site](https://github.com/cfpb/regulations-site) to provide a webapp.