Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions model/countries/JP/JP-metadata.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
country: JP
flag: 🇯🇵
87 changes: 87 additions & 0 deletions model/countries/JP/JP-parsing-rules.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
regex_definitions:
# Regular expression to match optional zip prefix.
kZipOptionalPrefixRe:
regex_fragment: (?:〒\s*)

# Regular expression to match 3-digit zip prefix.
kZipPrefixValueRe:
regex_fragment: (?:[0-9０-９]{3})

# Regular expression pattern to match the separator between
# zip code prefix and suffix.
kZipCodeSeparatorsRe:
regex_fragment: (?:[\s－-]+)

# Regular expression to match 4-digit zip suffix.
kZipSuffixValueRe:
regex_fragment: (?:[0-9０-９]{4})

capture_definitions:
# Returns an expression to parse `postal-code` into `postal-code-prefix`
# and `postal-code-suffix`, separator is optional, skip optional prefix.
ParsePostalCodeOptionalSeparatorOptionalPrefixExpression:
capture:
output: postal-code
parts:
- no_capture:
parts: [ {regex_reference: kZipOptionalPrefixRe} ]
quantifier: MATCH_OPTIONAL
- capture:
output: postal-code-prefix
parts: [ {regex_reference: kZipPrefixValueRe} ]
- no_capture:
parts:
- separator: {regex_reference: kZipCodeSeparatorsRe}
quantifier: MATCH_OPTIONAL
- capture:
output: postal-code-suffix
parts: [ {regex_reference: kZipSuffixValueRe} ]

parsing_definitions:
postal-code:
decomposition:
capture_reference: ParsePostalCodeOptionalSeparatorOptionalPrefixExpression

test_parsing_definitions:
- id: "Zip code with separator"
type: postal-code
input: "163-8001"
output:
postal-code-prefix: "163"
postal-code-suffix: "8001"
- id: "Zip code without separator"
type: postal-code
input: "1638001"
output:
postal-code-prefix: "163"
postal-code-suffix: "8001"
- id: "Full-width zip code with separator"
type: postal-code
input: "１６３－８００１"
output:
postal-code-prefix: "１６３"
postal-code-suffix: "８００１"
- id: "Zip code with optional prefix and separator"
type: postal-code
input: "〒163-8001"
output:
postal-code-prefix: "163"
postal-code-suffix: "8001"
- id: "Zip code with optional prefix, space, and separator"
type: postal-code
input: "〒 163-8001"
output:
postal-code-prefix: "163"
postal-code-suffix: "8001"
- id: "Full-width zip code with optional prefix"
type: postal-code
input: "〒１６３－８００１"
output:
postal-code-prefix: "１６３"
postal-code-suffix: "８００１"
- id: "Zip code with optional prefix and no separator"
type: postal-code
input: "〒1638001"
output:
postal-code-prefix: "163"
postal-code-suffix: "8001"
29 changes: 15 additions & 14 deletions model/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,20 +127,21 @@ def country_of_path(path: Path) -> str:
after_token_index += new_after_token_index
content += after_token_index

all_token_content = ""
for token in renderer.get_model(country).pre_order_only_uniques():
token_content = ""
for module in modules:
if new_token_conent := module.render_token_details(
country, token.id, renderer):
token_content += new_token_conent
token_content = renderer.wrap_token_details(token.id,
renderer.get_model(country),
token_content)
all_token_content += token_content
if all_token_content:
all_token_content = renderer.wrap_all_token_details(all_token_content)
content += all_token_content
model = renderer.get_model(country)
if model:
all_token_content = ""
for token in model.pre_order_only_uniques():
token_content = ""
for module in modules:
if new_token_conent := module.render_token_details(
country, token.id, renderer):
token_content += new_token_conent
token_content = renderer.wrap_token_details(token.id, model,
token_content)
all_token_content += token_content
if all_token_content:
all_token_content = renderer.wrap_all_token_details(all_token_content)
content += all_token_content

epilogue = ""
for module in modules:
Expand Down
4 changes: 3 additions & 1 deletion model/modules/formatting/formatting_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ def apply_formatting(self, country: str, token_id: str, data: dict,
return str(data[token_id])

model = renderer.get_model(country)
if not model:
return ""

token = model.find_token(token_id)
if not token or token.is_atomic_token() or not token.children:
Expand Down Expand Up @@ -290,7 +292,7 @@ def collect_details_for_example_addresses(self, country: str,
})
return {
'examples': collected_details,
'model': renderer.country_data[country]['model']
'model': renderer.country_data[country].get('model')
}

def render_after_token_index(self, country: str,
Expand Down
2 changes: 1 addition & 1 deletion model/modules/parsing/parsing_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def observe_file(self, path: Path, renderer: Renderer):
engine.prune_output_types(all_removed_tokens)

model = renderer.get_model(country)
if not engine.validate(model):
if model and not engine.validate(model):
return

if 'test_regex_definitions' in yaml:
Expand Down
4 changes: 2 additions & 2 deletions model/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def add_country(self, country: str) -> None:
# Put "global" first.
self.countries.sort(key=lambda c: "" if c == "global" else c)

def get_model(self, country: str) -> Any:
return self.country_data[country]["model"]
def get_model(self, country: str) -> Optional[Any]:
return self.country_data[country].get("model")

def set_model(self, country: str, model: Any):
self.country_data[country]["model"] = model
Expand Down