From 0adb79b937a69a85921bf292471c913552b8ddb4 Mon Sep 17 00:00:00 2001
From: m-maryia <mmaryia@google.com>
Date: Tue, 7 Oct 2025 10:28:51 +0200
Subject: [PATCH 1/4] add JP parsing rules without model, handle the absence of
 model

---
 model/countries/JP/JP-metadata.yaml           |  2 +
 model/countries/JP/JP-parsing-rules.yaml      | 87 +++++++++++++++++++
 model/main.py                                 | 29 ++++---
 model/modules/formatting/formatting_module.py |  4 +-
 model/modules/parsing/parsing_module.py       |  2 +-
 model/renderer.py                             |  5 +-
 6 files changed, 111 insertions(+), 18 deletions(-)
 create mode 100644 model/countries/JP/JP-metadata.yaml
 create mode 100644 model/countries/JP/JP-parsing-rules.yaml

diff --git a/model/countries/JP/JP-metadata.yaml b/model/countries/JP/JP-metadata.yaml
new file mode 100644
index 0000000..f1daa1e
--- /dev/null
+++ b/model/countries/JP/JP-metadata.yaml
@@ -0,0 +1,2 @@
+country: JP
+flag: 🇯🇵
diff --git a/model/countries/JP/JP-parsing-rules.yaml b/model/countries/JP/JP-parsing-rules.yaml
new file mode 100644
index 0000000..3c63e12
--- /dev/null
+++ b/model/countries/JP/JP-parsing-rules.yaml
@@ -0,0 +1,87 @@
+regex_definitions:
+  # Regular expression to match the optional zip prefix: the postal mark 〒 plus any whitespace.
+  kZipOptionalPrefixRe:
+    regex_fragment: (?:〒\s*)
+
+  # Regular expression to match the 3-digit zip prefix (ASCII or full-width digits).
+  kZipPrefixValueRe:
+    regex_fragment: (?:[0-9０-９]{3})
+  
+  # Regular expression pattern to match the separator between
+  # zip code prefix and suffix.
+  kZipCodeSeparatorsRe:
+    regex_fragment: (?:[\s－-]+)
+
+  # Regular expression to match the 4-digit zip suffix (ASCII or full-width digits).
+  kZipSuffixValueRe:
+    regex_fragment: (?:[0-9０-９]{4})
+
+capture_definitions:
+  # Expression to parse `postal-code` into `postal-code-prefix` and
+  # `postal-code-suffix`. The 〒 prefix (matched, not captured) and the separator are optional.
+  ParsePostalCodeOptionalSeparatorOptionalPrefixExpression:
+    capture:
+      output: postal-code
+      parts:
+      - no_capture:
+          parts: [ {regex_reference: kZipOptionalPrefixRe} ]
+          quantifier: MATCH_OPTIONAL
+      - capture:
+          output: postal-code-prefix
+          parts: [ {regex_reference: kZipPrefixValueRe} ]
+      - no_capture:
+          parts:
+          - separator: {regex_reference: kZipCodeSeparatorsRe}
+          quantifier: MATCH_OPTIONAL
+      - capture:
+          output: postal-code-suffix
+          parts: [ {regex_reference: kZipSuffixValueRe} ]
+
+parsing_definitions:
+  postal-code:
+    decomposition:
+      capture_reference: ParsePostalCodeOptionalSeparatorOptionalPrefixExpression
+
+test_parsing_definitions:
+- id: "Zip code with separator"
+  type: postal-code
+  input: "163-8001"
+  output:
+    postal-code-prefix: "163"
+    postal-code-suffix: "8001"
+- id: "Zip code without separator"
+  type: postal-code
+  input: "1638001"
+  output:
+    postal-code-prefix: "163"
+    postal-code-suffix: "8001" 
+- id: "Full-width zip code with separator"
+  type: postal-code
+  input: "１６３－８００１"
+  output:
+    postal-code-prefix: "１６３"
+    postal-code-suffix: "８００１"
+- id: "Zip code with optional prefix and separator"
+  type: postal-code
+  input: "〒163-8001"
+  output:
+    postal-code-prefix: "163"
+    postal-code-suffix: "8001"
+- id: "Zip code with optional prefix, space, and separator"
+  type: postal-code
+  input: "〒 163-8001"
+  output:
+    postal-code-prefix: "163"
+    postal-code-suffix: "8001"
+- id: "Full-width zip code with optional prefix"
+  type: postal-code
+  input: "〒１６３－８００１"
+  output:
+    postal-code-prefix: "１６３"
+    postal-code-suffix: "８００１"
+- id: "Zip code with optional prefix and no separator"
+  type: postal-code
+  input: "〒1638001"
+  output:
+    postal-code-prefix: "163"
+    postal-code-suffix: "8001"
diff --git a/model/main.py b/model/main.py
index bbb5c97..7deb819 100644
--- a/model/main.py
+++ b/model/main.py
@@ -127,20 +127,21 @@ def country_of_path(path: Path) -> str:
       after_token_index += new_after_token_index
   content += after_token_index
 
-  all_token_content = ""
-  for token in renderer.get_model(country).pre_order_only_uniques():
-    token_content = ""
-    for module in modules:
-      if new_token_conent := module.render_token_details(
-          country, token.id, renderer):
-        token_content += new_token_conent
-    token_content = renderer.wrap_token_details(token.id,
-                                                renderer.get_model(country),
-                                                token_content)
-    all_token_content += token_content
-  if all_token_content:
-    all_token_content = renderer.wrap_all_token_details(all_token_content)
-    content += all_token_content
+  model = renderer.get_model(country)
+  if model is not None:
+    all_token_content = ""
+    for token in model.pre_order_only_uniques():
+      token_content = ""
+      for module in modules:
+        if new_token_conent := module.render_token_details(
+            country, token.id, renderer):
+          token_content += new_token_conent
+      token_content = renderer.wrap_token_details(token.id, model,
+                                                  token_content)
+      all_token_content += token_content
+    if all_token_content:
+      all_token_content = renderer.wrap_all_token_details(all_token_content)
+      content += all_token_content
 
   epilogue = ""
   for module in modules:
diff --git a/model/modules/formatting/formatting_module.py b/model/modules/formatting/formatting_module.py
index 2caa902..096e908 100644
--- a/model/modules/formatting/formatting_module.py
+++ b/model/modules/formatting/formatting_module.py
@@ -192,6 +192,8 @@ def apply_formatting(self, country: str, token_id: str, data: dict,
       return str(data[token_id])
 
     model = renderer.get_model(country)
+    if model is None:
+      return ""
 
     token = model.find_token(token_id)
     if not token or token.is_atomic_token() or not token.children:
@@ -290,7 +292,7 @@ def collect_details_for_example_addresses(self, country: str,
       })
     return {
         'examples': collected_details,
-        'model': renderer.country_data[country]['model']
+        'model': renderer.country_data[country].get('model')
     }
 
   def render_after_token_index(self, country: str,
diff --git a/model/modules/parsing/parsing_module.py b/model/modules/parsing/parsing_module.py
index afd5305..051874d 100644
--- a/model/modules/parsing/parsing_module.py
+++ b/model/modules/parsing/parsing_module.py
@@ -174,7 +174,7 @@ def observe_file(self, path: Path, renderer: Renderer):
     engine.prune_output_types(all_removed_tokens)
 
     model = renderer.get_model(country)
-    if not engine.validate(model):
+    if model is not None and not engine.validate(model):
       return
 
     if 'test_regex_definitions' in yaml:
diff --git a/model/renderer.py b/model/renderer.py
index cbd8af8..2bb01c2 100644
--- a/model/renderer.py
+++ b/model/renderer.py
@@ -24,6 +24,7 @@ class Renderer:
 
   countries = []
   vendor_extension_extra_pages: List[ExtraPage] = []
+  LEGACY_COUNTRT_CODE = "XX"
 
   def __init__(self,
                output_dir: Optional[str] = None,
@@ -38,8 +39,8 @@ def add_country(self, country: str) -> None:
     # Put "global" first.
     self.countries.sort(key=lambda c: "" if c == "global" else c)
 
-  def get_model(self, country: str) -> Any:
-    return self.country_data[country]["model"]
+  def get_model(self, country: str) -> Optional[Any]:
+    return self.country_data[country].get("model")
 
   def set_model(self, country: str, model: Any):
     self.country_data[country]["model"] = model

From 53b267bad065925140d174976091df2a8e60a14b Mon Sep 17 00:00:00 2001
From: m-maryia <mmaryia@google.com>
Date: Tue, 7 Oct 2025 10:28:51 +0200
Subject: [PATCH 2/4] fix typo in LEGACY_COUNTRY_CODE constant name

---
 model/renderer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/model/renderer.py b/model/renderer.py
index 2bb01c2..291b63d 100644
--- a/model/renderer.py
+++ b/model/renderer.py
@@ -24,7 +24,7 @@ class Renderer:
 
   countries = []
   vendor_extension_extra_pages: List[ExtraPage] = []
-  LEGACY_COUNTRT_CODE = "XX"
+  LEGACY_COUNTRY_CODE = "XX"
 
   def __init__(self,
                output_dir: Optional[str] = None,

From 1941df35e16bd1aa4d626729d6bc7d824f64ead7 Mon Sep 17 00:00:00 2001
From: m-maryia <mmaryia@google.com>
Date: Fri, 10 Oct 2025 14:38:26 +0200
Subject: [PATCH 3/4] replace explicit None checks on model with truthiness checks

---
 model/main.py                                 | 2 +-
 model/modules/formatting/formatting_module.py | 2 +-
 model/modules/parsing/parsing_module.py       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/model/main.py b/model/main.py
index 7deb819..f4071fb 100644
--- a/model/main.py
+++ b/model/main.py
@@ -128,7 +128,7 @@ def country_of_path(path: Path) -> str:
   content += after_token_index
 
   model = renderer.get_model(country)
-  if model is not None:
+  if model:
     all_token_content = ""
     for token in model.pre_order_only_uniques():
       token_content = ""
diff --git a/model/modules/formatting/formatting_module.py b/model/modules/formatting/formatting_module.py
index 096e908..c436972 100644
--- a/model/modules/formatting/formatting_module.py
+++ b/model/modules/formatting/formatting_module.py
@@ -192,7 +192,7 @@ def apply_formatting(self, country: str, token_id: str, data: dict,
       return str(data[token_id])
 
     model = renderer.get_model(country)
-    if model is None:
+    if not model:
       return ""
 
     token = model.find_token(token_id)
diff --git a/model/modules/parsing/parsing_module.py b/model/modules/parsing/parsing_module.py
index 051874d..4483a79 100644
--- a/model/modules/parsing/parsing_module.py
+++ b/model/modules/parsing/parsing_module.py
@@ -174,7 +174,7 @@ def observe_file(self, path: Path, renderer: Renderer):
     engine.prune_output_types(all_removed_tokens)
 
     model = renderer.get_model(country)
-    if model is not None and not engine.validate(model):
+    if model and not engine.validate(model):
       return
 
     if 'test_regex_definitions' in yaml:

From b6f62d9663560e2657e5e67843704bb0e02a041a Mon Sep 17 00:00:00 2001
From: m-maryia <mmaryia@google.com>
Date: Fri, 10 Oct 2025 14:56:37 +0200
Subject: [PATCH 4/4] remove legacy country code constant

---
 model/renderer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/model/renderer.py b/model/renderer.py
index 291b63d..b1b1a8b 100644
--- a/model/renderer.py
+++ b/model/renderer.py
@@ -24,7 +24,6 @@ class Renderer:
 
   countries = []
   vendor_extension_extra_pages: List[ExtraPage] = []
-  LEGACY_COUNTRY_CODE = "XX"
 
   def __init__(self,
                output_dir: Optional[str] = None,