From fa304a251a31c5f0efe20dee7997f521eb0b9e39 Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Tue, 13 May 2025 10:54:38 -0700 Subject: [PATCH 01/29] First draft of Korean Cardinal ITN Sparrowhawk testing is not done yet. Signed-off-by: hmlee245 --- .../inverse_normalize.py | 7 +- .../inverse_text_normalization/ko/__init__.py | 17 + .../ko/clean_eval_data.py | 361 ++++++++++++++++++ .../ko/data/__init__.py | 13 + .../ko/data/numbers/__init__.py | 13 + .../ko/data/numbers/digit.tsv | 9 + .../ko/data/numbers/thousands.tsv | 11 + .../ko/data/numbers/zero.tsv | 1 + .../ko/graph_utils.py | 292 ++++++++++++++ .../ko/taggers/__init__.py | 17 + .../ko/taggers/cardinal.py | 104 +++++ .../ko/taggers/tokenize_and_classify.py | 76 ++++ .../ko/taggers/word.py | 32 ++ .../inverse_text_normalization/ko/utils.py | 23 ++ .../ko/verbalizers/__init__.py | 17 + .../ko/verbalizers/cardinal.py | 54 +++ .../ko/verbalizers/verbalize.py | 36 ++ .../ko/verbalizers/verbalize_final.py | 49 +++ .../ko/verbalizers/word.py | 34 ++ .../run_evaluate.py | 2 +- tests/nemo_text_processing/ko/__init__.py | 13 + .../test_cases_cardinal.txt | 27 ++ .../nemo_text_processing/ko/test_cardinal.py | 39 ++ ..._sparrowhawk_inverse_text_normalization.sh | 34 ++ .../pynini_export.py | 8 + 25 files changed, 1287 insertions(+), 2 deletions(-) create mode 100644 nemo_text_processing/inverse_text_normalization/ko/__init__.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/__init__.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/numbers/__init__.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/numbers/digit.tsv create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv create mode 100644 
nemo_text_processing/inverse_text_normalization/ko/graph_utils.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/word.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/utils.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py create mode 100644 tests/nemo_text_processing/ko/__init__.py create mode 100644 tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt create mode 100644 tests/nemo_text_processing/ko/test_cardinal.py create mode 100644 tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh diff --git a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py index c10819908..e505a8ad0 100644 --- a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py +++ b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py @@ -131,6 +131,11 @@ def __init__( from nemo_text_processing.inverse_text_normalization.ja.verbalizers.verbalize_final import ( VerbalizeFinalFst, ) + elif lang == 'ko': # Korean + from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst + from 
nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import ( + VerbalizeFinalFst, + ) self.tagger = ClassifyFst( cache_dir=cache_dir, whitelist=whitelist, overwrite_cache=overwrite_cache, input_case=input_case @@ -175,7 +180,7 @@ def parse_args(): parser.add_argument( "--language", help="language", - choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja'], + choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja','ko'], default="en", type=str, ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/__init__.py new file mode 100644 index 000000000..f541211af --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst diff --git a/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py b/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py new file mode 100644 index 000000000..3c1193333 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py @@ -0,0 +1,361 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from argparse import ArgumentParser +from typing import List + +import regex as re + +from nemo_text_processing.text_normalization.data_loader_utils import ( + EOS_TYPE, + Instance, + load_files, + training_data_to_sentences, +) + +""" +This file is for evaluation purposes. +filter_loaded_data() cleans data (list of instances) for inverse text normalization. Filters and cleaners can be specified for each semiotic class individually. +For example, normalized text should only include characters and whitespace characters but no punctuation. + Cardinal unnormalized instances should contain at least one integer and all other characters are removed. 
+""" + + +class Filter: + """ + Filter class + + Args: + class_type: semiotic class used in dataset + process_func: function to transform text + filter_func: function to filter text + + """ + + def __init__(self, class_type: str, process_func: object, filter_func: object): + self.class_type = class_type + self.process_func = process_func + self.filter_func = filter_func + + def filter(self, instance: Instance) -> bool: + """ + filter function + + Args: + filters given instance with filter function + + Returns: True if given instance fulfills criteria or does not belong to class type + """ + if instance.token_type != self.class_type: + return True + return self.filter_func(instance) + + def process(self, instance: Instance) -> Instance: + """ + process function + + Args: + processes given instance with process function + + Returns: processed instance if instance belongs to expected class type or original instance + """ + if instance.token_type != self.class_type: + return instance + return self.process_func(instance) + + +def filter_cardinal_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_cardinal_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + un_normalized = re.sub(r"[^0-9]", "", un_normalized) + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_ordinal_1(instance: Instance) -> bool: + ok = re.search(r"(st|nd|rd|th)\s*$", instance.un_normalized) + return ok + + +def process_ordinal_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + un_normalized = re.sub(r"[,\s]", "", un_normalized) + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def 
filter_decimal_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_decimal_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + un_normalized = re.sub(r",", "", un_normalized) + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_measure_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_measure_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + un_normalized = re.sub(r",", "", un_normalized) + un_normalized = re.sub(r"m2", "m²", un_normalized) + un_normalized = re.sub(r"(\d)([^\d.\s])", r"\1 \2", un_normalized) + normalized = re.sub(r"[^a-z\s]", "", normalized) + normalized = re.sub(r"per ([a-z\s]*)s$", r"per \1", normalized) + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_money_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_money_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + un_normalized = re.sub(r",", "", un_normalized) + un_normalized = re.sub(r"a\$", r"$", un_normalized) + un_normalized = re.sub(r"us\$", r"$", un_normalized) + un_normalized = re.sub(r"(\d)m\s*$", r"\1 million", un_normalized) + un_normalized = re.sub(r"(\d)bn?\s*$", r"\1 billion", un_normalized) + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_time_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_time_1(instance: Instance) -> Instance: + un_normalized = 
instance.un_normalized + un_normalized = re.sub(r": ", ":", un_normalized) + un_normalized = re.sub(r"(\d)\s?a\s?m\s?", r"\1 a.m.", un_normalized) + un_normalized = re.sub(r"(\d)\s?p\s?m\s?", r"\1 p.m.", un_normalized) + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_plain_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_plain_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_punct_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_punct_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_date_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_date_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + un_normalized = re.sub(r",", "", un_normalized) + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_letters_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_letters_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_verbatim_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_verbatim_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + 
normalized = instance.normalized + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_digit_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_digit_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_telephone_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_telephone_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_electronic_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_electronic_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_fraction_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_fraction_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_address_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_address_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = 
re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +filters = [] +filters.append(Filter(class_type="CARDINAL", + process_func=process_cardinal_1, filter_func=filter_cardinal_1)) +filters.append(Filter(class_type="ORDINAL", + process_func=process_ordinal_1, filter_func=filter_ordinal_1)) +filters.append(Filter(class_type="DECIMAL", + process_func=process_decimal_1, filter_func=filter_decimal_1)) +filters.append(Filter(class_type="MEASURE", + process_func=process_measure_1, filter_func=filter_measure_1)) +filters.append(Filter(class_type="MONEY", + process_func=process_money_1, filter_func=filter_money_1)) +filters.append(Filter(class_type="TIME", + process_func=process_time_1, filter_func=filter_time_1)) + +filters.append(Filter(class_type="DATE", + process_func=process_date_1, filter_func=filter_date_1)) +filters.append(Filter(class_type="PLAIN", + process_func=process_plain_1, filter_func=filter_plain_1)) +filters.append(Filter(class_type="PUNCT", + process_func=process_punct_1, filter_func=filter_punct_1)) +filters.append(Filter(class_type="LETTERS", + process_func=process_letters_1, filter_func=filter_letters_1)) +filters.append(Filter(class_type="VERBATIM", + process_func=process_verbatim_1, filter_func=filter_verbatim_1)) +filters.append(Filter(class_type="DIGIT", + process_func=process_digit_1, filter_func=filter_digit_1)) +filters.append(Filter(class_type="TELEPHONE", + process_func=process_telephone_1, filter_func=filter_telephone_1)) +filters.append(Filter(class_type="ELECTRONIC", + process_func=process_electronic_1, filter_func=filter_electronic_1)) +filters.append(Filter(class_type="FRACTION", + process_func=process_fraction_1, filter_func=filter_fraction_1)) +filters.append(Filter(class_type="ADDRESS", + process_func=process_address_1, filter_func=filter_address_1)) +filters.append(Filter(class_type=EOS_TYPE, + process_func=lambda x: x, filter_func=lambda x: True)) 
+ + +def filter_loaded_data(data: List[Instance], verbose: bool = False) -> List[Instance]: + """ + Filters list of instances + + Args: + data: list of instances + + Returns: filtered and transformed list of instances + """ + updates_instances = [] + for instance in data: + updated_instance = False + for fil in filters: + if fil.class_type == instance.token_type and fil.filter(instance): + instance = fil.process(instance) + updated_instance = True + if updated_instance: + if verbose: + print(instance) + updates_instances.append(instance) + return updates_instances + + +def parse_args(): + parser = ArgumentParser() + parser.add_argument("--input", help="input file path", + type=str, default='./en_with_types/output-00001-of-00100') + parser.add_argument( + "--verbose", help="print filtered instances", action='store_true') + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + file_path = args.input + + print("Loading training data: " + file_path) + instance_list = load_files([file_path]) # List of instances + filtered_instance_list = filter_loaded_data(instance_list, args.verbose) + training_data_to_sentences(filtered_instance_list) diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/data/__init__.py new file mode 100644 index 000000000..341a77c5b --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/__init__.py new file mode 100644 index 000000000..341a77c5b --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/digit.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/digit.tsv new file mode 100644 index 000000000..9871cb9cf --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/digit.tsv @@ -0,0 +1,9 @@ +일 1 +이 2 +삼 3 +사 4 +오 5 +육 6 +칠 7 +팔 8 +구 9 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv new file mode 100644 index 000000000..541752211 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv @@ -0,0 +1,11 @@ +억 +조 +경 +해 +자 +양 +구 +간 +정 +재 +극 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv new file mode 100644 index 000000000..43baac7c1 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv @@ -0,0 +1 @@ +영 0 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/graph_utils.py b/nemo_text_processing/inverse_text_normalization/ko/graph_utils.py new file mode 100644 index 000000000..7a9fd8720 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/graph_utils.py @@ -0,0 +1,292 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import string +from pathlib import Path +from typing import Dict + +import pynini +from pynini import Far +from pynini.examples import plurals +from pynini.export import export +from pynini.lib import byte, pynutil, utf8 + +from nemo_text_processing.text_normalization.en.utils import get_abs_path, load_labels + +NEMO_CHAR = utf8.VALID_UTF8_CHAR + +NEMO_NARROW_NON_BREAK_SPACE = "\u202f" +NEMO_DIGIT = byte.DIGIT +NEMO_LOWER = pynini.union(*string.ascii_lowercase).optimize() +NEMO_UPPER = pynini.union(*string.ascii_uppercase).optimize() +NEMO_ALPHA = pynini.union(NEMO_LOWER, NEMO_UPPER).optimize() +NEMO_ALNUM = pynini.union(NEMO_DIGIT, NEMO_ALPHA).optimize() +NEMO_HEX = pynini.union(*string.hexdigits).optimize() +NEMO_NON_BREAKING_SPACE = "\u00a0" +NEMO_SPACE = " " +NEMO_WHITE_SPACE = pynini.union(" ", "\t", "\n", "\r", u"\u00a0").optimize() +NEMO_NOT_SPACE = pynini.difference(NEMO_CHAR, NEMO_WHITE_SPACE).optimize() +NEMO_NOT_QUOTE = pynini.difference(NEMO_CHAR, r'"').optimize() + +NEMO_PUNCT = pynini.union(*map(pynini.escape, string.punctuation)).optimize() +NEMO_GRAPH = pynini.union(NEMO_ALNUM, NEMO_PUNCT).optimize() + +NEMO_SIGMA = pynini.closure(NEMO_CHAR) + +NEMO_NOT_ALPHA = pynini.difference(NEMO_SIGMA, NEMO_ALPHA).optimize() +NEMO_LOWER_NOT_A = pynini.union( + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", +).optimize() + +delete_space = pynutil.delete(pynini.closure(NEMO_WHITE_SPACE)) +delete_zero_or_one_space = pynutil.delete(pynini.closure(NEMO_WHITE_SPACE, 0, 1)) +insert_space = pynutil.insert(" ") +delete_extra_space = pynini.cross(pynini.closure(NEMO_WHITE_SPACE, 1), " ") +delete_preserve_order = pynini.closure( + pynutil.delete(" preserve_order: true") + | (pynutil.delete(" field_order: \"") + 
NEMO_NOT_QUOTE + pynutil.delete("\"")) +) + +suppletive = pynini.string_file(get_abs_path("data/suppletive.tsv")) +# _v = pynini.union("a", "e", "i", "o", "u") +_c = pynini.union( + "b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "v", "w", "x", "y", "z" +) +_ies = NEMO_SIGMA + _c + pynini.cross("y", "ies") +_es = NEMO_SIGMA + pynini.union("s", "sh", "ch", "x", "z") + pynutil.insert("es") +_s = NEMO_SIGMA + pynutil.insert("s") + +graph_plural = plurals._priority_union( + suppletive, plurals._priority_union(_ies, plurals._priority_union(_es, _s, NEMO_SIGMA), NEMO_SIGMA), NEMO_SIGMA +).optimize() + +SINGULAR_TO_PLURAL = graph_plural +PLURAL_TO_SINGULAR = pynini.invert(graph_plural) +TO_LOWER = pynini.union(*[pynini.cross(x, y) for x, y in zip(string.ascii_uppercase, string.ascii_lowercase)]) +TO_UPPER = pynini.invert(TO_LOWER) +MIN_NEG_WEIGHT = -0.0001 +MIN_POS_WEIGHT = 0.0001 +INPUT_CASED = "cased" +INPUT_LOWER_CASED = "lower_cased" +MINUS = pynini.union("minus", "Minus").optimize() + + +def capitalized_input_graph( + graph: 'pynini.FstLike', original_graph_weight: float = None, capitalized_graph_weight: float = None +) -> 'pynini.FstLike': + """ + Allow graph input to be capitalized, e.g. for ITN) + + Args: + graph: FstGraph + original_graph_weight: weight to add to the original `graph` + capitalized_graph_weight: weight to add to the capitalized graph + """ + capitalized_graph = pynini.compose(TO_LOWER + NEMO_SIGMA, graph).optimize() + + if original_graph_weight is not None: + graph = pynutil.add_weight(graph, weight=original_graph_weight) + + if capitalized_graph_weight is not None: + capitalized_graph = pynutil.add_weight(capitalized_graph, weight=capitalized_graph_weight) + + graph |= capitalized_graph + return graph + + +def generator_main(file_name: str, graphs: Dict[str, 'pynini.FstLike']): + """ + Exports graph as OpenFst finite state archive (FAR) file with given file name and rule name. 
+ + Args: + file_name: exported file name + graphs: Mapping of a rule name and Pynini WFST graph to be exported + """ + exporter = export.Exporter(file_name) + for rule, graph in graphs.items(): + exporter[rule] = graph.optimize() + exporter.close() + logging.info(f'Created {file_name}') + + +def get_plurals(fst): + """ + Given singular returns plurals + + Args: + fst: Fst + + Returns plurals to given singular forms + """ + return SINGULAR_TO_PLURAL @ fst + + +def get_singulars(fst): + """ + Given plural returns singulars + + Args: + fst: Fst + + Returns singulars to given plural forms + """ + return PLURAL_TO_SINGULAR @ fst + + +def convert_space(fst) -> 'pynini.FstLike': + """ + Converts space to nonbreaking space. + Used only in tagger grammars for transducing token values within quotes, e.g. name: "hello kitty" + This is making transducer significantly slower, so only use when there could be potential spaces within quotes, otherwise leave it. + + Args: + fst: input fst + + Returns output fst where breaking spaces are converted to non breaking spaces + """ + return fst @ pynini.cdrewrite(pynini.cross(NEMO_SPACE, NEMO_NON_BREAKING_SPACE), "", "", NEMO_SIGMA) + + +def string_map_cased(input_file: str, input_case: str = INPUT_LOWER_CASED): + labels = load_labels(input_file) + + if input_case == INPUT_CASED: + additional_labels = [] + for written, spoken, *weight in labels: + written_capitalized = written[0].upper() + written[1:] + additional_labels.extend( + [ + [written_capitalized, spoken.capitalize()], # first letter capitalized + [ + written_capitalized, + spoken.upper().replace(" AND ", " and "), + ], # # add pairs with the all letters capitalized + ] + ) + + spoken_no_space = spoken.replace(" ", "") + # add abbreviations without spaces (both lower and upper case), i.e. 
"BMW" not "B M W" + if len(spoken) == (2 * len(spoken_no_space) - 1): + logging.debug(f"This is weight {weight}") + if len(weight) == 0: + additional_labels.extend( + [[written, spoken_no_space], [written_capitalized, spoken_no_space.upper()]] + ) + else: + additional_labels.extend( + [ + [written, spoken_no_space, weight[0]], + [written_capitalized, spoken_no_space.upper(), weight[0]], + ] + ) + labels += additional_labels + + whitelist = pynini.string_map(labels).invert().optimize() + return whitelist + + +class GraphFst: + """ + Base class for all grammar fsts. + + Args: + name: name of grammar class + kind: either 'classify' or 'verbalize' + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, name: str, kind: str, deterministic: bool = True): + self.name = name + self.kind = kind + self._fst = None + self.deterministic = deterministic + + self.far_path = Path(os.path.dirname(__file__) + '/grammars/' + kind + '/' + name + '.far') + if self.far_exist(): + self._fst = Far(self.far_path, mode="r", arc_type="standard", far_type="default").get_fst() + + def far_exist(self) -> bool: + """ + Returns true if FAR can be loaded + """ + return self.far_path.exists() + + @property + def fst(self) -> 'pynini.FstLike': + return self._fst + + @fst.setter + def fst(self, fst): + self._fst = fst + + def add_tokens(self, fst) -> 'pynini.FstLike': + """ + Wraps class name around to given fst + + Args: + fst: input fst + + Returns: + Fst: fst + """ + return pynutil.insert(f"{self.name} {{ ") + fst + pynutil.insert(" }") + + def delete_tokens(self, fst) -> 'pynini.FstLike': + """ + Deletes class name wrap around output of given fst + + Args: + fst: input fst + + Returns: + Fst: fst + """ + res = ( + pynutil.delete(f"{self.name}") + + delete_space + + pynutil.delete("{") + + delete_space + + fst + + delete_space + + pynutil.delete("}") + ) + return res @ 
pynini.cdrewrite(pynini.cross(u"\u00a0", " "), "", "", NEMO_SIGMA) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py new file mode 100644 index 000000000..f541211af --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py new file mode 100644 index 000000000..df5804fc0 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -0,0 +1,104 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_DIGIT, GraphFst, delete_space +from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path + +class CardinalFst(GraphFst): + """ + Finite state transducer for classifying cardinals + e.g. 마이너스 이십삼 -> cardinal { integer: "23" negative: "-" } } + + Args: + input_case: accepting Korean input. + """ + + def __init__(self): + super().__init__(name="cardinal", kind="classify") + + graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv")) + graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) + graph_zero = pynini.cross("영", "0") + + graph_negative = pynini.cross("마이너스", "-") + graph_negative += delete_space + + ten = pynutil.delete("십") + ten_alt = pynini.cross("십", "1") + ### Responsible for second digit of two digit number. ex) 20's 2 + graph_ten_component = pynini.union((graph_digit + ten) | ten_alt, pynutil.insert("0")) + ### Responsible for the first digit of number. 
ex) 1,2,3,4,5,,, + graph_ten_component += graph_digit | pynutil.insert("0") + + hundred = pynutil.delete("백") + hundred_alt = pynini.cross("백", "1") + graph_hundred_component = pynini.union(((graph_digit + hundred) | hundred_alt), pynutil.insert("0")) + graph_hundred_component += graph_ten_component + + thousand = pynutil.delete("천") + thousand_alt = pynini.cross("천", "1") + graph_thousand_component = pynini.union(((graph_digit + thousand) | thousand_alt), pynutil.insert("0")) + graph_thousand_component += graph_hundred_component + + tenthousand = pynutil.delete("만") + tenthousand_alt = pynini.cross("만", "1") + ### "만" can express next four digits of numbers until the next unit "억", so insert "0000" to allocate four digit worth of space + ### From "만", keep adding four digits and graph_thousand_component(0000-9999), because Korean units increase every four digits + graph_tenthousand_component = pynini.union(((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000")) + graph_tenthousand_component += graph_thousand_component + + hundredmillion = pynutil.delete("억") + hundredmillion_alt = pynini.cross("억", "1") + graph_hundredmillion_component = pynini.union(((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000")) + graph_hundredmillion_component += graph_tenthousand_component + + trillion = pynutil.delete("조") + trillion_alt = pynini.cross("조", "1") + graph_trillion_component = pynini.union(((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000")) + graph_trillion_component += graph_hundredmillion_component + + tenquadrillion = pynutil.delete("경") + tenquadrillion_alt = pynini.cross("경", "1") + graph_tenquadrillion_component = pynini.union(((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000")) + graph_tenquadrillion_component += graph_trillion_component + + + graph = pynini.union( + ### From biggest unit to smallest, everything is included + 
graph_tenquadrillion_component| + graph_zero + ) + + leading_zero = ( + pynutil.delete(pynini.closure("0")) + pynini.difference(NEMO_DIGIT, "0") + pynini.closure(NEMO_DIGIT) + ) + graph_nonzero = graph @ leading_zero + graph = pynini.union(graph_nonzero, graph_zero) + + graph = graph @ leading_zero | graph_zero + + self.just_cardinals = graph + + optional_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1) + + final_graph = ( + optional_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"") + ) | (pynutil.insert("integer: \"") + graph + pynutil.insert("\"")) + + final_graph = self.add_tokens(final_graph) + self.fst = final_graph.optimize() \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py new file mode 100644 index 000000000..760ce6829 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -0,0 +1,76 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import os + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( + INPUT_LOWER_CASED, + GraphFst, + delete_extra_space, + delete_space, + generator_main, +) + + +class ClassifyFst(GraphFst): + """ + Final class that composes all other classification grammars. This class can process an entire sentence, that is lower cased. + For deployment, this grammar will be compiled and exported to OpenFst Finite State Archive (FAR) File. + More details to deployment at NeMo/tools/text_processing_deployment. + + Args: + input_case: accepting either "lower_cased" or "cased" input. + cache_dir: path to a dir with .far grammar file. Set to None to avoid using cache. + overwrite_cache: set to True to overwrite .far files + whitelist: path to a file with whitelist replacements + """ + + def __init__( + self, + input_case: str = INPUT_LOWER_CASED, + cache_dir: str = None, + overwrite_cache: bool = False, + whitelist: str = None, + ): + super().__init__(name="tokenize_and_classify", kind="classify") + + far_file = None + if cache_dir is not None and cache_dir != "None": + os.makedirs(cache_dir, exist_ok=True) + far_file = os.path.join(cache_dir, f"jp_itn_{input_case}.far") + if not overwrite_cache and far_file and os.path.exists(far_file): + self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"] + logging.info(f"ClassifyFst.fst was restored from {far_file}.") + else: + logging.info(f"Creating ClassifyFst grammars.") + cardinal = CardinalFst() + cardinal_graph = cardinal.fst + word_graph = WordFst().fst + classify = (pynutil.add_weight(cardinal_graph, 1.1)| pynutil.add_weight(word_graph, 100)) + + token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ") + tagger = pynini.closure(token, 1) + + 
self.fst = tagger + + if far_file: + generator_main(far_file, {"tokenize_and_classify": self.fst}) + logging.info(f"ClassifyFst grammars are saved to {far_file}.") \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py new file mode 100644 index 000000000..0d6ccd5c5 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py @@ -0,0 +1,32 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_SPACE, GraphFst + + +class WordFst(GraphFst): + """ + Finite state transducer for classifying plain tokens, that do not belong to any special class. This can be considered as the default class. + e.g. 
sleep -> tokens { name: "sleep" } + """ + + def __init__(self): + super().__init__(name="word", kind="classify") + word = pynutil.insert( + "name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"") + self.fst = word.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/utils.py b/nemo_text_processing/inverse_text_normalization/ko/utils.py new file mode 100644 index 000000000..0222cc0b8 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/utils.py @@ -0,0 +1,23 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + + + +def get_abs_path(rel_path): + + return os.path.dirname(os.path.abspath(__file__)) + '/' + rel_path + + diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py new file mode 100644 index 000000000..da950f35e --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py new file mode 100644 index 000000000..1800a6dc8 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py @@ -0,0 +1,54 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( + NEMO_NOT_QUOTE, + GraphFst, + delete_space, +) + + +class CardinalFst(GraphFst): + """ + Finite state transducer for verbalizing cardinal + e.g. 
cardinal { negative: "-" integer: "23" } -> -23 + """ + + def __init__(self): + super().__init__(name="cardinal", kind="verbalize") + negative_sign = ( + pynutil.delete("negative:") + + delete_space + + pynutil.delete("\"") + + pynini.accep("-") + + pynutil.delete("\"") + ) + + optional_sign_output = pynini.closure(negative_sign + delete_space, 0, 1) + + digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1) + integer_cardinal = ( + pynutil.delete("integer:") + + delete_space + + pynutil.delete("\"") + + digits_from_tag + + pynutil.delete("\"") + ) + + graph = integer_cardinal + final_graph = optional_sign_output + graph + self.fst = self.delete_tokens(final_graph).optimize() \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py new file mode 100644 index 000000000..9d750d757 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -0,0 +1,36 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst + + +class VerbalizeFst(GraphFst): + """ + Composes other verbalizer grammars. + For deployment, this grammar will be compiled and exported to OpenFst Finite State Archive (FAR) File. + More details to deployment at NeMo/tools/text_processing_deployment. + """ + + def __init__(self): + super().__init__(name="verbalize", kind="verbalize") + cardinal = CardinalFst() + cardinal_graph = cardinal.fst + word_graph = WordFst().fst + + graph = (cardinal_graph|word_graph) + self.fst = graph + diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py new file mode 100644 index 000000000..8554fc161 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py @@ -0,0 +1,49 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space + + +class VerbalizeFinalFst(GraphFst): + """ + Finite state transducer that verbalizes an entire sentence, e.g. + tokens { name: "its" } tokens { time { hours: "12" minutes: "30" } } tokens { name: "now" } -> its 12:30 now + """ + def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False): + super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic) + far_file = None + if cache_dir is not None and cache_dir != "None": + os.makedirs(cache_dir, exist_ok=True) + far_file = os.path.join(cache_dir, f"ko_tn_{deterministic}_deterministic_verbalizer.far") + if not overwrite_cache and far_file and os.path.exists(far_file): + self.fst = pynini.Far(far_file, mode="r")["verbalize"] + else: + # token_graph = VerbalizeFst(deterministic=deterministic) + token_graph = VerbalizeFst().fst + token_verbalizer = ( + pynutil.delete("tokens {") + delete_space + token_graph + delete_space + pynutil.delete(" }") + ) + verbalizer = pynini.closure(delete_space + token_verbalizer + delete_space) + + self.fst = (verbalizer).optimize() + if far_file: + generator_main(far_file, {"verbalize": self.fst}) diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py new file mode 100644 index 000000000..d79957ca8 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py @@ -0,0 +1,34 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space + + + +class WordFst(GraphFst): + ''' + tokens { name: "一" } -> 一 + ''' + + def __init__(self, deterministic: bool = True, lm: bool = False): + super().__init__(name="word", kind="verbalize", deterministic=deterministic) + + graph = pynutil.delete("name: \"") + NEMO_NOT_QUOTE + pynutil.delete("\"") + + self.fst = graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/run_evaluate.py b/nemo_text_processing/inverse_text_normalization/run_evaluate.py index 0852329d6..7bfdd3399 100644 --- a/nemo_text_processing/inverse_text_normalization/run_evaluate.py +++ b/nemo_text_processing/inverse_text_normalization/run_evaluate.py @@ -35,7 +35,7 @@ def parse_args(): parser.add_argument( "--lang", help="language", - choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", 'ja'], + choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja","ko"], default="en", type=str, ) diff --git a/tests/nemo_text_processing/ko/__init__.py b/tests/nemo_text_processing/ko/__init__.py new file mode 100644 index 000000000..341a77c5b --- /dev/null +++ b/tests/nemo_text_processing/ko/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt new file mode 100644 index 000000000..007273e5e --- /dev/null +++ b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt @@ -0,0 +1,27 @@ +영~0 +구~9 +십~10 +십칠~17 +오십삼~53 +백~100 +백오~105 +삼백이십~320 +구백팔십칠~987 +천~1000 +천육~1006 +천오백~1500 +오천사백삼십이~5432 +만~10000 +만천이백~11200 +삼만오천칠백~35700 +십이만~120000 +백오십만삼천~1503000 +천만~10000000 +오천이백칠십만육천백~52706100 +억~100000000 +삼억오천만~350000000 +십이억천만~1210000000 +백오십억칠천만~15070000000 +오천억~500000000000 +일조~1000000000000 +이조오천억~2500000000000 \ No newline at end of file diff --git a/tests/nemo_text_processing/ko/test_cardinal.py b/tests/nemo_text_processing/ko/test_cardinal.py new file mode 100644 index 000000000..9fd366ea6 --- /dev/null +++ b/tests/nemo_text_processing/ko/test_cardinal.py @@ -0,0 +1,39 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from parameterized import parameterized + +from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer +from nemo_text_processing.text_normalization.normalize import Normalizer +from nemo_text_processing.text_normalization.normalize_with_audio import NormalizerWithAudio + +from ..utils import CACHE_DIR, RUN_AUDIO_BASED_TESTS, parse_test_case_file + + +class TestCardinal: + inverse_normalizer_ko = InverseNormalizer(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) + + @parameterized.expand(parse_test_case_file('ko/data_inverse_text_normalization/test_cases_cardinal.txt')) + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_denorm(self, test_input, expected): + pred = self.inverse_normalizer_ko.inverse_normalize(test_input, verbose=False) + assert pred == expected + + normalizer_with_audio_ko = ( + NormalizerWithAudio(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) + if RUN_AUDIO_BASED_TESTS + else None + ) \ No newline at end of file diff --git a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh new file mode 100644 index 000000000..c44f4a703 --- /dev/null +++ b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh @@ -0,0 +1,34 @@ +#! 
/bin/sh + +GRAMMARS_DIR=${1:-"/workspace/sparrowhawk/documentation/grammars"} +TEST_DIR=${2:-"/workspace/tests/ko"} + +runtest () { + input=$1 + echo "INPUT is $input" + cd ${GRAMMARS_DIR} + + # read test file + while read testcase; do + IFS='~' read spoken written <<< $testcase + denorm_pred=$(echo $spoken | normalizer_main --config=sparrowhawk_configuration.ascii_proto 2>&1 | tail -n 1) + + # trim white space + written="$(echo -e "${written}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + denorm_pred="$(echo -e "${denorm_pred}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + + # input expected actual + assertEquals "$spoken" "$written" "$denorm_pred" + done < "$input" +} + +testITNCardinal() { + input=$TEST_DIR/data_inverse_text_normalization/test_cases_cardinal.txt + runtest $input +} + +# Remove all command-line arguments +shift $# + +# Load shUnit2 +. /workspace/shunit2/shunit2 \ No newline at end of file diff --git a/tools/text_processing_deployment/pynini_export.py b/tools/text_processing_deployment/pynini_export.py index 6b82dfbec..0df099774 100644 --- a/tools/text_processing_deployment/pynini_export.py +++ b/tools/text_processing_deployment/pynini_export.py @@ -106,6 +106,7 @@ def parse_args(): 'mr', 'ja', 'rw', + 'ko' ], type=str, default='en', @@ -307,6 +308,13 @@ def parse_args(): PostProcessingFst as TNPostProcessingFst, ) from nemo_text_processing.text_normalization.ja.verbalizers.verbalize import VerbalizeFst as TNVerbalizeFst + elif args.language == 'ko': + from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ( + ClassifyFst as ITNClassifyFst, + ) + from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import ( + VerbalizeFst as ITNVerbalizeFst, + ) elif args.language == 'rw': from nemo_text_processing.text_normalization.rw.taggers.tokenize_and_classify import ( ClassifyFst as TNClassifyFst, From 77da79d12b1378502cc2b382cd6933b02e7c2545 Mon Sep 17 00:00:00 2001 From: 
"pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 18:46:22 +0000 Subject: [PATCH 02/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../inverse_normalize.py | 4 +- .../ko/clean_eval_data.py | 59 +++++++------------ .../ko/taggers/cardinal.py | 38 +++++++----- .../ko/taggers/tokenize_and_classify.py | 12 ++-- .../ko/taggers/word.py | 3 +- .../inverse_text_normalization/ko/utils.py | 3 - .../ko/verbalizers/__init__.py | 2 +- .../ko/verbalizers/cardinal.py | 18 ++---- .../ko/verbalizers/verbalize.py | 7 +-- .../ko/verbalizers/verbalize_final.py | 3 +- .../ko/verbalizers/word.py | 1 - .../run_evaluate.py | 2 +- .../nemo_text_processing/ko/test_cardinal.py | 6 +- .../pynini_export.py | 2 +- 14 files changed, 68 insertions(+), 92 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py index e505a8ad0..acda8b7f9 100644 --- a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py +++ b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py @@ -135,7 +135,7 @@ def __init__( from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import ( VerbalizeFinalFst, - ) + ) self.tagger = ClassifyFst( cache_dir=cache_dir, whitelist=whitelist, overwrite_cache=overwrite_cache, input_case=input_case @@ -180,7 +180,7 @@ def parse_args(): parser.add_argument( "--language", help="language", - choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja','ko'], + choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja', 'ko'], default="en", type=str, ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py 
b/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py index 3c1193333..bc429e858 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py +++ b/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py @@ -282,41 +282,24 @@ def process_address_1(instance: Instance) -> Instance: filters = [] -filters.append(Filter(class_type="CARDINAL", - process_func=process_cardinal_1, filter_func=filter_cardinal_1)) -filters.append(Filter(class_type="ORDINAL", - process_func=process_ordinal_1, filter_func=filter_ordinal_1)) -filters.append(Filter(class_type="DECIMAL", - process_func=process_decimal_1, filter_func=filter_decimal_1)) -filters.append(Filter(class_type="MEASURE", - process_func=process_measure_1, filter_func=filter_measure_1)) -filters.append(Filter(class_type="MONEY", - process_func=process_money_1, filter_func=filter_money_1)) -filters.append(Filter(class_type="TIME", - process_func=process_time_1, filter_func=filter_time_1)) - -filters.append(Filter(class_type="DATE", - process_func=process_date_1, filter_func=filter_date_1)) -filters.append(Filter(class_type="PLAIN", - process_func=process_plain_1, filter_func=filter_plain_1)) -filters.append(Filter(class_type="PUNCT", - process_func=process_punct_1, filter_func=filter_punct_1)) -filters.append(Filter(class_type="LETTERS", - process_func=process_letters_1, filter_func=filter_letters_1)) -filters.append(Filter(class_type="VERBATIM", - process_func=process_verbatim_1, filter_func=filter_verbatim_1)) -filters.append(Filter(class_type="DIGIT", - process_func=process_digit_1, filter_func=filter_digit_1)) -filters.append(Filter(class_type="TELEPHONE", - process_func=process_telephone_1, filter_func=filter_telephone_1)) -filters.append(Filter(class_type="ELECTRONIC", - process_func=process_electronic_1, filter_func=filter_electronic_1)) -filters.append(Filter(class_type="FRACTION", - process_func=process_fraction_1, filter_func=filter_fraction_1)) 
-filters.append(Filter(class_type="ADDRESS", - process_func=process_address_1, filter_func=filter_address_1)) -filters.append(Filter(class_type=EOS_TYPE, - process_func=lambda x: x, filter_func=lambda x: True)) +filters.append(Filter(class_type="CARDINAL", process_func=process_cardinal_1, filter_func=filter_cardinal_1)) +filters.append(Filter(class_type="ORDINAL", process_func=process_ordinal_1, filter_func=filter_ordinal_1)) +filters.append(Filter(class_type="DECIMAL", process_func=process_decimal_1, filter_func=filter_decimal_1)) +filters.append(Filter(class_type="MEASURE", process_func=process_measure_1, filter_func=filter_measure_1)) +filters.append(Filter(class_type="MONEY", process_func=process_money_1, filter_func=filter_money_1)) +filters.append(Filter(class_type="TIME", process_func=process_time_1, filter_func=filter_time_1)) + +filters.append(Filter(class_type="DATE", process_func=process_date_1, filter_func=filter_date_1)) +filters.append(Filter(class_type="PLAIN", process_func=process_plain_1, filter_func=filter_plain_1)) +filters.append(Filter(class_type="PUNCT", process_func=process_punct_1, filter_func=filter_punct_1)) +filters.append(Filter(class_type="LETTERS", process_func=process_letters_1, filter_func=filter_letters_1)) +filters.append(Filter(class_type="VERBATIM", process_func=process_verbatim_1, filter_func=filter_verbatim_1)) +filters.append(Filter(class_type="DIGIT", process_func=process_digit_1, filter_func=filter_digit_1)) +filters.append(Filter(class_type="TELEPHONE", process_func=process_telephone_1, filter_func=filter_telephone_1)) +filters.append(Filter(class_type="ELECTRONIC", process_func=process_electronic_1, filter_func=filter_electronic_1)) +filters.append(Filter(class_type="FRACTION", process_func=process_fraction_1, filter_func=filter_fraction_1)) +filters.append(Filter(class_type="ADDRESS", process_func=process_address_1, filter_func=filter_address_1)) +filters.append(Filter(class_type=EOS_TYPE, process_func=lambda x: x, 
filter_func=lambda x: True)) def filter_loaded_data(data: List[Instance], verbose: bool = False) -> List[Instance]: @@ -344,10 +327,8 @@ def filter_loaded_data(data: List[Instance], verbose: bool = False) -> List[Inst def parse_args(): parser = ArgumentParser() - parser.add_argument("--input", help="input file path", - type=str, default='./en_with_types/output-00001-of-00100') - parser.add_argument( - "--verbose", help="print filtered instances", action='store_true') + parser.add_argument("--input", help="input file path", type=str, default='./en_with_types/output-00001-of-00100') + parser.add_argument("--verbose", help="print filtered instances", action='store_true') return parser.parse_args() diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py index df5804fc0..09cc03909 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -19,6 +19,7 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_DIGIT, GraphFst, delete_space from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path + class CardinalFst(GraphFst): """ Finite state transducer for classifying cardinals @@ -37,14 +38,14 @@ def __init__(self): graph_negative = pynini.cross("마이너스", "-") graph_negative += delete_space - + ten = pynutil.delete("십") ten_alt = pynini.cross("십", "1") ### Responsible for second digit of two digit number. ex) 20's 2 graph_ten_component = pynini.union((graph_digit + ten) | ten_alt, pynutil.insert("0")) ### Responsible for the first digit of number. 
ex) 1,2,3,4,5,,, graph_ten_component += graph_digit | pynutil.insert("0") - + hundred = pynutil.delete("백") hundred_alt = pynini.cross("백", "1") graph_hundred_component = pynini.union(((graph_digit + hundred) | hundred_alt), pynutil.insert("0")) @@ -59,29 +60,36 @@ def __init__(self): tenthousand_alt = pynini.cross("만", "1") ### "만" can express next four digits of numbers until the next unit "억", so insert "0000" to allocate four digit worth of space ### From "만", keep adding four digits and graph_thousand_component(0000-9999), because Korean units increase every four digits - graph_tenthousand_component = pynini.union(((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000")) + graph_tenthousand_component = pynini.union( + ((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000") + ) graph_tenthousand_component += graph_thousand_component hundredmillion = pynutil.delete("억") hundredmillion_alt = pynini.cross("억", "1") - graph_hundredmillion_component = pynini.union(((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000")) - graph_hundredmillion_component += graph_tenthousand_component - + graph_hundredmillion_component = pynini.union( + ((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000") + ) + graph_hundredmillion_component += graph_tenthousand_component + trillion = pynutil.delete("조") trillion_alt = pynini.cross("조", "1") - graph_trillion_component = pynini.union(((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000")) + graph_trillion_component = pynini.union( + ((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000") + ) graph_trillion_component += graph_hundredmillion_component tenquadrillion = pynutil.delete("경") tenquadrillion_alt = pynini.cross("경", "1") - graph_tenquadrillion_component = pynini.union(((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), 
pynutil.insert("0000")) + graph_tenquadrillion_component = pynini.union( + ((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000") + ) graph_tenquadrillion_component += graph_trillion_component - graph = pynini.union( ### From biggest unit to smallest, everything is included - graph_tenquadrillion_component| - graph_zero + graph_tenquadrillion_component + | graph_zero ) leading_zero = ( @@ -89,16 +97,18 @@ def __init__(self): ) graph_nonzero = graph @ leading_zero graph = pynini.union(graph_nonzero, graph_zero) - + graph = graph @ leading_zero | graph_zero self.just_cardinals = graph - optional_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1) + optional_sign = pynini.closure( + (pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space, 0, 1 + ) final_graph = ( optional_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"") ) | (pynutil.insert("integer: \"") + graph + pynutil.insert("\"")) final_graph = self.add_tokens(final_graph) - self.fst = final_graph.optimize() \ No newline at end of file + self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 760ce6829..2842a4167 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -19,15 +19,15 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst -from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( +from 
nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( INPUT_LOWER_CASED, GraphFst, delete_extra_space, delete_space, generator_main, ) +from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst class ClassifyFst(GraphFst): @@ -64,8 +64,8 @@ def __init__( cardinal = CardinalFst() cardinal_graph = cardinal.fst word_graph = WordFst().fst - classify = (pynutil.add_weight(cardinal_graph, 1.1)| pynutil.add_weight(word_graph, 100)) - + classify = pynutil.add_weight(cardinal_graph, 1.1) | pynutil.add_weight(word_graph, 100) + token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ") tagger = pynini.closure(token, 1) @@ -73,4 +73,4 @@ def __init__( if far_file: generator_main(far_file, {"tokenize_and_classify": self.fst}) - logging.info(f"ClassifyFst grammars are saved to {far_file}.") \ No newline at end of file + logging.info(f"ClassifyFst grammars are saved to {far_file}.") diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py index 0d6ccd5c5..0e4dbb93c 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py @@ -27,6 +27,5 @@ class WordFst(GraphFst): def __init__(self): super().__init__(name="word", kind="classify") - word = pynutil.insert( - "name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"") + word = pynutil.insert("name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"") self.fst = word.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/utils.py b/nemo_text_processing/inverse_text_normalization/ko/utils.py index 0222cc0b8..d198c3835 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/utils.py +++ b/nemo_text_processing/inverse_text_normalization/ko/utils.py @@ -15,9 
+15,6 @@ import os - def get_abs_path(rel_path): return os.path.dirname(os.path.abspath(__file__)) + '/' + rel_path - - diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py index da950f35e..f541211af 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py @@ -14,4 +14,4 @@ from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst \ No newline at end of file +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py index 1800a6dc8..fb9a76d8e 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py @@ -15,11 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( - NEMO_NOT_QUOTE, - GraphFst, - delete_space, -) +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space class CardinalFst(GraphFst): @@ -34,21 +30,17 @@ def __init__(self): pynutil.delete("negative:") + delete_space + pynutil.delete("\"") - + pynini.accep("-") + + pynini.accep("-") + pynutil.delete("\"") ) optional_sign_output = pynini.closure(negative_sign + delete_space, 0, 1) - digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1) + digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 
1) integer_cardinal = ( - pynutil.delete("integer:") - + delete_space - + pynutil.delete("\"") - + digits_from_tag - + pynutil.delete("\"") + pynutil.delete("integer:") + delete_space + pynutil.delete("\"") + digits_from_tag + pynutil.delete("\"") ) graph = integer_cardinal final_graph = optional_sign_output + graph - self.fst = self.delete_tokens(final_graph).optimize() \ No newline at end of file + self.fst = self.delete_tokens(final_graph).optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 9d750d757..d8851e206 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -13,9 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst class VerbalizeFst(GraphFst): @@ -30,7 +30,6 @@ def __init__(self): cardinal = CardinalFst() cardinal_graph = cardinal.fst word_graph = WordFst().fst - - graph = (cardinal_graph|word_graph) + + graph = cardinal_graph | word_graph self.fst = graph - diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py index 8554fc161..09b4cbc8b 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py @@ -18,9 +18,9 @@ import pynini from pynini.lib import pynutil 
+from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space class VerbalizeFinalFst(GraphFst): @@ -28,6 +28,7 @@ class VerbalizeFinalFst(GraphFst): Finite state transducer that verbalizes an entire sentence, e.g. tokens { name: "its" } tokens { time { hours: "12" minutes: "30" } } tokens { name: "now" } -> its 12:30 now """ + def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False): super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic) far_file = None diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py index d79957ca8..c134fe63a 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py @@ -20,7 +20,6 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space - class WordFst(GraphFst): ''' tokens { name: "一" } -> 一 diff --git a/nemo_text_processing/inverse_text_normalization/run_evaluate.py b/nemo_text_processing/inverse_text_normalization/run_evaluate.py index 7bfdd3399..133474940 100644 --- a/nemo_text_processing/inverse_text_normalization/run_evaluate.py +++ b/nemo_text_processing/inverse_text_normalization/run_evaluate.py @@ -35,7 +35,7 @@ def parse_args(): parser.add_argument( "--lang", help="language", - choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja","ko"], + choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", 
"mr", "pt", "ru", "sv", "vi", "zh", "ja", "ko"], default="en", type=str, ) diff --git a/tests/nemo_text_processing/ko/test_cardinal.py b/tests/nemo_text_processing/ko/test_cardinal.py index 9fd366ea6..526747668 100644 --- a/tests/nemo_text_processing/ko/test_cardinal.py +++ b/tests/nemo_text_processing/ko/test_cardinal.py @@ -33,7 +33,5 @@ def test_denorm(self, test_input, expected): assert pred == expected normalizer_with_audio_ko = ( - NormalizerWithAudio(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) - if RUN_AUDIO_BASED_TESTS - else None - ) \ No newline at end of file + NormalizerWithAudio(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) if RUN_AUDIO_BASED_TESTS else None + ) diff --git a/tools/text_processing_deployment/pynini_export.py b/tools/text_processing_deployment/pynini_export.py index 0df099774..d1ba34a37 100644 --- a/tools/text_processing_deployment/pynini_export.py +++ b/tools/text_processing_deployment/pynini_export.py @@ -106,7 +106,7 @@ def parse_args(): 'mr', 'ja', 'rw', - 'ko' + 'ko', ], type=str, default='en', From 9f7e876841b518a5b4d3d5e68df760cb7126729c Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Fri, 16 May 2025 13:10:40 -0700 Subject: [PATCH 03/29] fixing all the feedbacks Signed-off-by: hmlee245 --- .../ko/clean_eval_data.py | 361 ------------------ .../ko/data/numbers/zero.tsv | 1 - .../ko/graph_utils.py | 2 +- .../ko/taggers/__init__.py | 3 - .../ko/taggers/cardinal.py | 6 +- .../ko/taggers/tokenize_and_classify.py | 2 - .../ko/verbalizers/__init__.py | 4 - .../ko/verbalizers/verbalize_final.py | 1 - .../ko/verbalizers/word.py | 4 +- .../nemo_text_processing/ko/test_cardinal.py | 12 +- 10 files changed, 5 insertions(+), 391 deletions(-) delete mode 100644 nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py delete mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv diff --git a/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py 
b/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py deleted file mode 100644 index 3c1193333..000000000 --- a/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py +++ /dev/null @@ -1,361 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from argparse import ArgumentParser -from typing import List - -import regex as re - -from nemo_text_processing.text_normalization.data_loader_utils import ( - EOS_TYPE, - Instance, - load_files, - training_data_to_sentences, -) - -""" -This file is for evaluation purposes. -filter_loaded_data() cleans data (list of instances) for inverse text normalization. Filters and cleaners can be specified for each semiotic class individually. -For example, normalized text should only include characters and whitespace characters but no punctuation. - Cardinal unnormalized instances should contain at least one integer and all other characters are removed. 
-""" - - -class Filter: - """ - Filter class - - Args: - class_type: semiotic class used in dataset - process_func: function to transform text - filter_func: function to filter text - - """ - - def __init__(self, class_type: str, process_func: object, filter_func: object): - self.class_type = class_type - self.process_func = process_func - self.filter_func = filter_func - - def filter(self, instance: Instance) -> bool: - """ - filter function - - Args: - filters given instance with filter function - - Returns: True if given instance fulfills criteria or does not belong to class type - """ - if instance.token_type != self.class_type: - return True - return self.filter_func(instance) - - def process(self, instance: Instance) -> Instance: - """ - process function - - Args: - processes given instance with process function - - Returns: processed instance if instance belongs to expected class type or original instance - """ - if instance.token_type != self.class_type: - return instance - return self.process_func(instance) - - -def filter_cardinal_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_cardinal_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - un_normalized = re.sub(r"[^0-9]", "", un_normalized) - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_ordinal_1(instance: Instance) -> bool: - ok = re.search(r"(st|nd|rd|th)\s*$", instance.un_normalized) - return ok - - -def process_ordinal_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - un_normalized = re.sub(r"[,\s]", "", un_normalized) - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def 
filter_decimal_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_decimal_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - un_normalized = re.sub(r",", "", un_normalized) - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_measure_1(instance: Instance) -> bool: - ok = True - return ok - - -def process_measure_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - un_normalized = re.sub(r",", "", un_normalized) - un_normalized = re.sub(r"m2", "m²", un_normalized) - un_normalized = re.sub(r"(\d)([^\d.\s])", r"\1 \2", un_normalized) - normalized = re.sub(r"[^a-z\s]", "", normalized) - normalized = re.sub(r"per ([a-z\s]*)s$", r"per \1", normalized) - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_money_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_money_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - un_normalized = re.sub(r",", "", un_normalized) - un_normalized = re.sub(r"a\$", r"$", un_normalized) - un_normalized = re.sub(r"us\$", r"$", un_normalized) - un_normalized = re.sub(r"(\d)m\s*$", r"\1 million", un_normalized) - un_normalized = re.sub(r"(\d)bn?\s*$", r"\1 billion", un_normalized) - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_time_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_time_1(instance: Instance) -> Instance: - un_normalized = 
instance.un_normalized - un_normalized = re.sub(r": ", ":", un_normalized) - un_normalized = re.sub(r"(\d)\s?a\s?m\s?", r"\1 a.m.", un_normalized) - un_normalized = re.sub(r"(\d)\s?p\s?m\s?", r"\1 p.m.", un_normalized) - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_plain_1(instance: Instance) -> bool: - ok = True - return ok - - -def process_plain_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_punct_1(instance: Instance) -> bool: - ok = True - return ok - - -def process_punct_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_date_1(instance: Instance) -> bool: - ok = True - return ok - - -def process_date_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - un_normalized = re.sub(r",", "", un_normalized) - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_letters_1(instance: Instance) -> bool: - ok = True - return ok - - -def process_letters_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_verbatim_1(instance: Instance) -> bool: - ok = True - return ok - - -def process_verbatim_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - 
normalized = instance.normalized - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_digit_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_digit_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_telephone_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_telephone_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_electronic_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_electronic_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_fraction_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_fraction_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_address_1(instance: Instance) -> bool: - ok = True - return ok - - -def process_address_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - normalized = 
re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -filters = [] -filters.append(Filter(class_type="CARDINAL", - process_func=process_cardinal_1, filter_func=filter_cardinal_1)) -filters.append(Filter(class_type="ORDINAL", - process_func=process_ordinal_1, filter_func=filter_ordinal_1)) -filters.append(Filter(class_type="DECIMAL", - process_func=process_decimal_1, filter_func=filter_decimal_1)) -filters.append(Filter(class_type="MEASURE", - process_func=process_measure_1, filter_func=filter_measure_1)) -filters.append(Filter(class_type="MONEY", - process_func=process_money_1, filter_func=filter_money_1)) -filters.append(Filter(class_type="TIME", - process_func=process_time_1, filter_func=filter_time_1)) - -filters.append(Filter(class_type="DATE", - process_func=process_date_1, filter_func=filter_date_1)) -filters.append(Filter(class_type="PLAIN", - process_func=process_plain_1, filter_func=filter_plain_1)) -filters.append(Filter(class_type="PUNCT", - process_func=process_punct_1, filter_func=filter_punct_1)) -filters.append(Filter(class_type="LETTERS", - process_func=process_letters_1, filter_func=filter_letters_1)) -filters.append(Filter(class_type="VERBATIM", - process_func=process_verbatim_1, filter_func=filter_verbatim_1)) -filters.append(Filter(class_type="DIGIT", - process_func=process_digit_1, filter_func=filter_digit_1)) -filters.append(Filter(class_type="TELEPHONE", - process_func=process_telephone_1, filter_func=filter_telephone_1)) -filters.append(Filter(class_type="ELECTRONIC", - process_func=process_electronic_1, filter_func=filter_electronic_1)) -filters.append(Filter(class_type="FRACTION", - process_func=process_fraction_1, filter_func=filter_fraction_1)) -filters.append(Filter(class_type="ADDRESS", - process_func=process_address_1, filter_func=filter_address_1)) -filters.append(Filter(class_type=EOS_TYPE, - process_func=lambda x: x, filter_func=lambda x: True)) 
- - -def filter_loaded_data(data: List[Instance], verbose: bool = False) -> List[Instance]: - """ - Filters list of instances - - Args: - data: list of instances - - Returns: filtered and transformed list of instances - """ - updates_instances = [] - for instance in data: - updated_instance = False - for fil in filters: - if fil.class_type == instance.token_type and fil.filter(instance): - instance = fil.process(instance) - updated_instance = True - if updated_instance: - if verbose: - print(instance) - updates_instances.append(instance) - return updates_instances - - -def parse_args(): - parser = ArgumentParser() - parser.add_argument("--input", help="input file path", - type=str, default='./en_with_types/output-00001-of-00100') - parser.add_argument( - "--verbose", help="print filtered instances", action='store_true') - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - file_path = args.input - - print("Loading training data: " + file_path) - instance_list = load_files([file_path]) # List of instances - filtered_instance_list = filter_loaded_data(instance_list, args.verbose) - training_data_to_sentences(filtered_instance_list) diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv deleted file mode 100644 index 43baac7c1..000000000 --- a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv +++ /dev/null @@ -1 +0,0 @@ -영 0 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/graph_utils.py b/nemo_text_processing/inverse_text_normalization/ko/graph_utils.py index 7a9fd8720..50f1eb3b9 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/graph_utils.py +++ b/nemo_text_processing/inverse_text_normalization/ko/graph_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. 
All rights reserved. # Copyright 2015 and onwards Google, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py index f541211af..f6e3c3795 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py @@ -12,6 +12,3 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py index df5804fc0..7253019f0 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -31,13 +31,9 @@ class CardinalFst(GraphFst): def __init__(self): super().__init__(name="cardinal", kind="classify") - graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv")) - graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) graph_zero = pynini.cross("영", "0") + graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) - graph_negative = pynini.cross("마이너스", "-") - graph_negative += delete_space - ten = pynutil.delete("십") ten_alt = pynini.cross("십", "1") ### Responsible for second digit of two digit number. 
ex) 20's 2 diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 760ce6829..bb6b35d41 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -24,8 +24,6 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( INPUT_LOWER_CASED, GraphFst, - delete_extra_space, - delete_space, generator_main, ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py index da950f35e..341a77c5b 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py @@ -11,7 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- -from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py index 8554fc161..8d40d2804 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py @@ -19,7 +19,6 @@ from pynini.lib import pynutil from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py index d79957ca8..a423d5d0c 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py @@ -13,11 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- -import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst diff --git a/tests/nemo_text_processing/ko/test_cardinal.py b/tests/nemo_text_processing/ko/test_cardinal.py index 9fd366ea6..872a5aa2a 100644 --- a/tests/nemo_text_processing/ko/test_cardinal.py +++ b/tests/nemo_text_processing/ko/test_cardinal.py @@ -16,10 +16,8 @@ from parameterized import parameterized from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer -from nemo_text_processing.text_normalization.normalize import Normalizer -from nemo_text_processing.text_normalization.normalize_with_audio import NormalizerWithAudio -from ..utils import CACHE_DIR, RUN_AUDIO_BASED_TESTS, parse_test_case_file +from ..utils import CACHE_DIR, parse_test_case_file class TestCardinal: @@ -30,10 +28,4 @@ class TestCardinal: @pytest.mark.unit def test_denorm(self, test_input, expected): pred = self.inverse_normalizer_ko.inverse_normalize(test_input, verbose=False) - assert pred == expected - - normalizer_with_audio_ko = ( - NormalizerWithAudio(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) - if RUN_AUDIO_BASED_TESTS - else None - ) \ No newline at end of file + assert pred == expected \ No newline at end of file From 4df2965feae682f7762f3c6f292613339869a89b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 20:23:32 +0000 Subject: [PATCH 04/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../inverse_text_normalization/ko/taggers/__init__.py | 1 - .../ko/taggers/tokenize_and_classify.py | 6 +----- .../ko/verbalizers/verbalize_final.py | 5 ++++- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git 
a/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py index f6e3c3795..341a77c5b 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py @@ -11,4 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 30e0f5df4..75e3f6f20 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -19,11 +19,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( - INPUT_LOWER_CASED, - GraphFst, - generator_main, -) +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import INPUT_LOWER_CASED, GraphFst, generator_main from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py index 648285758..09c917d00 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py @@ -20,10 +20,13 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main from 
nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst + <<<<<<< HEAD -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main + ======= from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst + >>>>>>> 77da79d12b1378502cc2b382cd6933b02e7c2545 From 41ac59d791511cd82c03b242e8ec671c91360c6e Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Fri, 16 May 2025 13:36:00 -0700 Subject: [PATCH 05/29] This reverts commit f893d89bd8890e1b46df1e40054cc9176ac7ce7a, reversing changes made to 9f7e876841b518a5b4d3d5e68df760cb7126729c. Signed-off-by: hmlee245 --- .../inverse_normalize.py | 4 +- .../ko/taggers/cardinal.py | 42 ++++++------------- .../ko/taggers/tokenize_and_classify.py | 12 ++++-- .../ko/taggers/word.py | 3 +- .../inverse_text_normalization/ko/utils.py | 3 ++ .../ko/verbalizers/__init__.py | 7 ---- .../ko/verbalizers/cardinal.py | 18 +++++--- .../ko/verbalizers/verbalize.py | 7 ++-- .../ko/verbalizers/verbalize_final.py | 11 +---- .../ko/verbalizers/word.py | 1 + .../run_evaluate.py | 2 +- .../nemo_text_processing/ko/test_cardinal.py | 10 +---- .../pynini_export.py | 2 +- 13 files changed, 50 insertions(+), 72 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py index acda8b7f9..e505a8ad0 100644 --- a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py +++ b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py @@ -135,7 +135,7 @@ def __init__( from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import ( VerbalizeFinalFst, - ) + ) self.tagger = 
ClassifyFst( cache_dir=cache_dir, whitelist=whitelist, overwrite_cache=overwrite_cache, input_case=input_case @@ -180,7 +180,7 @@ def parse_args(): parser.add_argument( "--language", help="language", - choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja', 'ko'], + choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja','ko'], default="en", type=str, ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py index f3fa597e3..7253019f0 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -19,7 +19,6 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_DIGIT, GraphFst, delete_space from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path - class CardinalFst(GraphFst): """ Finite state transducer for classifying cardinals @@ -35,19 +34,13 @@ def __init__(self): graph_zero = pynini.cross("영", "0") graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) -<<<<<<< HEAD -======= - graph_negative = pynini.cross("마이너스", "-") - graph_negative += delete_space - ->>>>>>> 77da79d12b1378502cc2b382cd6933b02e7c2545 ten = pynutil.delete("십") ten_alt = pynini.cross("십", "1") ### Responsible for second digit of two digit number. ex) 20's 2 graph_ten_component = pynini.union((graph_digit + ten) | ten_alt, pynutil.insert("0")) ### Responsible for the first digit of number. 
ex) 1,2,3,4,5,,, graph_ten_component += graph_digit | pynutil.insert("0") - + hundred = pynutil.delete("백") hundred_alt = pynini.cross("백", "1") graph_hundred_component = pynini.union(((graph_digit + hundred) | hundred_alt), pynutil.insert("0")) @@ -62,36 +55,29 @@ def __init__(self): tenthousand_alt = pynini.cross("만", "1") ### "만" can express next four digits of numbers until the next unit "억", so insert "0000" to allocate four digit worth of space ### From "만", keep adding four digits and graph_thousand_component(0000-9999), because Korean units increase every four digits - graph_tenthousand_component = pynini.union( - ((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000") - ) + graph_tenthousand_component = pynini.union(((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000")) graph_tenthousand_component += graph_thousand_component hundredmillion = pynutil.delete("억") hundredmillion_alt = pynini.cross("억", "1") - graph_hundredmillion_component = pynini.union( - ((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000") - ) - graph_hundredmillion_component += graph_tenthousand_component - + graph_hundredmillion_component = pynini.union(((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000")) + graph_hundredmillion_component += graph_tenthousand_component + trillion = pynutil.delete("조") trillion_alt = pynini.cross("조", "1") - graph_trillion_component = pynini.union( - ((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000") - ) + graph_trillion_component = pynini.union(((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000")) graph_trillion_component += graph_hundredmillion_component tenquadrillion = pynutil.delete("경") tenquadrillion_alt = pynini.cross("경", "1") - graph_tenquadrillion_component = pynini.union( - ((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), 
pynutil.insert("0000") - ) + graph_tenquadrillion_component = pynini.union(((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000")) graph_tenquadrillion_component += graph_trillion_component + graph = pynini.union( ### From biggest unit to smallest, everything is included - graph_tenquadrillion_component - | graph_zero + graph_tenquadrillion_component| + graph_zero ) leading_zero = ( @@ -99,18 +85,16 @@ def __init__(self): ) graph_nonzero = graph @ leading_zero graph = pynini.union(graph_nonzero, graph_zero) - + graph = graph @ leading_zero | graph_zero self.just_cardinals = graph - optional_sign = pynini.closure( - (pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space, 0, 1 - ) + optional_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1) final_graph = ( optional_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"") ) | (pynutil.insert("integer: \"") + graph + pynutil.insert("\"")) final_graph = self.add_tokens(final_graph) - self.fst = final_graph.optimize() + self.fst = final_graph.optimize() \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 75e3f6f20..bb6b35d41 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -19,9 +19,13 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import INPUT_LOWER_CASED, GraphFst, generator_main from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst +from 
nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( + INPUT_LOWER_CASED, + GraphFst, + generator_main, +) class ClassifyFst(GraphFst): @@ -58,8 +62,8 @@ def __init__( cardinal = CardinalFst() cardinal_graph = cardinal.fst word_graph = WordFst().fst - classify = pynutil.add_weight(cardinal_graph, 1.1) | pynutil.add_weight(word_graph, 100) - + classify = (pynutil.add_weight(cardinal_graph, 1.1)| pynutil.add_weight(word_graph, 100)) + token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ") tagger = pynini.closure(token, 1) @@ -67,4 +71,4 @@ def __init__( if far_file: generator_main(far_file, {"tokenize_and_classify": self.fst}) - logging.info(f"ClassifyFst grammars are saved to {far_file}.") + logging.info(f"ClassifyFst grammars are saved to {far_file}.") \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py index 0e4dbb93c..0d6ccd5c5 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py @@ -27,5 +27,6 @@ class WordFst(GraphFst): def __init__(self): super().__init__(name="word", kind="classify") - word = pynutil.insert("name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"") + word = pynutil.insert( + "name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"") self.fst = word.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/utils.py b/nemo_text_processing/inverse_text_normalization/ko/utils.py index d198c3835..0222cc0b8 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/utils.py +++ b/nemo_text_processing/inverse_text_normalization/ko/utils.py @@ -15,6 +15,9 @@ import os + def get_abs_path(rel_path): return os.path.dirname(os.path.abspath(__file__)) + '/' + rel_path + + diff --git 
a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py index b8e634eef..341a77c5b 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py @@ -11,10 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -<<<<<<< HEAD -======= - -from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst ->>>>>>> 77da79d12b1378502cc2b382cd6933b02e7c2545 diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py index fb9a76d8e..1800a6dc8 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py @@ -15,7 +15,11 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( + NEMO_NOT_QUOTE, + GraphFst, + delete_space, +) class CardinalFst(GraphFst): @@ -30,17 +34,21 @@ def __init__(self): pynutil.delete("negative:") + delete_space + pynutil.delete("\"") - + pynini.accep("-") + + pynini.accep("-") + pynutil.delete("\"") ) optional_sign_output = pynini.closure(negative_sign + delete_space, 0, 1) - digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1) + digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1) 
integer_cardinal = ( - pynutil.delete("integer:") + delete_space + pynutil.delete("\"") + digits_from_tag + pynutil.delete("\"") + pynutil.delete("integer:") + + delete_space + + pynutil.delete("\"") + + digits_from_tag + + pynutil.delete("\"") ) graph = integer_cardinal final_graph = optional_sign_output + graph - self.fst = self.delete_tokens(final_graph).optimize() + self.fst = self.delete_tokens(final_graph).optimize() \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index d8851e206..9d750d757 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -13,9 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst class VerbalizeFst(GraphFst): @@ -30,6 +30,7 @@ def __init__(self): cardinal = CardinalFst() cardinal_graph = cardinal.fst word_graph = WordFst().fst - - graph = cardinal_graph | word_graph + + graph = (cardinal_graph|word_graph) self.fst = graph + diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py index 09c917d00..8d40d2804 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py @@ -18,16 +18,8 @@ import pynini from pynini.lib import pynutil 
-from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst - -<<<<<<< HEAD -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main - -======= -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst - ->>>>>>> 77da79d12b1378502cc2b382cd6933b02e7c2545 +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space class VerbalizeFinalFst(GraphFst): @@ -35,7 +27,6 @@ class VerbalizeFinalFst(GraphFst): Finite state transducer that verbalizes an entire sentence, e.g. tokens { name: "its" } tokens { time { hours: "12" minutes: "30" } } tokens { name: "now" } -> its 12:30 now """ - def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False): super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic) far_file = None diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py index ecf62bfe3..a423d5d0c 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py @@ -18,6 +18,7 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst + class WordFst(GraphFst): ''' tokens { name: "一" } -> 一 diff --git a/nemo_text_processing/inverse_text_normalization/run_evaluate.py b/nemo_text_processing/inverse_text_normalization/run_evaluate.py index 133474940..7bfdd3399 100644 --- a/nemo_text_processing/inverse_text_normalization/run_evaluate.py +++ b/nemo_text_processing/inverse_text_normalization/run_evaluate.py @@ -35,7 +35,7 @@ def parse_args(): parser.add_argument( "--lang", 
help="language", - choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja", "ko"], + choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja","ko"], default="en", type=str, ) diff --git a/tests/nemo_text_processing/ko/test_cardinal.py b/tests/nemo_text_processing/ko/test_cardinal.py index ff5950f2a..872a5aa2a 100644 --- a/tests/nemo_text_processing/ko/test_cardinal.py +++ b/tests/nemo_text_processing/ko/test_cardinal.py @@ -28,12 +28,4 @@ class TestCardinal: @pytest.mark.unit def test_denorm(self, test_input, expected): pred = self.inverse_normalizer_ko.inverse_normalize(test_input, verbose=False) -<<<<<<< HEAD - assert pred == expected -======= - assert pred == expected - - normalizer_with_audio_ko = ( - NormalizerWithAudio(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) if RUN_AUDIO_BASED_TESTS else None - ) ->>>>>>> 77da79d12b1378502cc2b382cd6933b02e7c2545 + assert pred == expected \ No newline at end of file diff --git a/tools/text_processing_deployment/pynini_export.py b/tools/text_processing_deployment/pynini_export.py index d1ba34a37..0df099774 100644 --- a/tools/text_processing_deployment/pynini_export.py +++ b/tools/text_processing_deployment/pynini_export.py @@ -106,7 +106,7 @@ def parse_args(): 'mr', 'ja', 'rw', - 'ko', + 'ko' ], type=str, default='en', From a5164dc157fdfd6af8aeca449eb7875c80ba6aae Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 20:55:36 +0000 Subject: [PATCH 06/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../inverse_normalize.py | 4 +-- .../ko/taggers/cardinal.py | 36 ++++++++++++------- .../ko/taggers/tokenize_and_classify.py | 12 +++---- .../ko/taggers/word.py | 3 +- .../inverse_text_normalization/ko/utils.py | 3 -- .../ko/verbalizers/cardinal.py | 18 +++------- .../ko/verbalizers/verbalize.py | 
7 ++-- .../ko/verbalizers/verbalize_final.py | 3 +- .../ko/verbalizers/word.py | 1 - .../run_evaluate.py | 2 +- .../nemo_text_processing/ko/test_cardinal.py | 2 +- .../pynini_export.py | 2 +- 12 files changed, 43 insertions(+), 50 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py index e505a8ad0..acda8b7f9 100644 --- a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py +++ b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py @@ -135,7 +135,7 @@ def __init__( from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import ( VerbalizeFinalFst, - ) + ) self.tagger = ClassifyFst( cache_dir=cache_dir, whitelist=whitelist, overwrite_cache=overwrite_cache, input_case=input_case @@ -180,7 +180,7 @@ def parse_args(): parser.add_argument( "--language", help="language", - choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja','ko'], + choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja', 'ko'], default="en", type=str, ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py index 7253019f0..14172b4e9 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -19,6 +19,7 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_DIGIT, GraphFst, delete_space from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path + class CardinalFst(GraphFst): """ Finite state transducer for classifying cardinals @@ -40,7 +41,7 @@ def __init__(self): graph_ten_component 
= pynini.union((graph_digit + ten) | ten_alt, pynutil.insert("0")) ### Responsible for the first digit of number. ex) 1,2,3,4,5,,, graph_ten_component += graph_digit | pynutil.insert("0") - + hundred = pynutil.delete("백") hundred_alt = pynini.cross("백", "1") graph_hundred_component = pynini.union(((graph_digit + hundred) | hundred_alt), pynutil.insert("0")) @@ -55,29 +56,36 @@ def __init__(self): tenthousand_alt = pynini.cross("만", "1") ### "만" can express next four digits of numbers until the next unit "억", so insert "0000" to allocate four digit worth of space ### From "만", keep adding four digits and graph_thousand_component(0000-9999), because Korean units increase every four digits - graph_tenthousand_component = pynini.union(((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000")) + graph_tenthousand_component = pynini.union( + ((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000") + ) graph_tenthousand_component += graph_thousand_component hundredmillion = pynutil.delete("억") hundredmillion_alt = pynini.cross("억", "1") - graph_hundredmillion_component = pynini.union(((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000")) - graph_hundredmillion_component += graph_tenthousand_component - + graph_hundredmillion_component = pynini.union( + ((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000") + ) + graph_hundredmillion_component += graph_tenthousand_component + trillion = pynutil.delete("조") trillion_alt = pynini.cross("조", "1") - graph_trillion_component = pynini.union(((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000")) + graph_trillion_component = pynini.union( + ((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000") + ) graph_trillion_component += graph_hundredmillion_component tenquadrillion = pynutil.delete("경") tenquadrillion_alt = pynini.cross("경", "1") - 
graph_tenquadrillion_component = pynini.union(((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000")) + graph_tenquadrillion_component = pynini.union( + ((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000") + ) graph_tenquadrillion_component += graph_trillion_component - graph = pynini.union( ### From biggest unit to smallest, everything is included - graph_tenquadrillion_component| - graph_zero + graph_tenquadrillion_component + | graph_zero ) leading_zero = ( @@ -85,16 +93,18 @@ def __init__(self): ) graph_nonzero = graph @ leading_zero graph = pynini.union(graph_nonzero, graph_zero) - + graph = graph @ leading_zero | graph_zero self.just_cardinals = graph - optional_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1) + optional_sign = pynini.closure( + (pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space, 0, 1 + ) final_graph = ( optional_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"") ) | (pynutil.insert("integer: \"") + graph + pynutil.insert("\"")) final_graph = self.add_tokens(final_graph) - self.fst = final_graph.optimize() \ No newline at end of file + self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index bb6b35d41..75e3f6f20 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -19,13 +19,9 @@ import pynini from pynini.lib import pynutil +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import INPUT_LOWER_CASED, GraphFst, generator_main from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import 
CardinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( - INPUT_LOWER_CASED, - GraphFst, - generator_main, -) class ClassifyFst(GraphFst): @@ -62,8 +58,8 @@ def __init__( cardinal = CardinalFst() cardinal_graph = cardinal.fst word_graph = WordFst().fst - classify = (pynutil.add_weight(cardinal_graph, 1.1)| pynutil.add_weight(word_graph, 100)) - + classify = pynutil.add_weight(cardinal_graph, 1.1) | pynutil.add_weight(word_graph, 100) + token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ") tagger = pynini.closure(token, 1) @@ -71,4 +67,4 @@ def __init__( if far_file: generator_main(far_file, {"tokenize_and_classify": self.fst}) - logging.info(f"ClassifyFst grammars are saved to {far_file}.") \ No newline at end of file + logging.info(f"ClassifyFst grammars are saved to {far_file}.") diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py index 0d6ccd5c5..0e4dbb93c 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py @@ -27,6 +27,5 @@ class WordFst(GraphFst): def __init__(self): super().__init__(name="word", kind="classify") - word = pynutil.insert( - "name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"") + word = pynutil.insert("name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"") self.fst = word.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/utils.py b/nemo_text_processing/inverse_text_normalization/ko/utils.py index 0222cc0b8..d198c3835 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/utils.py +++ b/nemo_text_processing/inverse_text_normalization/ko/utils.py @@ -15,9 +15,6 @@ import os - def get_abs_path(rel_path): return os.path.dirname(os.path.abspath(__file__)) + 
'/' + rel_path - - diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py index 1800a6dc8..fb9a76d8e 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py @@ -15,11 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( - NEMO_NOT_QUOTE, - GraphFst, - delete_space, -) +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space class CardinalFst(GraphFst): @@ -34,21 +30,17 @@ def __init__(self): pynutil.delete("negative:") + delete_space + pynutil.delete("\"") - + pynini.accep("-") + + pynini.accep("-") + pynutil.delete("\"") ) optional_sign_output = pynini.closure(negative_sign + delete_space, 0, 1) - digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1) + digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1) integer_cardinal = ( - pynutil.delete("integer:") - + delete_space - + pynutil.delete("\"") - + digits_from_tag - + pynutil.delete("\"") + pynutil.delete("integer:") + delete_space + pynutil.delete("\"") + digits_from_tag + pynutil.delete("\"") ) graph = integer_cardinal final_graph = optional_sign_output + graph - self.fst = self.delete_tokens(final_graph).optimize() \ No newline at end of file + self.fst = self.delete_tokens(final_graph).optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 9d750d757..d8851e206 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -13,9 +13,9 @@ # See the License for the specific language governing permissions and # limitations under 
the License. +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst class VerbalizeFst(GraphFst): @@ -30,7 +30,6 @@ def __init__(self): cardinal = CardinalFst() cardinal_graph = cardinal.fst word_graph = WordFst().fst - - graph = (cardinal_graph|word_graph) + + graph = cardinal_graph | word_graph self.fst = graph - diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py index 8d40d2804..17f547740 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py @@ -18,8 +18,8 @@ import pynini from pynini.lib import pynutil +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space class VerbalizeFinalFst(GraphFst): @@ -27,6 +27,7 @@ class VerbalizeFinalFst(GraphFst): Finite state transducer that verbalizes an entire sentence, e.g. 
tokens { name: "its" } tokens { time { hours: "12" minutes: "30" } } tokens { name: "now" } -> its 12:30 now """ + def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False): super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic) far_file = None diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py index a423d5d0c..ecf62bfe3 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py @@ -18,7 +18,6 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst - class WordFst(GraphFst): ''' tokens { name: "一" } -> 一 diff --git a/nemo_text_processing/inverse_text_normalization/run_evaluate.py b/nemo_text_processing/inverse_text_normalization/run_evaluate.py index 7bfdd3399..133474940 100644 --- a/nemo_text_processing/inverse_text_normalization/run_evaluate.py +++ b/nemo_text_processing/inverse_text_normalization/run_evaluate.py @@ -35,7 +35,7 @@ def parse_args(): parser.add_argument( "--lang", help="language", - choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja","ko"], + choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja", "ko"], default="en", type=str, ) diff --git a/tests/nemo_text_processing/ko/test_cardinal.py b/tests/nemo_text_processing/ko/test_cardinal.py index 872a5aa2a..f95d74107 100644 --- a/tests/nemo_text_processing/ko/test_cardinal.py +++ b/tests/nemo_text_processing/ko/test_cardinal.py @@ -28,4 +28,4 @@ class TestCardinal: @pytest.mark.unit def test_denorm(self, test_input, expected): pred = self.inverse_normalizer_ko.inverse_normalize(test_input, verbose=False) - assert pred == expected \ No newline at end of file + assert pred == expected 
diff --git a/tools/text_processing_deployment/pynini_export.py b/tools/text_processing_deployment/pynini_export.py index 0df099774..d1ba34a37 100644 --- a/tools/text_processing_deployment/pynini_export.py +++ b/tools/text_processing_deployment/pynini_export.py @@ -106,7 +106,7 @@ def parse_args(): 'mr', 'ja', 'rw', - 'ko' + 'ko', ], type=str, default='en', From 7842d1324e32a40bd522b99eba726f962dafc742 Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Fri, 23 May 2025 16:31:36 -0700 Subject: [PATCH 07/29] third draft of korean ITN work. Mainly fixing minor issues and adding test cases Signed-off-by: hmlee245 --- Jenkinsfile | 22 +++++++++++++++++++ .../ko/data/numbers/thousands.tsv | 11 ---------- .../ko/data/numbers/zero.tsv | 1 + .../ko/taggers/cardinal.py | 8 +++---- .../test_cases_cardinal.txt | 12 +++++++++- 5 files changed, 37 insertions(+), 17 deletions(-) delete mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv diff --git a/Jenkinsfile b/Jenkinsfile index c94c107c6..32375f28f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -28,6 +28,7 @@ pipeline { MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1' JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1' HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-03-25-1' + KO_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/05-21-25-0' DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0' } stages { @@ -318,6 +319,22 @@ pipeline { } } } + stage('L0: Create KO ITN Grammars') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + parallel { + stage('L0: KO ITN grammars') { + steps { + sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ko --text="100" --cache_dir ${KO_TN_CACHE}' + } + } + } + } // L1 Tests starts here @@ 
-406,6 +423,11 @@ pipeline { sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/hy/ -m "not pleasefixme" --cpu --tn_cache_dir ${HY_TN_CACHE}' } } + stage('L1: Run all KO TN/ITN tests (restore grammars from cache)') { + steps { + sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/ko/ -m "not pleasefixme" --cpu --tn_cache_dir ${KO_TN_CACHE}' + } + } } } diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv deleted file mode 100644 index 541752211..000000000 --- a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv +++ /dev/null @@ -1,11 +0,0 @@ -억 -조 -경 -해 -자 -양 -구 -간 -정 -재 -극 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv new file mode 100644 index 000000000..cbf967001 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv @@ -0,0 +1 @@ +영 0 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py index 7253019f0..a1cf1012f 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -31,7 +31,7 @@ class CardinalFst(GraphFst): def __init__(self): super().__init__(name="cardinal", kind="classify") - graph_zero = pynini.cross("영", "0") + graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv")) graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) ten = pynutil.delete("십") @@ -85,15 +85,13 @@ def __init__(self): ) graph_nonzero = graph @ leading_zero graph = pynini.union(graph_nonzero, graph_zero) - - graph = graph @ leading_zero | graph_zero self.just_cardinals = graph - 
optional_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1) + negative_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1) final_graph = ( - optional_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"") + negative_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"") ) | (pynutil.insert("integer: \"") + graph + pynutil.insert("\"")) final_graph = self.add_tokens(final_graph) diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt index 007273e5e..4f64116e5 100644 --- a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt +++ b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt @@ -24,4 +24,14 @@ 백오십억칠천만~15070000000 오천억~500000000000 일조~1000000000000 -이조오천억~2500000000000 \ No newline at end of file +이조오천억~2500000000000 +영영영~000 +영영백이십삼~00123 +만천~11000 +만천백십일~11111 +경~10000000000000000 +마이너스일~-1 +마이너스 일~-1 +- 일~-1 +마이너스일억사천이백칠십구만구천팔십이~-142799082 +마이너스 칠백삼십오~-735 \ No newline at end of file From b95f5fbb20f560e5592e7d52f0c9d1e3c3f124cd Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Tue, 10 Jun 2025 13:55:47 -0700 Subject: [PATCH 08/29] Commiting the first draft of Korean Ordinal ITN Signed-off-by: hmlee245 --- .../ko/data/ordinals/digit.tsv | 39 ++++++++++++++++ .../ko/taggers/cardinal.py | 5 +-- .../ko/taggers/ordinal.py | 45 +++++++++++++++++++ .../ko/taggers/tokenize_and_classify.py | 12 ++++- .../ko/verbalizers/ordinal.py | 36 +++++++++++++++ .../ko/verbalizers/verbalize.py | 10 ++++- .../test_cases_ordinal.txt | 19 ++++++++ tests/nemo_text_processing/ko/test_ordinal.py | 32 +++++++++++++ 8 files changed, 193 insertions(+), 5 deletions(-) create mode 100644 
nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit.tsv create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/ordinal.py create mode 100644 tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_ordinal.txt create mode 100644 tests/nemo_text_processing/ko/test_ordinal.py diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit.tsv new file mode 100644 index 000000000..532a4ed2e --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit.tsv @@ -0,0 +1,39 @@ +첫 1 +두 2 +세 3 +네 4 +다섯 5 +여섯 6 +일곱 7 +여덟 8 +아홉 9 +열 10 +열한 11 +열두 12 +열세 13 +열네 14 +열다섯 15 +열여섯 16 +열일곱 17 +열여덟 18 +열아홉 19 +스무 20 +스물한 21 +스물두 22 +스물세 23 +스물네 24 +스물다섯 25 +스물여섯 26 +스물일곱 27 +스물여덟 28 +스물아홉 29 +서른 30 +서른한 31 +서른두 32 +서른세 33 +서른네 34 +서른다섯 35 +서른여섯 36 +서른일곱 37 +서른여덟 38 +서른아홉 39 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py index 1c78f6000..83b2b80d4 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -87,12 +87,11 @@ def __init__(self): graph_tenquadrillion_component | graph_zero ) - + leading_zero = ( pynutil.delete(pynini.closure("0")) + pynini.difference(NEMO_DIGIT, "0") + pynini.closure(NEMO_DIGIT) ) - graph_nonzero = graph @ leading_zero - graph = pynini.union(graph_nonzero, graph_zero) + graph = graph @ leading_zero | graph_zero self.just_cardinals = graph diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py new file mode 100644 index 000000000..2068c0894 --- 
/dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py @@ -0,0 +1,45 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, NEMO_CHAR +from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path + + +class OrdinalFst(GraphFst): + """ + Finite state transducer for classifying ordinal + Expressing integers in ordinal way for 1-39 and cardinal for 40+ due to Korean grammar. + e.g. 스물세번째 -> ordinal {integer: "23", 23번째} + e.g. 
사십오번째 -> ordinal but the integer part is written in cardinal(due to korean grammar) + { integer: "45", 45번쨰} + """ + + def __init__(self, cardinal: GraphFst): + super().__init__(name="ordinal", kind="classify") + + cardinals = cardinal.just_cardinals + graph_digit = pynini.string_file(get_abs_path("data/ordinals/digit.tsv")) + ordinals = pynini.accep("째") | pynini.accep("번째") + + ordinal_graph = ( + pynutil.insert("integer: \"") + ((graph_digit + ordinals) | (cardinals + ordinals)) + pynutil.insert("\"") + ) + + final_graph = self.add_tokens(ordinal_graph) + self.fst = final_graph.optimize() \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 75e3f6f20..78f6198d0 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -22,6 +22,7 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import INPUT_LOWER_CASED, GraphFst, generator_main from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.ordinal import OrdinalFst class ClassifyFst(GraphFst): @@ -55,10 +56,19 @@ def __init__( logging.info(f"ClassifyFst.fst was restored from {far_file}.") else: logging.info(f"Creating ClassifyFst grammars.") + cardinal = CardinalFst() cardinal_graph = cardinal.fst + + ordinal = OrdinalFst(cardinal) + ordinal_graph = ordinal.fst + word_graph = WordFst().fst - classify = pynutil.add_weight(cardinal_graph, 1.1) | pynutil.add_weight(word_graph, 100) + + classify = (pynutil.add_weight(cardinal_graph, 1.1) + | pynutil.add_weight(ordinal_graph, 1.1) + | pynutil.add_weight(word_graph, 100) + ) token 
= pynutil.insert("tokens { ") + classify + pynutil.insert(" } ") tagger = pynini.closure(token, 1) diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/ordinal.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/ordinal.py new file mode 100644 index 000000000..b857a3be0 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/ordinal.py @@ -0,0 +1,36 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst + + +class OrdinalFst(GraphFst): + """ + Finite state transducer for classifying cardinals + e.g. 스물세번째 -> ordinal {integer: "23", 23번째} + e.g. 
사십오번째 -> ordinal but the integer part is written in cardinal(due to korean grammar) + { integer: "45", 45번쨰} + """ + + def __init__(self): + super().__init__(name="ordinal", kind="verbalize") + + integer_component = pynutil.delete("integer: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") + + final_graph = self.delete_tokens(integer_component) + self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index d8851e206..6a3af3cf4 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -16,6 +16,7 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst class VerbalizeFst(GraphFst): @@ -29,7 +30,14 @@ def __init__(self): super().__init__(name="verbalize", kind="verbalize") cardinal = CardinalFst() cardinal_graph = cardinal.fst + + ordinal = OrdinalFst() + ordinal_graph = ordinal.fst + word_graph = WordFst().fst - graph = cardinal_graph | word_graph + graph = (cardinal_graph + | word_graph + | ordinal_graph + ) self.fst = graph diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_ordinal.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_ordinal.txt new file mode 100644 index 000000000..2caad7dc3 --- /dev/null +++ b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_ordinal.txt @@ -0,0 +1,19 @@ +영번째~0번째 +첫번째~1번째 +두번째~2번째 +세째~3째 +다섯째~5째 +아홉번째~9번째 +열번째~10번째 +열한번째~11번째 +열일곱째~17째 +스무번째~20번째 +스물두번째~22번째 +스물아홉째~29째 
+서른번째~30번째 +서른째~30째 +사십번째~40번째 +사십째~40째 +오십번째~50번째 +오십삼번째~53번째 +백번째~100번째 diff --git a/tests/nemo_text_processing/ko/test_ordinal.py b/tests/nemo_text_processing/ko/test_ordinal.py new file mode 100644 index 000000000..b07c8bd55 --- /dev/null +++ b/tests/nemo_text_processing/ko/test_ordinal.py @@ -0,0 +1,32 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from parameterized import parameterized + +from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer +from nemo_text_processing.text_normalization.normalize import Normalizer + +from ..utils import CACHE_DIR, parse_test_case_file + + +class TestOrdinal: + inverse_normalizer = InverseNormalizer(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) + + @parameterized.expand(parse_test_case_file('ko/data_inverse_text_normalization/test_cases_ordinal.txt')) + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_denorm(self, test_input, expected): + pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False) + assert pred == expected From 9a00ba65ccd3d6949d492dc8cfac9e9bbbda0e5f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Jun 2025 20:34:11 +0000 Subject: [PATCH 09/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- 
.../inverse_text_normalization/ko/taggers/cardinal.py | 6 ++++-- .../inverse_text_normalization/ko/taggers/ordinal.py | 4 ++-- .../ko/taggers/tokenize_and_classify.py | 10 +++++----- .../ko/verbalizers/verbalize.py | 7 ++----- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py index 83b2b80d4..5987a9771 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -87,7 +87,7 @@ def __init__(self): graph_tenquadrillion_component | graph_zero ) - + leading_zero = ( pynutil.delete(pynini.closure("0")) + pynini.difference(NEMO_DIGIT, "0") + pynini.closure(NEMO_DIGIT) ) @@ -95,7 +95,9 @@ def __init__(self): self.just_cardinals = graph - negative_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1) + negative_sign = pynini.closure( + (pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space, 0, 1 + ) final_graph = ( negative_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"") diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py index 2068c0894..62cc81203 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py @@ -17,7 +17,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, NEMO_CHAR +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_CHAR, GraphFst from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path @@ -42,4 +42,4 @@ def __init__(self, 
cardinal: GraphFst): ) final_graph = self.add_tokens(ordinal_graph) - self.fst = final_graph.optimize() \ No newline at end of file + self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 7f8613506..df5f330f5 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -21,9 +21,8 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import INPUT_LOWER_CASED, GraphFst, generator_main from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst -from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst from nemo_text_processing.inverse_text_normalization.ko.taggers.ordinal import OrdinalFst - +from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst class ClassifyFst(GraphFst): @@ -66,9 +65,10 @@ def __init__( word_graph = WordFst().fst - classify = (pynutil.add_weight(cardinal_graph, 1.1) - | pynutil.add_weight(ordinal_graph, 1.1) - | pynutil.add_weight(word_graph, 100) + classify = ( + pynutil.add_weight(cardinal_graph, 1.1) + | pynutil.add_weight(ordinal_graph, 1.1) + | pynutil.add_weight(word_graph, 100) ) token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ") diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 6a3af3cf4..305f36dc9 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -15,8 +15,8 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst from 
nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst class VerbalizeFst(GraphFst): @@ -36,8 +36,5 @@ def __init__(self): word_graph = WordFst().fst - graph = (cardinal_graph - | word_graph - | ordinal_graph - ) + graph = cardinal_graph | word_graph | ordinal_graph self.fst = graph From 63ce43df7a0a0d4aae5c10f9345e63ff98a040b9 Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Mon, 16 Jun 2025 15:49:25 -0700 Subject: [PATCH 10/29] Update after first Korean Ordinal ITN pull request review Signed-off-by: hmlee245 --- .../ko/data/ordinals/cardinal_digit.tsv | 39 ++++++++++++++++++ .../ko/data/ordinals/digit.tsv | 34 +-------------- .../ko/data/ordinals/digit_no_one.tsv | 8 ++++ .../ko/taggers/ordinal.py | 41 +++++++++++++++++-- .../ko/verbalizers/verbalize.py | 4 -- .../ko/verbalizers/verbalize_final.py | 40 ++++++++---------- .../ko/verbalizers/word.py | 19 +++++---- .../test_cases_ordinal.txt | 10 ++--- .../nemo_text_processing/ko/test_cardinal.py | 2 +- ..._sparrowhawk_inverse_text_normalization.sh | 5 +++ 10 files changed, 126 insertions(+), 76 deletions(-) create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/ordinals/cardinal_digit.tsv create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit_no_one.tsv diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/cardinal_digit.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/cardinal_digit.tsv new file mode 100644 index 000000000..19e188ac6 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/cardinal_digit.tsv @@ -0,0 +1,39 @@ +일 1 +이 2 +삼 3 +사 4 +오 5 +육 6 +칠 7 +팔 8 +구 9 +십 10 +십일 11 +십이 12 
+십삼 13 +십사 14 +십오 15 +십육 16 +십칠 17 +십팔 18 +십구 19 +이십 20 +이십일 21 +이십이 22 +이십삼 23 +이십사 24 +이십오 25 +이십육 26 +이십칠 27 +이십팔 28 +이십구 29 +삼십 30 +삼십일 31 +삼십이 32 +삼십삼 33 +삼십사 34 +삼십오 35 +삼십육 36 +삼십칠 37 +삼십팔 38 +삼십구 39 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit.tsv index 532a4ed2e..d2fdd1846 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit.tsv +++ b/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit.tsv @@ -1,4 +1,4 @@ -첫 1 +한 1 두 2 세 3 네 4 @@ -6,34 +6,4 @@ 여섯 6 일곱 7 여덟 8 -아홉 9 -열 10 -열한 11 -열두 12 -열세 13 -열네 14 -열다섯 15 -열여섯 16 -열일곱 17 -열여덟 18 -열아홉 19 -스무 20 -스물한 21 -스물두 22 -스물세 23 -스물네 24 -스물다섯 25 -스물여섯 26 -스물일곱 27 -스물여덟 28 -스물아홉 29 -서른 30 -서른한 31 -서른두 32 -서른세 33 -서른네 34 -서른다섯 35 -서른여섯 36 -서른일곱 37 -서른여덟 38 -서른아홉 39 \ No newline at end of file +아홉 9 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit_no_one.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit_no_one.tsv new file mode 100644 index 000000000..00ab6d0b4 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit_no_one.tsv @@ -0,0 +1,8 @@ +두 2 +세 3 +네 4 +다섯 5 +여섯 6 +일곱 7 +여덟 8 +아홉 9 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py index 2068c0894..0bd3484e7 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py @@ -34,11 +34,46 @@ def __init__(self, cardinal: GraphFst): super().__init__(name="ordinal", kind="classify") cardinals = cardinal.just_cardinals - graph_digit = pynini.string_file(get_abs_path("data/ordinals/digit.tsv")) - ordinals = pynini.accep("째") | 
pynini.accep("번째") + ordinals_suffix = pynini.accep("번째") #Korean ordinal's morphosyntactic feature + + graph_digit = pynini.string_file(get_abs_path("data/ordinals/digit.tsv")) #1-9 + graph_digit_no_one = pynini.string_file(get_abs_path("data/ordinals/digit_no_one.tsv")) #2-9 + cardinal_1to39 = pynini.string_file(get_abs_path("data/ordinals/cardinal_digit.tsv")) #1-39 in cardinals + + graph_tens_prefix = pynini.cross("열", "1") #First digit for tens + graph_twenties_prefix = pynini.cross("스물", "2") #First digit for twenties + graph_thirties_prefix = pynini.cross("서른", "3") #First digit for thirties + + graph_one = pynini.cross("첫", "1") + graph_single = graph_one | graph_digit_no_one + # 1 has a unique ordinal case in Korean and does not repeat for 11, 21, 31 + + graph_ten = pynini.cross("열", "10") + graph_tens = graph_ten | graph_tens_prefix + graph_digit + + graph_twenty = pynini.cross("스무", "20") + graph_twenties = graph_twenty | graph_twenties_prefix + graph_digit + + graph_thirty = pynini.cross("서른", "30") + graph_thirties = graph_thirty | graph_thirties_prefix + graph_digit + + ordinals = pynini.union( + graph_single, #1-9 + graph_tens, #10-19 + graph_twenties, #20-29 + graph_thirties #30-39 + ).optimize() + + cardinals_acceptor = pynini.project(cardinals, "input").optimize() #Input includes all cardinal expressions + cardinals_exception = pynini.project(cardinal_1to39, "input").optimize() #Input includes cardinal expression from 1 to 39 + + cardinal_plus_40 = pynini.difference(cardinals_acceptor,cardinals_exception).optimize() #All cardinal values - 1 to 39 cardinal values + cardinal_ordinal = cardinal_plus_40 @ cardinals + + ordinal_final = pynini.union(ordinals, cardinal_ordinal) # 1 to 39 in ordinal, everything else cardinal ordinal_graph = ( - pynutil.insert("integer: \"") + ((graph_digit + ordinals) | (cardinals + ordinals)) + pynutil.insert("\"") + pynutil.insert("integer: \"") + ((ordinal_final + ordinals_suffix)) + pynutil.insert("\"") ) final_graph 
= self.add_tokens(ordinal_graph) diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 6a3af3cf4..7baa749f3 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -15,7 +15,6 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst @@ -34,10 +33,7 @@ def __init__(self): ordinal = OrdinalFst() ordinal_graph = ordinal.fst - word_graph = WordFst().fst - graph = (cardinal_graph - | word_graph | ordinal_graph ) self.fst = graph diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py index 17f547740..6ba917b35 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py @@ -13,13 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os - import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, delete_extra_space from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst class VerbalizeFinalFst(GraphFst): @@ -28,22 +27,19 @@ class VerbalizeFinalFst(GraphFst): tokens { name: "its" } tokens { time { hours: "12" minutes: "30" } } tokens { name: "now" } -> its 12:30 now """ - def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False): - super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic) - far_file = None - if cache_dir is not None and cache_dir != "None": - os.makedirs(cache_dir, exist_ok=True) - far_file = os.path.join(cache_dir, f"ko_tn_{deterministic}_deterministic_verbalizer.far") - if not overwrite_cache and far_file and os.path.exists(far_file): - self.fst = pynini.Far(far_file, mode="r")["verbalize"] - else: - # token_graph = VerbalizeFst(deterministic=deterministic) - token_graph = VerbalizeFst().fst - token_verbalizer = ( - pynutil.delete("tokens {") + delete_space + token_graph + delete_space + pynutil.delete(" }") - ) - verbalizer = pynini.closure(delete_space + token_verbalizer + delete_space) - - self.fst = (verbalizer).optimize() - if far_file: - generator_main(far_file, {"verbalize": self.fst}) + def __init__(self): + super().__init__(name="verbalize_final", kind="verbalize") + verbalize = VerbalizeFst().fst + word = WordFst().fst + types = verbalize | word + graph = ( + pynutil.delete("tokens") + + delete_space + + pynutil.delete("{") + + delete_space + + types + + delete_space + + pynutil.delete("}") + ) + graph = delete_space + pynini.closure(graph + delete_extra_space) 
+ graph + delete_space + self.fst = graph \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py index ecf62bfe3..29f8fb647 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py @@ -13,19 +13,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_CHAR, NEMO_SIGMA, GraphFst, delete_space class WordFst(GraphFst): - ''' - tokens { name: "一" } -> 一 - ''' + """ + Finite state transducer for verbalizing plain tokens + e.g. tokens { name: "sleep" } -> sleep + """ - def __init__(self, deterministic: bool = True, lm: bool = False): - super().__init__(name="word", kind="verbalize", deterministic=deterministic) - - graph = pynutil.delete("name: \"") + NEMO_NOT_QUOTE + pynutil.delete("\"") + def __init__(self): + super().__init__(name="word", kind="verbalize") + chars = pynini.closure(NEMO_CHAR - " ", 1) + char = pynutil.delete("name:") + delete_space + pynutil.delete("\"") + chars + pynutil.delete("\"") + graph = char @ pynini.cdrewrite(pynini.cross(u"\u00a0", " "), "", "", NEMO_SIGMA) self.fst = graph.optimize() diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_ordinal.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_ordinal.txt index 2caad7dc3..08baa6c97 100644 --- a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_ordinal.txt +++ b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_ordinal.txt @@ -1,19 +1,17 @@ 영번째~0번째 첫번째~1번째 두번째~2번째 -세째~3째 -다섯째~5째 +세번째~3번째 
+다섯번째~5번째 아홉번째~9번째 열번째~10번째 열한번째~11번째 -열일곱째~17째 +열일곱번째~17번째 스무번째~20번째 스물두번째~22번째 -스물아홉째~29째 +스물아홉번째~29번째 서른번째~30번째 -서른째~30째 사십번째~40번째 -사십째~40째 오십번째~50번째 오십삼번째~53번째 백번째~100번째 diff --git a/tests/nemo_text_processing/ko/test_cardinal.py b/tests/nemo_text_processing/ko/test_cardinal.py index f95d74107..96681fd8b 100644 --- a/tests/nemo_text_processing/ko/test_cardinal.py +++ b/tests/nemo_text_processing/ko/test_cardinal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh index c44f4a703..5053be55d 100644 --- a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh +++ b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh @@ -27,6 +27,11 @@ testITNCardinal() { runtest $input } +testITNOrdinal() { + input=$TEST_DIR/data_inverse_text_normalization/test_cases_ordinal.txt + runtest $input +} + # Remove all command-line arguments shift $# From 473f0423c3940197c9ea11f38ee077293c8e447b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Jun 2025 23:00:18 +0000 Subject: [PATCH 11/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../ko/taggers/ordinal.py | 39 +++++++++---------- .../ko/verbalizers/verbalize.py | 4 +- .../ko/verbalizers/verbalize_final.py | 4 +- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py index 085990f62..1ab598546 100644 --- 
a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py @@ -34,23 +34,23 @@ def __init__(self, cardinal: GraphFst): super().__init__(name="ordinal", kind="classify") cardinals = cardinal.just_cardinals - ordinals_suffix = pynini.accep("번째") #Korean ordinal's morphosyntactic feature + ordinals_suffix = pynini.accep("번째") # Korean ordinal's morphosyntactic feature - graph_digit = pynini.string_file(get_abs_path("data/ordinals/digit.tsv")) #1-9 - graph_digit_no_one = pynini.string_file(get_abs_path("data/ordinals/digit_no_one.tsv")) #2-9 - cardinal_1to39 = pynini.string_file(get_abs_path("data/ordinals/cardinal_digit.tsv")) #1-39 in cardinals + graph_digit = pynini.string_file(get_abs_path("data/ordinals/digit.tsv")) # 1-9 + graph_digit_no_one = pynini.string_file(get_abs_path("data/ordinals/digit_no_one.tsv")) # 2-9 + cardinal_1to39 = pynini.string_file(get_abs_path("data/ordinals/cardinal_digit.tsv")) # 1-39 in cardinals - graph_tens_prefix = pynini.cross("열", "1") #First digit for tens - graph_twenties_prefix = pynini.cross("스물", "2") #First digit for twenties - graph_thirties_prefix = pynini.cross("서른", "3") #First digit for thirties + graph_tens_prefix = pynini.cross("열", "1") # First digit for tens + graph_twenties_prefix = pynini.cross("스물", "2") # First digit for twenties + graph_thirties_prefix = pynini.cross("서른", "3") # First digit for thirties graph_one = pynini.cross("첫", "1") - graph_single = graph_one | graph_digit_no_one + graph_single = graph_one | graph_digit_no_one # 1 has a unique ordinal case in Korean and does not repeat for 11, 21, 31 graph_ten = pynini.cross("열", "10") graph_tens = graph_ten | graph_tens_prefix + graph_digit - + graph_twenty = pynini.cross("스무", "20") graph_twenties = graph_twenty | graph_twenties_prefix + graph_digit @@ -58,23 +58,22 @@ def __init__(self, cardinal: GraphFst): graph_thirties = graph_thirty | graph_thirties_prefix + 
graph_digit ordinals = pynini.union( - graph_single, #1-9 - graph_tens, #10-19 - graph_twenties, #20-29 - graph_thirties #30-39 + graph_single, graph_tens, graph_twenties, graph_thirties # 1-9 # 10-19 # 20-29 # 30-39 ).optimize() - cardinals_acceptor = pynini.project(cardinals, "input").optimize() #Input includes all cardinal expressions - cardinals_exception = pynini.project(cardinal_1to39, "input").optimize() #Input includes cardinal expression from 1 to 39 + cardinals_acceptor = pynini.project(cardinals, "input").optimize() # Input includes all cardinal expressions + cardinals_exception = pynini.project( + cardinal_1to39, "input" + ).optimize() # Input includes cardinal expression from 1 to 39 - cardinal_plus_40 = pynini.difference(cardinals_acceptor,cardinals_exception).optimize() #All cardinal values - 1 to 39 cardinal values + cardinal_plus_40 = pynini.difference( + cardinals_acceptor, cardinals_exception + ).optimize() # All cardinal values - 1 to 39 cardinal values cardinal_ordinal = cardinal_plus_40 @ cardinals - ordinal_final = pynini.union(ordinals, cardinal_ordinal) # 1 to 39 in ordinal, everything else cardinal + ordinal_final = pynini.union(ordinals, cardinal_ordinal) # 1 to 39 in ordinal, everything else cardinal - ordinal_graph = ( - pynutil.insert("integer: \"") + ((ordinal_final + ordinals_suffix)) + pynutil.insert("\"") - ) + ordinal_graph = pynutil.insert("integer: \"") + ((ordinal_final + ordinals_suffix)) + pynutil.insert("\"") final_graph = self.add_tokens(ordinal_graph) self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index b5a3d6dc1..7a2bd341c 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -34,7 +34,5 @@ def __init__(self): ordinal = OrdinalFst() ordinal_graph = 
ordinal.fst - graph = (cardinal_graph - | ordinal_graph - ) + graph = cardinal_graph | ordinal_graph self.fst = graph diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py index 6ba917b35..6bcca5fb8 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py @@ -16,7 +16,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, delete_extra_space +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_extra_space, delete_space from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst @@ -42,4 +42,4 @@ def __init__(self): + pynutil.delete("}") ) graph = delete_space + pynini.closure(graph + delete_extra_space) + graph + delete_space - self.fst = graph \ No newline at end of file + self.fst = graph From 6d25ac95da8291864f00020d480e2cae323d2df7 Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Tue, 24 Jun 2025 17:19:41 -0700 Subject: [PATCH 12/29] Deleting unnecessary data files and rules Signed-off-by: hmlee245 --- .../ko/data/ordinals/cardinal_digit.tsv | 39 ------------------- .../ko/data/ordinals/digit_no_one.tsv | 8 ---- .../ko/taggers/ordinal.py | 39 ++++++++++++++----- 3 files changed, 29 insertions(+), 57 deletions(-) delete mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/ordinals/cardinal_digit.tsv delete mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit_no_one.tsv diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/cardinal_digit.tsv 
b/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/cardinal_digit.tsv deleted file mode 100644 index 19e188ac6..000000000 --- a/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/cardinal_digit.tsv +++ /dev/null @@ -1,39 +0,0 @@ -일 1 -이 2 -삼 3 -사 4 -오 5 -육 6 -칠 7 -팔 8 -구 9 -십 10 -십일 11 -십이 12 -십삼 13 -십사 14 -십오 15 -십육 16 -십칠 17 -십팔 18 -십구 19 -이십 20 -이십일 21 -이십이 22 -이십삼 23 -이십사 24 -이십오 25 -이십육 26 -이십칠 27 -이십팔 28 -이십구 29 -삼십 30 -삼십일 31 -삼십이 32 -삼십삼 33 -삼십사 34 -삼십오 35 -삼십육 36 -삼십칠 37 -삼십팔 38 -삼십구 39 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit_no_one.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit_no_one.tsv deleted file mode 100644 index 00ab6d0b4..000000000 --- a/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/digit_no_one.tsv +++ /dev/null @@ -1,8 +0,0 @@ -두 2 -세 3 -네 4 -다섯 5 -여섯 6 -일곱 7 -여덟 8 -아홉 9 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py index 085990f62..a8b6e9842 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py @@ -36,17 +36,22 @@ def __init__(self, cardinal: GraphFst): cardinals = cardinal.just_cardinals ordinals_suffix = pynini.accep("번째") #Korean ordinal's morphosyntactic feature - graph_digit = pynini.string_file(get_abs_path("data/ordinals/digit.tsv")) #1-9 - graph_digit_no_one = pynini.string_file(get_abs_path("data/ordinals/digit_no_one.tsv")) #2-9 - cardinal_1to39 = pynini.string_file(get_abs_path("data/ordinals/cardinal_digit.tsv")) #1-39 in cardinals + graph_digit = pynini.string_file(get_abs_path("data/ordinals/digit.tsv")) #1-9 in ordinals + cardinal_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) #1-9 in cardinals graph_tens_prefix = 
pynini.cross("열", "1") #First digit for tens graph_twenties_prefix = pynini.cross("스물", "2") #First digit for twenties graph_thirties_prefix = pynini.cross("서른", "3") #First digit for thirties - graph_one = pynini.cross("첫", "1") - graph_single = graph_one | graph_digit_no_one - # 1 has a unique ordinal case in Korean and does not repeat for 11, 21, 31 + graph_one = pynini.cross("한", "1") + single_digits = pynini.project(graph_digit, "input").optimize() + graph_one_acceptor = pynini.project(graph_one, "input").optimize() + two_to_nine = pynini.difference(single_digits,graph_one_acceptor).optimize() + graph_two_to_nine = two_to_nine @ graph_digit + graph_first = pynini.cross("첫", "1") + graph_single = graph_two_to_nine | graph_first + # Line 46-52 exclude regular 1 in ordinal and replace with a special 1. Like "first" in English + # The special 1 is a unique ordinal case for Korean and does not repeat for 11, 21, 31 graph_ten = pynini.cross("열", "10") graph_tens = graph_ten | graph_tens_prefix + graph_digit @@ -64,13 +69,27 @@ def __init__(self, cardinal: GraphFst): graph_thirties #30-39 ).optimize() + cardinal_10_to_19 = pynini.cross("십", "10") | (pynini.accep("십") + cardinal_digit) + + cardinal_20_to_29 = pynini.cross("이십", "20") | (pynini.accep("이십") + cardinal_digit) + + cardinal_30_to_39 = pynini.cross("삼십", "30") | (pynini.accep("삼십") + cardinal_digit) + + cardinal_below_40 = pynini.union( + cardinal_digit, + cardinal_10_to_19, + cardinal_20_to_29, + cardinal_30_to_39 + ).optimize() + # FST that include 1-39 in cardinal expression + cardinals_acceptor = pynini.project(cardinals, "input").optimize() #Input includes all cardinal expressions - cardinals_exception = pynini.project(cardinal_1to39, "input").optimize() #Input includes cardinal expression from 1 to 39 + cardinals_exception = pynini.project(cardinal_below_40, "input").optimize() #Input includes cardinal expression from 1 to 39 - cardinal_plus_40 = 
pynini.difference(cardinals_acceptor,cardinals_exception).optimize() #All cardinal values - 1 to 39 cardinal values - cardinal_ordinal = cardinal_plus_40 @ cardinals + cardinal_over_40 = pynini.difference(cardinals_acceptor,cardinals_exception).optimize() #All cardinal values except 1 to 39 cardinal values + cardinal_ordinal_suffix = cardinal_over_40 @ cardinals - ordinal_final = pynini.union(ordinals, cardinal_ordinal) # 1 to 39 in ordinal, everything else cardinal + ordinal_final = pynini.union(ordinals, cardinal_ordinal_suffix) # 1 to 39 in ordinal, everything else cardinal ordinal_graph = ( pynutil.insert("integer: \"") + ((ordinal_final + ordinals_suffix)) + pynutil.insert("\"") From 2c4574b6f58093f2a98e4da9c685d2467d60eada Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 25 Jun 2025 00:26:20 +0000 Subject: [PATCH 13/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../ko/taggers/ordinal.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py index e62e51da6..f5171c3a5 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py @@ -36,8 +36,8 @@ def __init__(self, cardinal: GraphFst): cardinals = cardinal.just_cardinals ordinals_suffix = pynini.accep("번째") # Korean ordinal's morphosyntactic feature - graph_digit = pynini.string_file(get_abs_path("data/ordinals/digit.tsv")) #1-9 in ordinals - cardinal_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) #1-9 in cardinals + graph_digit = pynini.string_file(get_abs_path("data/ordinals/digit.tsv")) # 1-9 in ordinals + cardinal_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) # 1-9 in 
cardinals graph_tens_prefix = pynini.cross("열", "1") # First digit for tens graph_twenties_prefix = pynini.cross("스물", "2") # First digit for twenties @@ -46,7 +46,7 @@ def __init__(self, cardinal: GraphFst): graph_one = pynini.cross("한", "1") single_digits = pynini.project(graph_digit, "input").optimize() graph_one_acceptor = pynini.project(graph_one, "input").optimize() - two_to_nine = pynini.difference(single_digits,graph_one_acceptor).optimize() + two_to_nine = pynini.difference(single_digits, graph_one_acceptor).optimize() graph_two_to_nine = two_to_nine @ graph_digit graph_first = pynini.cross("첫", "1") graph_single = graph_two_to_nine | graph_first @@ -69,24 +69,25 @@ def __init__(self, cardinal: GraphFst): cardinal_10_to_19 = pynini.cross("십", "10") | (pynini.accep("십") + cardinal_digit) cardinal_20_to_29 = pynini.cross("이십", "20") | (pynini.accep("이십") + cardinal_digit) - + cardinal_30_to_39 = pynini.cross("삼십", "30") | (pynini.accep("삼십") + cardinal_digit) cardinal_below_40 = pynini.union( - cardinal_digit, - cardinal_10_to_19, - cardinal_20_to_29, - cardinal_30_to_39 + cardinal_digit, cardinal_10_to_19, cardinal_20_to_29, cardinal_30_to_39 ).optimize() # FST that include 1-39 in cardinal expression - cardinals_acceptor = pynini.project(cardinals, "input").optimize() #Input includes all cardinal expressions - cardinals_exception = pynini.project(cardinal_below_40, "input").optimize() #Input includes cardinal expression from 1 to 39 + cardinals_acceptor = pynini.project(cardinals, "input").optimize() # Input includes all cardinal expressions + cardinals_exception = pynini.project( + cardinal_below_40, "input" + ).optimize() # Input includes cardinal expression from 1 to 39 - cardinal_over_40 = pynini.difference(cardinals_acceptor,cardinals_exception).optimize() #All cardinal values except 1 to 39 cardinal values + cardinal_over_40 = pynini.difference( + cardinals_acceptor, cardinals_exception + ).optimize() # All cardinal values except 1 to 39 cardinal 
values cardinal_ordinal_suffix = cardinal_over_40 @ cardinals - ordinal_final = pynini.union(ordinals, cardinal_ordinal_suffix) # 1 to 39 in ordinal, everything else cardinal + ordinal_final = pynini.union(ordinals, cardinal_ordinal_suffix) # 1 to 39 in ordinal, everything else cardinal ordinal_graph = pynutil.insert("integer: \"") + ((ordinal_final + ordinals_suffix)) + pynutil.insert("\"") From bc73bb78be5ea04284a3ee33b8eeeaeb7dccea1e Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Thu, 3 Jul 2025 11:02:18 -0700 Subject: [PATCH 14/29] Adding decimal to the PR Signed-off-by: hmlee245 --- .../ko/taggers/decimal.py | 69 +++++++++++++++++++ .../ko/taggers/tokenize_and_classify.py | 5 ++ .../ko/verbalizers/decimal.py | 59 ++++++++++++++++ .../ko/verbalizers/verbalize.py | 6 +- .../test_cases_decimal.txt | 14 ++++ tests/nemo_text_processing/ko/test_decimal.py | 32 +++++++++ ..._sparrowhawk_inverse_text_normalization.sh | 5 ++ 7 files changed, 189 insertions(+), 1 deletion(-) create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/decimal.py create mode 100644 tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_decimal.txt create mode 100644 tests/nemo_text_processing/ko/test_decimal.py diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py new file mode 100644 index 000000000..b2b82c32b --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py @@ -0,0 +1,69 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst +from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path + + +def get_quantity(decimal): + suffix = pynini.union("만", "억", "조", "경") + numbers = decimal + res = numbers + pynutil.insert(' quantity: "') + suffix + pynutil.insert('"') + + return res + + +class DecimalFst(GraphFst): + """ + Finite state transducer for classifying decimal + e.g. 일점오 -> decimal { integer_part: "1" fractional_part: "5" } + e.g. 일점오만 -> decimal { integer_part: "1" fractional_part: "5" quantity: "만" } + """ + + def __init__(self, cardinal: GraphFst): + super().__init__(name="decimal", kind="classify") + + cardinals = cardinal.just_cardinals + graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv")) + graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) + decimal_part = pynini.closure(graph_zero | graph_digit) + + decimal_point = pynutil.delete("점") + integer_part = pynutil.insert("integer_part: \"") + cardinals + pynutil.insert("\"") + fractional_part = pynutil.insert("fractional_part: \"") + decimal_part + pynutil.insert("\"") + + graph_decimal_regular = integer_part + decimal_point + pynutil.insert(" ") + fractional_part #Regular decimal like 1.5 + graph_deicimal_larger = get_quantity(graph_decimal_regular) #If decimal is used to express big numbers like 15000 -> "1.5만" + + + self.decimal = graph_decimal_regular | graph_deicimal_larger + self.just_decimal = cardinals +
pynini.cross("점", ".") + decimal_part + + graph_sign = ( + pynutil.insert("negative: \"") + (pynini.cross("마이너스", "-") | pynini.accep("-")) + pynutil.insert("\"") + ) + + final_graph = ( + (graph_sign + pynutil.insert(" ") + graph_decimal_regular) + | (graph_sign + pynutil.insert(" ") + graph_deicimal_larger) + | graph_decimal_regular + | graph_deicimal_larger + ) + + final_graph = self.add_tokens(final_graph) + self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index df5f330f5..3689d69e3 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -23,6 +23,7 @@ from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.decimal import DecimalFst class ClassifyFst(GraphFst): @@ -63,11 +64,15 @@ def __init__( ordinal = OrdinalFst(cardinal) ordinal_graph = ordinal.fst + decimal = DecimalFst(cardinal) + decimal_graph = decimal.fst + word_graph = WordFst().fst classify = ( pynutil.add_weight(cardinal_graph, 1.1) | pynutil.add_weight(ordinal_graph, 1.1) + | pynutil.add_weight(decimal_graph, 1.1) | pynutil.add_weight(word_graph, 100) ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/decimal.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/decimal.py new file mode 100644 index 000000000..f59bd6a10 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/decimal.py @@ -0,0 +1,59 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst + + +class DecimalFst(GraphFst): + """ + Finite state transducer for verbalizing decimal + e.g. decimal { integer_part: "1" fractional_part: "5" } -> 1.5 + e.g. decimal { integer_part: "1" fractional_part: "5" quantity: "만" } -> 1.5만 + """ + + def __init__(self): + super().__init__(name="decimal", kind="verbalize") + + decimal_point = pynutil.insert(".") + integer_part = pynutil.delete("integer_part: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") + fractional_part = ( + pynutil.delete("fractional_part: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") + ) + quantity_part = pynutil.delete("quantity: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") + + graph_decimal = integer_part + decimal_point + pynutil.delete(" ") + fractional_part + graph_decimal_larger = ( + integer_part + + decimal_point + + pynutil.delete(" ") + + fractional_part + + pynutil.delete(" ") + + quantity_part + ) + + graph_sign = pynutil.delete("negative: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") + + graph = ( + graph_decimal + | graph_decimal_larger + | (graph_sign + pynutil.delete(" ") + graph_decimal) + | (graph_sign + pynutil.delete(" ") + graph_decimal_larger) + ) + + final_graph = self.delete_tokens(graph) + self.fst = final_graph.optimize() 
diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 7a2bd341c..98dce0dc4 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -16,6 +16,7 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.decimal import DecimalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst @@ -34,5 +35,8 @@ def __init__(self): ordinal = OrdinalFst() ordinal_graph = ordinal.fst - graph = cardinal_graph | ordinal_graph + decimal = DecimalFst() + decimal_graph = decimal.fst + + graph = cardinal_graph | ordinal_graph | decimal_graph self.fst = graph diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_decimal.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_decimal.txt new file mode 100644 index 000000000..63d023168 --- /dev/null +++ b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_decimal.txt @@ -0,0 +1,14 @@ +일점삼~1.3 +영점오~0.5 +십점오~10.5 +이십삼점사~23.4 +백점일~100.1 +일점이삼~1.23 +영점오육칠~0.567 +구십구점구구~99.99 +일점영삼~1.03 +영점영영일~0.001 +천이백삼십사점오육~1234.56 +일점오만~1.5만 +일점오억~1.5억 +일점오경~1.5경 \ No newline at end of file diff --git a/tests/nemo_text_processing/ko/test_decimal.py b/tests/nemo_text_processing/ko/test_decimal.py new file mode 100644 index 000000000..733139df0 --- /dev/null +++ b/tests/nemo_text_processing/ko/test_decimal.py @@ -0,0 +1,32 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from parameterized import parameterized + +from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer +from nemo_text_processing.text_normalization.normalize import Normalizer + +from ..utils import CACHE_DIR, parse_test_case_file + + +class TestDecimal: + inverse_normalizer = InverseNormalizer(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) + + @parameterized.expand(parse_test_case_file('ko/data_inverse_text_normalization/test_cases_decimal.txt')) + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_denorm(self, test_input, expected): + pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False) + assert pred == expected diff --git a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh index 5053be55d..7927877b8 100644 --- a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh +++ b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh @@ -32,6 +32,11 @@ testITNOrdinal() { runtest $input } +testITNDecimal() { + input=$TEST_DIR/data_inverse_text_normalization/test_cases_decimal.txt + runtest $input +} + # Remove all command-line arguments shift $# From 68a69069efdc7557d36cafbd7550523e533a26f1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 3 Jul 2025 18:06:21 +0000 Subject: [PATCH 15/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../inverse_text_normalization/ko/taggers/decimal.py | 9 ++++++--- .../ko/taggers/tokenize_and_classify.py | 2 +- .../ko/verbalizers/decimal.py | 11 ++--------- .../ko/verbalizers/verbalize.py | 2 +- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py index b2b82c32b..98ba5cef1 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py @@ -47,9 +47,12 @@ def __init__(self, cardinal: GraphFst): integer_part = pynutil.insert("integer_part: \"") + cardinals + pynutil.insert("\"") fractional_part = pynutil.insert("fractional_part: \"") + decimal_part + pynutil.insert("\"") - graph_decimal_regular = integer_part + decimal_point + pynutil.insert(" ") + fractional_part #Regular decimal like 1.5 - graph_deicimal_larger = get_quantity(graph_decimal_regular) #If decimal is used to express big numbers like 15000 -> "1.5만" - + graph_decimal_regular = ( + integer_part + decimal_point + pynutil.insert(" ") + fractional_part + ) # Regular decimal like 1.5 + graph_deicimal_larger = get_quantity( + graph_decimal_regular + ) # If decimal is used to express big numbers like 15000 -> "1.5만" self.decimal = graph_decimal_regular | graph_deicimal_larger self.just_decimal = cardinals + pynini.cross("점", ".") + decimal_part diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 3689d69e3..5753e4b66 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py 
+++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -21,9 +21,9 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import INPUT_LOWER_CASED, GraphFst, generator_main from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.decimal import DecimalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst -from nemo_text_processing.inverse_text_normalization.ko.taggers.decimal import DecimalFst class ClassifyFst(GraphFst): diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/decimal.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/decimal.py index f59bd6a10..65f225f45 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/decimal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/decimal.py @@ -31,19 +31,12 @@ def __init__(self): decimal_point = pynutil.insert(".") integer_part = pynutil.delete("integer_part: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") - fractional_part = ( - pynutil.delete("fractional_part: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") - ) + fractional_part = pynutil.delete("fractional_part: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") quantity_part = pynutil.delete("quantity: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") graph_decimal = integer_part + decimal_point + pynutil.delete(" ") + fractional_part graph_decimal_larger = ( - integer_part - + decimal_point - + pynutil.delete(" ") - + fractional_part - + pynutil.delete(" ") - + quantity_part + integer_part + decimal_point + pynutil.delete(" ") + fractional_part + pynutil.delete(" ") + quantity_part ) graph_sign = pynutil.delete("negative: \"") + pynini.closure(NEMO_NOT_QUOTE) 
+ pynutil.delete("\"") diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 98dce0dc4..5d5a01b3c 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -15,8 +15,8 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.decimal import DecimalFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst From 1e313354dda96a1da9c4f31f89da580ed87c172b Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Wed, 9 Jul 2025 16:25:12 -0700 Subject: [PATCH 16/29] Adding counter suffixes for Korean ordinal and its test cases Signed-off-by: hmlee245 --- .../ko/taggers/cardinal.py | 3 +-- .../ko/taggers/ordinal.py | 21 +++++++++++++++++-- .../ko/verbalizers/ordinal.py | 11 +++++++--- .../ko/verbalizers/word.py | 2 +- .../test_cases_ordinal.txt | 8 +++++++ 5 files changed, 37 insertions(+), 8 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py index 5987a9771..c8202475d 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -1,5 +1,4 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# Copyright 2015 and onwards Google, Inc. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -91,7 +90,7 @@ def __init__(self): leading_zero = ( pynutil.delete(pynini.closure("0")) + pynini.difference(NEMO_DIGIT, "0") + pynini.closure(NEMO_DIGIT) ) - graph = graph @ leading_zero | graph_zero + graph = (graph @ leading_zero) | graph_zero self.just_cardinals = graph diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py index f5171c3a5..e59301d0c 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py @@ -1,5 +1,4 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# Copyright 2015 and onwards Google, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,6 +19,12 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_CHAR, GraphFst from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path +def get_counter(ordinal): + suffix = pynini.union("개", "명", "병", "마리", "대", "송이", "포기", "사람", "자루", "채", "켤레", "그루", "벌", "잔", "장", "권", "살") + numbers = ordinal + res = numbers + pynutil.insert('" counter: "') + suffix + + return res class OrdinalFst(GraphFst): """ @@ -91,5 +96,17 @@ def __init__(self, cardinal: GraphFst): ordinal_graph = pynutil.insert("integer: \"") + ((ordinal_final + ordinals_suffix)) + pynutil.insert("\"") - final_graph = self.add_tokens(ordinal_graph) + #Adding various counter suffix for ordinal + counters = pynini.union( + graph_digit, graph_tens, graph_twenties, graph_thirties + ).optimize() + # For counting, Korean does not use the speical "첫" for 1. 
Instead the regular "한" + + counter_final = (get_counter(counters) | get_counter(cardinal_ordinal_suffix)) + + counter_graph = pynutil.insert("integer: \"") + counter_final + pynutil.insert("\"") + + final_graph = (ordinal_graph | counter_graph) + + final_graph = self.add_tokens(final_graph) self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/ordinal.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/ordinal.py index b857a3be0..13c5cb55f 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/ordinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/ordinal.py @@ -16,7 +16,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space class OrdinalFst(GraphFst): @@ -31,6 +31,11 @@ def __init__(self): super().__init__(name="ordinal", kind="verbalize") integer_component = pynutil.delete("integer: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") + counter_component = pynutil.delete("counter: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") - final_graph = self.delete_tokens(integer_component) - self.fst = final_graph.optimize() + graph_with_counter = (integer_component + delete_space + counter_component) + + ordinal_verbalizer = pynini.union(graph_with_counter, integer_component) + + final_graph = self.delete_tokens(ordinal_verbalizer) + self.fst = final_graph.optimize() \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py index 29f8fb647..6bdd7e55d 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py +++ 
b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py @@ -16,7 +16,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import NEMO_CHAR, NEMO_SIGMA, GraphFst, delete_space +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_CHAR, NEMO_SIGMA, GraphFst, delete_space class WordFst(GraphFst): diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_ordinal.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_ordinal.txt index 08baa6c97..8dfc77823 100644 --- a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_ordinal.txt +++ b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_ordinal.txt @@ -15,3 +15,11 @@ 오십번째~50번째 오십삼번째~53번째 백번째~100번째 +한개~1개 +한마리~1마리 +열병~10병 +스물한송이~21송이 +사십그루~40그루 +여섯사람~6사람 +열다섯장~15장 +서른일곱권~37권 \ No newline at end of file From 53dc07b5fa60b28f2c7c1f32ea6758252e21cb41 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Jul 2025 23:27:56 +0000 Subject: [PATCH 17/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../ko/taggers/ordinal.py | 34 ++++++++++++++----- .../ko/verbalizers/ordinal.py | 4 +-- .../ko/verbalizers/word.py | 7 +++- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py index e59301d0c..9c01bdfca 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py @@ -19,13 +19,33 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_CHAR, GraphFst from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path + def 
get_counter(ordinal): - suffix = pynini.union("개", "명", "병", "마리", "대", "송이", "포기", "사람", "자루", "채", "켤레", "그루", "벌", "잔", "장", "권", "살") + suffix = pynini.union( + "개", + "명", + "병", + "마리", + "대", + "송이", + "포기", + "사람", + "자루", + "채", + "켤레", + "그루", + "벌", + "잔", + "장", + "권", + "살", + ) numbers = ordinal - res = numbers + pynutil.insert('" counter: "') + suffix + res = numbers + pynutil.insert('" counter: "') + suffix return res + class OrdinalFst(GraphFst): """ Finite state transducer for classifying ordinal @@ -96,17 +116,15 @@ def __init__(self, cardinal: GraphFst): ordinal_graph = pynutil.insert("integer: \"") + ((ordinal_final + ordinals_suffix)) + pynutil.insert("\"") - #Adding various counter suffix for ordinal - counters = pynini.union( - graph_digit, graph_tens, graph_twenties, graph_thirties - ).optimize() + # Adding various counter suffix for ordinal + counters = pynini.union(graph_digit, graph_tens, graph_twenties, graph_thirties).optimize() # For counting, Korean does not use the speical "첫" for 1. 
Instead the regular "한" - counter_final = (get_counter(counters) | get_counter(cardinal_ordinal_suffix)) + counter_final = get_counter(counters) | get_counter(cardinal_ordinal_suffix) counter_graph = pynutil.insert("integer: \"") + counter_final + pynutil.insert("\"") - final_graph = (ordinal_graph | counter_graph) + final_graph = ordinal_graph | counter_graph final_graph = self.add_tokens(final_graph) self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/ordinal.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/ordinal.py index 13c5cb55f..f8f106734 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/ordinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/ordinal.py @@ -33,9 +33,9 @@ def __init__(self): integer_component = pynutil.delete("integer: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") counter_component = pynutil.delete("counter: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") - graph_with_counter = (integer_component + delete_space + counter_component) + graph_with_counter = integer_component + delete_space + counter_component ordinal_verbalizer = pynini.union(graph_with_counter, integer_component) final_graph = self.delete_tokens(ordinal_verbalizer) - self.fst = final_graph.optimize() \ No newline at end of file + self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py index 6bdd7e55d..226b41e08 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py @@ -16,7 +16,12 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_CHAR, NEMO_SIGMA, GraphFst, delete_space +from 
nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( + NEMO_CHAR, + NEMO_SIGMA, + GraphFst, + delete_space, +) class WordFst(GraphFst): From fcfc6c5fb586e35014615b881f1d60ec73f7061b Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Mon, 21 Jul 2025 16:53:09 -0700 Subject: [PATCH 18/29] Fixing minor comments error for newly added ordinal suffix Signed-off-by: hmlee245 --- .../ko/data/ordinals/counter_suffix.tsv | 17 +++++++++++++ .../ko/taggers/decimal.py | 9 +++---- .../ko/taggers/ordinal.py | 25 +++++++++++-------- 3 files changed, 35 insertions(+), 16 deletions(-) create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/ordinals/counter_suffix.tsv diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/counter_suffix.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/counter_suffix.tsv new file mode 100644 index 000000000..e240760ed --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/ordinals/counter_suffix.tsv @@ -0,0 +1,17 @@ +개 +명 +병 +마리 +대 +송이 +포기 +사람 +자루 +채 +켤레 +그루 +벌 +잔 +장 +권 +살 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py index 98ba5cef1..b2b82c32b 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py @@ -47,12 +47,9 @@ def __init__(self, cardinal: GraphFst): integer_part = pynutil.insert("integer_part: \"") + cardinals + pynutil.insert("\"") fractional_part = pynutil.insert("fractional_part: \"") + decimal_part + pynutil.insert("\"") - graph_decimal_regular = ( - integer_part + decimal_point + pynutil.insert(" ") + fractional_part - ) # Regular decimal like 1.5 - graph_deicimal_larger = get_quantity( - graph_decimal_regular - ) # If decimal is used to express big numbers like 15000 -> "1.5만" + graph_decimal_regular = 
integer_part + decimal_point + pynutil.insert(" ") + fractional_part #Regular decimal like 1.5 + graph_deicimal_larger = get_quantity(graph_decimal_regular) #If decimal is used to express big numbers like 15000 -> "1.5만" + self.decimal = graph_decimal_regular | graph_deicimal_larger self.just_decimal = cardinals + pynini.cross("점", ".") + decimal_part diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py index e59301d0c..799ddce7e 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py @@ -20,7 +20,7 @@ from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path def get_counter(ordinal): - suffix = pynini.union("개", "명", "병", "마리", "대", "송이", "포기", "사람", "자루", "채", "켤레", "그루", "벌", "잔", "장", "권", "살") + suffix = pynini.string_file(get_abs_path("data/ordinals/counter_suffix.tsv")) numbers = ordinal res = numbers + pynutil.insert('" counter: "') + suffix @@ -48,6 +48,8 @@ def __init__(self, cardinal: GraphFst): graph_twenties_prefix = pynini.cross("스물", "2") # First digit for twenties graph_thirties_prefix = pynini.cross("서른", "3") # First digit for thirties + # Below exclude regular 1 in ordinal and replace with a special 1. Like "first" in English + # The special 1 is a unique ordinal case for Korean and does not repeat for 11, 21, 31 graph_one = pynini.cross("한", "1") single_digits = pynini.project(graph_digit, "input").optimize() graph_one_acceptor = pynini.project(graph_one, "input").optimize() @@ -55,8 +57,7 @@ def __init__(self, cardinal: GraphFst): graph_two_to_nine = two_to_nine @ graph_digit graph_first = pynini.cross("첫", "1") graph_single = graph_two_to_nine | graph_first - # Line 46-52 exclude regular 1 in ordinal and replace with a special 1. 
Like "first" in English - # The special 1 is a unique ordinal case for Korean and does not repeat for 11, 21, 31 + graph_ten = pynini.cross("열", "10") graph_tens = graph_ten | graph_tens_prefix + graph_digit @@ -77,31 +78,35 @@ def __init__(self, cardinal: GraphFst): cardinal_30_to_39 = pynini.cross("삼십", "30") | (pynini.accep("삼십") + cardinal_digit) + # FST that include 1-39 in cardinal expression cardinal_below_40 = pynini.union( cardinal_digit, cardinal_10_to_19, cardinal_20_to_29, cardinal_30_to_39 ).optimize() - # FST that include 1-39 in cardinal expression - cardinals_acceptor = pynini.project(cardinals, "input").optimize() # Input includes all cardinal expressions + # Input includes all cardinal expressions + cardinals_acceptor = pynini.project(cardinals, "input").optimize() + # Input includes cardinal expression from 1 to 39 cardinals_exception = pynini.project( cardinal_below_40, "input" - ).optimize() # Input includes cardinal expression from 1 to 39 + ).optimize() + # All cardinal values except 1 to 39 cardinal values cardinal_over_40 = pynini.difference( cardinals_acceptor, cardinals_exception - ).optimize() # All cardinal values except 1 to 39 cardinal values + ).optimize() cardinal_ordinal_suffix = cardinal_over_40 @ cardinals - ordinal_final = pynini.union(ordinals, cardinal_ordinal_suffix) # 1 to 39 in ordinal, everything else cardinal + # 1 to 39 in ordinal, everything else cardinal + ordinal_final = pynini.union(ordinals, cardinal_ordinal_suffix) ordinal_graph = pynutil.insert("integer: \"") + ((ordinal_final + ordinals_suffix)) + pynutil.insert("\"") #Adding various counter suffix for ordinal + # For counting, Korean does not use the speical "첫" for 1. Instead the regular "한" counters = pynini.union( graph_digit, graph_tens, graph_twenties, graph_thirties ).optimize() - # For counting, Korean does not use the speical "첫" for 1. 
Instead the regular "한" - + counter_final = (get_counter(counters) | get_counter(cardinal_ordinal_suffix)) counter_graph = pynutil.insert("integer: \"") + counter_final + pynutil.insert("\"") From 9fc941fe48e6216d6b9b561bc8efdf1cd1dc899f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 21 Jul 2025 23:54:57 +0000 Subject: [PATCH 19/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../ko/taggers/decimal.py | 9 +++++--- .../ko/taggers/ordinal.py | 21 +++++++------------ 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py index b2b82c32b..98ba5cef1 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py @@ -47,9 +47,12 @@ def __init__(self, cardinal: GraphFst): integer_part = pynutil.insert("integer_part: \"") + cardinals + pynutil.insert("\"") fractional_part = pynutil.insert("fractional_part: \"") + decimal_part + pynutil.insert("\"") - graph_decimal_regular = integer_part + decimal_point + pynutil.insert(" ") + fractional_part #Regular decimal like 1.5 - graph_deicimal_larger = get_quantity(graph_decimal_regular) #If decimal is used to express big numbers like 15000 -> "1.5만" - + graph_decimal_regular = ( + integer_part + decimal_point + pynutil.insert(" ") + fractional_part + ) # Regular decimal like 1.5 + graph_deicimal_larger = get_quantity( + graph_decimal_regular + ) # If decimal is used to express big numbers like 15000 -> "1.5만" self.decimal = graph_decimal_regular | graph_deicimal_larger self.just_decimal = cardinals + pynini.cross("점", ".") + decimal_part diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py 
b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py index aeaa1d9ee..88c576f59 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py @@ -60,7 +60,6 @@ def __init__(self, cardinal: GraphFst): graph_first = pynini.cross("첫", "1") graph_single = graph_two_to_nine | graph_first - graph_ten = pynini.cross("열", "10") graph_tens = graph_ten | graph_tens_prefix + graph_digit @@ -88,28 +87,22 @@ def __init__(self, cardinal: GraphFst): # Input includes all cardinal expressions cardinals_acceptor = pynini.project(cardinals, "input").optimize() # Input includes cardinal expression from 1 to 39 - cardinals_exception = pynini.project( - cardinal_below_40, "input" - ).optimize() + cardinals_exception = pynini.project(cardinal_below_40, "input").optimize() # All cardinal values except 1 to 39 cardinal values - cardinal_over_40 = pynini.difference( - cardinals_acceptor, cardinals_exception - ).optimize() + cardinal_over_40 = pynini.difference(cardinals_acceptor, cardinals_exception).optimize() cardinal_ordinal_suffix = cardinal_over_40 @ cardinals # 1 to 39 in ordinal, everything else cardinal - ordinal_final = pynini.union(ordinals, cardinal_ordinal_suffix) + ordinal_final = pynini.union(ordinals, cardinal_ordinal_suffix) ordinal_graph = pynutil.insert("integer: \"") + ((ordinal_final + ordinals_suffix)) + pynutil.insert("\"") - #Adding various counter suffix for ordinal + # Adding various counter suffix for ordinal # For counting, Korean does not use the speical "첫" for 1. 
Instead the regular "한" - counters = pynini.union( - graph_digit, graph_tens, graph_twenties, graph_thirties - ).optimize() - - counter_final = (get_counter(counters) | get_counter(cardinal_ordinal_suffix)) + counters = pynini.union(graph_digit, graph_tens, graph_twenties, graph_thirties).optimize() + + counter_final = get_counter(counters) | get_counter(cardinal_ordinal_suffix) counter_graph = pynutil.insert("integer: \"") + counter_final + pynutil.insert("\"") From 688c84f7a9c7f2ea5e8a445e133104b56821e7e1 Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Thu, 31 Jul 2025 11:02:08 -0700 Subject: [PATCH 20/29] Adding Korean fraction ITN to the codes and raising a new PR Signed-off-by: hmlee245 --- .../ko/taggers/decimal.py | 6 +- .../ko/taggers/fraction.py | 95 +++++++++++++++++++ .../ko/taggers/ordinal.py | 2 +- .../ko/taggers/tokenize_and_classify.py | 6 +- .../ko/verbalizers/fraction.py | 65 +++++++++++++ .../ko/verbalizers/verbalize.py | 6 +- .../test_cases_fraction.txt | 29 ++++++ .../nemo_text_processing/ko/test_fraction.py | 32 +++++++ ..._sparrowhawk_inverse_text_normalization.sh | 5 + 9 files changed, 240 insertions(+), 6 deletions(-) create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/fraction.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/fraction.py create mode 100644 tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_fraction.txt create mode 100644 tests/nemo_text_processing/ko/test_fraction.py diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py index b2b82c32b..40ca9a3e0 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py @@ -16,7 +16,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import 
GraphFst +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, NEMO_SPACE from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path @@ -47,8 +47,8 @@ def __init__(self, cardinal: GraphFst): integer_part = pynutil.insert("integer_part: \"") + cardinals + pynutil.insert("\"") fractional_part = pynutil.insert("fractional_part: \"") + decimal_part + pynutil.insert("\"") - graph_decimal_regular = integer_part + decimal_point + pynutil.insert(" ") + fractional_part #Regular decimal like 1.5 - graph_deicimal_larger = get_quantity(graph_decimal_regular) #If decimal is used to express big numbers like 15000 -> "1.5만" + graph_decimal_regular = integer_part + decimal_point + pynutil.insert(NEMO_SPACE) + fractional_part # Regular decimal like 1.5 + graph_deicimal_larger = get_quantity(graph_decimal_regular) # If decimal is used to express big numbers like 15000 -> "1.5만" self.decimal = graph_decimal_regular | graph_deicimal_larger diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/fraction.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/fraction.py new file mode 100644 index 000000000..faf522133 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/fraction.py @@ -0,0 +1,95 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, NEMO_SPACE + + +class FractionFst(GraphFst): + def __init__(self, cardinal: GraphFst, decimal: GraphFst): + """ + Fitite state transducer for classifying fractions + e.g., + fraction { denominator: "사" numerator: "삼" } -> 3/4 + fraction { mixed number: "일" denominator: "사" numerator: "삼" } -> 1 3/4 + fraction { denominator: "루트삼" numerator: "일" } -> 1/√3 + fraction { denominator: "일점육오" numerator: "오십" } -> 50/1.65 + fraction { denominator: "이루트육" numerator: "삼" } -> 3/2√6 + """ + super().__init__(name="fraction", kind="classify") + + cardinal = cardinal.just_cardinals + decimal = decimal.just_decimal + + # Expression between fraction. Means the dash "/" + fraction_word = pynutil.delete("분의") + # Expression combining mixed number and fraction. Optional to use + connecting_word = pynutil.delete("와") | pynutil.delete("과") + # Expression for "√" + root_word = pynini.accep("√") | pynini.cross("루트", "√") + + graph_sign = ( + pynutil.insert("negative: \"") + (pynini.accep("-") | pynini.cross("마이너스", "-")) + pynutil.insert("\"") + ) + + # graph_mixed_number considers all of possible combination number you can have in front of fraction + graph_mixed_number = ( + pynutil.insert("integer_part: \"") + + ( + decimal | (decimal + connecting_word) | (root_word + decimal) | (cardinal + root_word + decimal) + | (root_word + decimal + connecting_word) | (cardinal + root_word + decimal + connecting_word) + | cardinal | (cardinal + connecting_word) | (root_word + cardinal) | (cardinal + root_word + cardinal) + | (root_word + cardinal + connecting_word) | (cardinal + root_word + cardinal + connecting_word) + + ) + + pynutil.insert("\"") + ) + + graph_denominator = ( + pynutil.insert("denominator: \"") + + ( + (decimal | (cardinal + root_word + decimal) | (root_word + decimal) + | cardinal | (cardinal + root_word + cardinal) | (root_word + 
cardinal)) + + pynini.closure(pynutil.delete(NEMO_SPACE), 0, 1) + ) + + pynutil.insert("\"") + ) + + graph_numerator = ( + pynutil.insert("numerator: \"") + + ( + (decimal | (cardinal + root_word + decimal) | (root_word + decimal) + | cardinal | (cardinal + root_word + cardinal) | (root_word + cardinal)) + + pynini.closure(pynutil.delete(NEMO_SPACE)) + ) + + pynutil.insert("\"") + ) + + graph_fraction_sign = (graph_sign + pynutil.insert(NEMO_SPACE) + graph_denominator + pynutil.insert(NEMO_SPACE) + fraction_word + graph_numerator) + graph_fraction_no_sign = (graph_denominator + pynutil.insert(NEMO_SPACE) + fraction_word + graph_numerator) + # Only fraction like "1/3" or "- 1/3" + graph_fractions = (graph_fraction_sign | graph_fraction_no_sign) + # Mixed number fraction like "2 1/3" or "-2 1/3" + graph_mixed_number_fraction = ( + pynini.closure((graph_sign + pynutil.insert(" ")), 0, 1) + pynutil.add_weight(graph_mixed_number, 1.1) + + pynutil.insert(NEMO_SPACE) + graph_denominator + pynutil.insert(NEMO_SPACE) + fraction_word + graph_numerator + ) + + final_graph = graph_fractions | graph_mixed_number_fraction + + final_graph = self.add_tokens(final_graph) + self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py index aeaa1d9ee..0207cb38d 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/ordinal.py @@ -103,7 +103,7 @@ def __init__(self, cardinal: GraphFst): ordinal_graph = pynutil.insert("integer: \"") + ((ordinal_final + ordinals_suffix)) + pynutil.insert("\"") - #Adding various counter suffix for ordinal + # Adding various counter suffix for ordinal # For counting, Korean does not use the speical "첫" for 1. 
Instead the regular "한" counters = pynini.union( graph_digit, graph_tens, graph_twenties, graph_thirties diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 5753e4b66..10bd1791f 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -1,5 +1,4 @@ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# Copyright 2015 and onwards Google, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -23,6 +22,7 @@ from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.decimal import DecimalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.ordinal import OrdinalFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.fraction import FractionFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst @@ -67,12 +67,16 @@ def __init__( decimal = DecimalFst(cardinal) decimal_graph = decimal.fst + fraction = FractionFst(cardinal, decimal) + fraction_graph = fraction.fst + word_graph = WordFst().fst classify = ( pynutil.add_weight(cardinal_graph, 1.1) | pynutil.add_weight(ordinal_graph, 1.1) | pynutil.add_weight(decimal_graph, 1.1) + | pynutil.add_weight(fraction_graph, 1.0) | pynutil.add_weight(word_graph, 100) ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/fraction.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/fraction.py new file mode 100644 index 000000000..4e04f65e3 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/fraction.py @@ -0,0 +1,65 @@ +# Copyright (c) 2025, NVIDIA 
CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( + NEMO_NON_BREAKING_SPACE, + NEMO_NOT_QUOTE, + NEMO_SPACE, + GraphFst, +) + + +class FractionFst(GraphFst): + def __init__(self): + """ + Finite state transducer for verbalizing fractions + e.g., + fraction { denominator: "사" numerator: "삼" } -> 3/4 + fraction { integer_part: "일" denominator: "사" numerator: "삼" } -> 1 3/4 + fraction { denominator: "루트삼" numerator: "일" } -> 1/√3 + fraction { denominator: "일점육오" numerator: "오십" } -> 50/1.65 + fraction { denominator: "이루트육" numerator: "삼" } -> 3/2√6 + """ + super().__init__(name="fraction", kind="verbalize") + + sign_component = pynutil.delete("negative: \"") + pynini.closure("-", 1) + pynutil.delete("\"") + + mixed_number_component = ( + pynutil.delete("integer_part: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") + ) + + denominator_component = ( + pynutil.delete("denominator: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") + ) + + numerator_component = ( + pynutil.delete("numerator: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") + ) + + regular_graph = ( + pynini.closure((sign_component + pynutil.delete(NEMO_SPACE)), 0, 1) + + pynini.closure(mixed_number_component + pynutil.delete(NEMO_SPACE) + pynutil.insert(NEMO_NON_BREAKING_SPACE)) + + numerator_component + +
pynutil.delete(NEMO_SPACE) + + pynutil.insert("/") + + denominator_component + ) + + final_graph = self.delete_tokens(regular_graph) + + self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 5d5a01b3c..567cdc695 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -17,6 +17,7 @@ from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.decimal import DecimalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.fraction import FractionFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst @@ -38,5 +39,8 @@ def __init__(self): decimal = DecimalFst() decimal_graph = decimal.fst - graph = cardinal_graph | ordinal_graph | decimal_graph + fraction = FractionFst() + fraction_graph = fraction.fst + + graph = cardinal_graph | ordinal_graph | decimal_graph | fraction_graph self.fst = graph diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_fraction.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_fraction.txt new file mode 100644 index 000000000..c5fda707d --- /dev/null +++ b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_fraction.txt @@ -0,0 +1,29 @@ +이분의일~1/2 +사분의일~1/4 +사분의삼~3/4 +오분의이~2/5 +십분의칠~7/10 +십이분의오~5/12 +이십삼분의십~10/23 +백분의일~1/100 +백분의구십구~99/100 +천분의백이십삼~123/1000 +일과이분의일~1 1/2 +삼과사분의일~3 1/4 +오와팔분의삼~5 3/8 +십과백분의칠십오~10 75/100 +마이너스사분의일~-1/4 +영점오분의일~1/0.5 +삼분의일점오~1.5/3 +루트사분의일~1/√4 +구분의루트십육~√16/9 +이와루트구분의일~2 1/√9 +마이너스오분의루트이십오~-√25/5 +칠분의육~6/7 +백오십분의이십~20/150 
+사와오분의이~4 2/5 +이십과백분의일~20 1/100 +일점오분의영점이~0.2/1.5 +루트백분의십~10/√100 +십과루트팔십일분의삼~10 3/√81 +마이너스이와십분의일~-2 1/10 \ No newline at end of file diff --git a/tests/nemo_text_processing/ko/test_fraction.py b/tests/nemo_text_processing/ko/test_fraction.py new file mode 100644 index 000000000..bb3a889e3 --- /dev/null +++ b/tests/nemo_text_processing/ko/test_fraction.py @@ -0,0 +1,32 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from parameterized import parameterized + +from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer +from nemo_text_processing.text_normalization.normalize import Normalizer + +from ..utils import CACHE_DIR, parse_test_case_file + + +class TestOrdinal: + inverse_normalizer = InverseNormalizer(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) + + @parameterized.expand(parse_test_case_file('ko/data_inverse_text_normalization/test_cases_fraction.txt')) + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_denorm(self, test_input, expected): + pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False) + assert pred == expected diff --git a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh index 7927877b8..a63c08f84 100644 --- 
a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh +++ b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh @@ -37,6 +37,11 @@ testITNDecimal() { runtest $input } +testITNFraction() { + input=$TEST_DIR/data_inverse_text_normalization/test_cases_fraction.txt + runtest $input +} + # Remove all command-line arguments shift $# From 9b4c35f530a4176da93555e1bf86f6c1afd3f60c Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Tue, 19 Aug 2025 02:56:35 -0700 Subject: [PATCH 21/29] Adding Korean ITN Time Signed-off-by: hmlee245 --- .../ko/data/time/time_hours.tsv | 12 ++++ .../ko/data/time/time_minutes_seconds.tsv | 60 ++++++++++++++++ .../ko/taggers/fraction.py | 2 +- .../ko/taggers/time.py | 63 +++++++++++++++++ .../ko/taggers/tokenize_and_classify.py | 5 ++ .../ko/verbalizers/time.py | 70 +++++++++++++++++++ .../ko/verbalizers/verbalize.py | 6 +- .../test_cases_time.txt | 19 +++++ ..._sparrowhawk_inverse_text_normalization.sh | 5 ++ tests/nemo_text_processing/ko/test_time.py | 32 +++++++++ 10 files changed, 272 insertions(+), 2 deletions(-) create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/time/time_hours.tsv create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/time/time_minutes_seconds.tsv create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/time.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py create mode 100644 tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_time.txt create mode 100644 tests/nemo_text_processing/ko/test_time.py diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/time/time_hours.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/time/time_hours.tsv new file mode 100644 index 000000000..24b980aa1 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/time/time_hours.tsv @@ -0,0 +1,12 @@ +한 1 +두 2 +세 3 +네 4 +다섯 5 +여섯 6 
+일곱 7 +여덟 8 +아홉 9 +열 10 +열한 11 +열두 12 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/time/time_minutes_seconds.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/time/time_minutes_seconds.tsv new file mode 100644 index 000000000..efd37f03f --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/time/time_minutes_seconds.tsv @@ -0,0 +1,60 @@ +영 0 +일 1 +이 2 +삼 3 +사 4 +오 5 +육 6 +칠 7 +팔 8 +구 9 +십 10 +십일 11 +십이 12 +십삼 13 +십사 14 +십오 15 +십육 16 +십칠 17 +십팔 18 +십구 19 +이십 20 +이십일 21 +이십이 22 +이십삼 23 +이십사 24 +이십오 25 +이십육 26 +이십칠 27 +이십팔 28 +이십구 29 +삼십 30 +삼십일 31 +삼십이 32 +삼십삼 33 +삼십사 34 +삼십오 35 +삼십육 36 +삼십칠 37 +삼십팔 38 +삼십구 39 +사십 40 +사십일 41 +사십이 42 +사십삼 43 +사십사 44 +사십오 45 +사십육 46 +사십칠 47 +사십팔 48 +사십구 49 +오십 50 +오십일 51 +오십이 52 +오십삼 53 +오십사 54 +오십오 55 +오십육 56 +오십칠 57 +오십팔 58 +오십구 59 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/fraction.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/fraction.py index faf522133..d32af2604 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/fraction.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/fraction.py @@ -25,7 +25,7 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst): Fitite state transducer for classifying fractions e.g., fraction { denominator: "사" numerator: "삼" } -> 3/4 - fraction { mixed number: "일" denominator: "사" numerator: "삼" } -> 1 3/4 + fraction { integer_part: "일" denominator: "사" numerator: "삼" } -> 1 3/4 fraction { denominator: "루트삼" numerator: "일" } -> 1/√3 fraction { denominator: "일점육오" numerator: "오십" } -> 50/1.65 fraction { denominator: "이루트육" numerator: "삼" } -> 3/2√6 diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py new file mode 100644 index 000000000..5f2000d89 --- /dev/null +++ 
b/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py @@ -0,0 +1,63 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space + +class TimeFst(GraphFst): + """ + Finite state transducer for classifying time + e.g. 열두시 삼십분 -> time { hours: "12" minutes: "30" } + e.g. 12분전 -> time { minutes: "12" suffix: "전" } + e.g. 새벽 두시 -> time { hours: "2" suffix: "새벽" } + e.g. 두시반 -> time { hours: "2" minutes: "30" } + e.g. 
오후 두시반 -> time { prefix: "오후" hours: "2" minutes: "30" } + """ + def __init__(self): + super().__init__(name="time", kind="classify") + + # 1-12 for hours + graph_hours = pynini.string_file(get_abs_path("data/time/time_hours.tsv")) + # 0-59 for minutes, seconds + graph_minutes = pynini.string_file(get_abs_path("data/time/time_minutes_seconds.tsv")) + # Special expression for 30 minute + graph_half = pynini.cross("반", "30") + + hour_component = (pynutil.insert("hours: \"") + (graph_hours + pynutil.delete("시")) + pynutil.insert("\"")) + + minute_component = (pynutil.insert("minutes: \"") + ((graph_minutes + pynutil.delete("분")) | graph_half) + pynutil.insert("\"")) + + second_component = (pynutil.insert("seconds: \"") + (graph_minutes + pynutil.delete("초")) + pynutil.insert("\"")) + + hour = pynini.closure(hour_component, 0, 1) + minute = pynini.closure(delete_space + minute_component, 0, 1) + second = pynini.closure(delete_space + second_component , 0, 1) + + graph_regular = hour + minute + second + + # 오전 = AM, 오후 = PM + prefix_words = pynini.accep("오전") | pynini.accep("오후") + prefix_tag = pynutil.insert("prefix: \"") + prefix_words + pynutil.insert("\"") + + # 전 = before, 후 = after + suffix_words = pynini.accep("전") | pynini.accep("후") + suffix_tag = pynutil.insert("suffix: \"") + suffix_words + pynutil.insert("\"") + + final_graph = pynini.closure(delete_space + prefix_tag, 0, 1) + graph_regular + pynini.closure(delete_space + suffix_tag, 0, 1) + + self.fst = self.add_tokens(final_graph).optimize() \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 10bd1791f..98ce064f9 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -23,6 +23,7 @@ from 
nemo_text_processing.inverse_text_normalization.ko.taggers.decimal import DecimalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.fraction import FractionFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.time import TimeFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst @@ -70,6 +71,9 @@ def __init__( fraction = FractionFst(cardinal, decimal) fraction_graph = fraction.fst + time = TimeFst() + time_graph = time.fst + word_graph = WordFst().fst classify = ( @@ -77,6 +81,7 @@ def __init__( | pynutil.add_weight(ordinal_graph, 1.1) | pynutil.add_weight(decimal_graph, 1.1) | pynutil.add_weight(fraction_graph, 1.0) + | pynutil.add_weight(time_graph, 1.0) | pynutil.add_weight(word_graph, 100) ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py new file mode 100644 index 000000000..11e174b82 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py @@ -0,0 +1,70 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( + NEMO_DIGIT, + GraphFst, + delete_space, + NEMO_NOT_QUOTE +) + + +class TimeFst(GraphFst): + """ + Finite state transducer for classifying time + e.g. 열두시 삼십분 -> time { hours: "12" minutes: "30" } + e.g. 12분전 -> time { minutes: "12" suffix: "전" } + e.g. 새벽 두시 -> time { hours: "2" suffix: "새벽" } + e.g. 두시반 -> time { hours: "2" minutes: "30" } + e.g. 오후 두시반 -> time { prefix: "오후" hours: "2" minutes: "30" } + """ + def __init__(self): + super().__init__(name="time", kind="verbalize") + + hours_component = pynutil.delete("hours: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") + minutes_component = pynutil.delete("minutes: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") + seconds_component = pynutil.delete("seconds: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") + suffix_component = pynutil.delete("suffix: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") + prefix_component = pynutil.delete("prefix: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") + + # Add a leading zero to single-digit minutes/seconds + single_digit = NEMO_DIGIT + leading_zero = pynutil.insert("0") + single_digit + add_leading_zero = pynini.union(single_digit @ leading_zero, pynini.closure(NEMO_DIGIT, 2)) + + minutes = minutes_component @ add_leading_zero + seconds = seconds_component @ add_leading_zero + + # Defining all the possible combinations + path_h = hours_component + pynutil.insert(":00") + path_m = minutes + path_s = seconds + + path_hm = hours_component + delete_space + pynutil.insert(":") + minutes + path_hs = hours_component + delete_space + pynutil.insert(":") + pynutil.insert("00") + delete_space + pynutil.insert(":") + seconds + path_ms = minutes + delete_space + pynutil.insert(":") + seconds + + path_hms = hours_component + delete_space + pynutil.insert(":") + minutes + 
delete_space + pynutil.insert(":") + seconds + + time_graph = pynini.union(path_h, path_m, path_s, path_hm, path_hs, path_ms, path_hms) + + # Adding prefix and suffix space + optional_prefix_out = pynini.closure(delete_space + prefix_component, 0, 1) + optional_suffix_out = pynini.closure(delete_space + pynutil.insert(" ") + suffix_component, 0, 1) + + final_graph = optional_prefix_out + time_graph + optional_suffix_out + self.fst = self.delete_tokens(delete_space + final_graph).optimize() \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 567cdc695..917b519a0 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -18,6 +18,7 @@ from nemo_text_processing.inverse_text_normalization.ko.verbalizers.decimal import DecimalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.fraction import FractionFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.time import TimeFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst @@ -42,5 +43,8 @@ def __init__(self): fraction = FractionFst() fraction_graph = fraction.fst - graph = cardinal_graph | ordinal_graph | decimal_graph | fraction_graph + time = TimeFst() + time_graph = time.fst + + graph = cardinal_graph | ordinal_graph | decimal_graph | fraction_graph | time_graph self.fst = graph diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_time.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_time.txt new file mode 100644 index 000000000..fe0615dec --- /dev/null +++ 
b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_time.txt @@ -0,0 +1,19 @@ +두시~2:00 +열두시~12:00 +삼십분~30 +오초~05 +두시 삼십분~2:30 +세시 삼분~3:03 +두시 반~2:30 +열두시 반~12:30 +삼십분 오초~30:05 +삼분 오초~03:05 +두시 오초~2:00:05 +두시 삼십분 오초~2:30:05 +오전두시~오전2:00 +오후네시반~오후4:30 +두시전~2:00 전 +두시십분후~2:10 후 +한시 십오분 삼십초~1:15:30 +네시 이분~4:02 +열한시 오십구분~11:59 \ No newline at end of file diff --git a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh index a63c08f84..a08d792e7 100644 --- a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh +++ b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh @@ -42,6 +42,11 @@ testITNFraction() { runtest $input } +testITNTime() { + input=$TEST_DIR/data_inverse_text_normalization/test_cases_time.txt + runtest $input +} + # Remove all command-line arguments shift $# diff --git a/tests/nemo_text_processing/ko/test_time.py b/tests/nemo_text_processing/ko/test_time.py new file mode 100644 index 000000000..c5e0f71d3 --- /dev/null +++ b/tests/nemo_text_processing/ko/test_time.py @@ -0,0 +1,32 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest +from parameterized import parameterized + +from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer +from nemo_text_processing.text_normalization.normalize import Normalizer + +from ..utils import CACHE_DIR, parse_test_case_file + + +class TestOrdinal: + inverse_normalizer = InverseNormalizer(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) + + @parameterized.expand(parse_test_case_file('ko/data_inverse_text_normalization/test_cases_time.txt')) + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_denorm(self, test_input, expected): + pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False) + assert pred == expected From b7852af3dd0dad02e4447ac887a19d1f745a8ff4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Aug 2025 10:07:25 +0000 Subject: [PATCH 22/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../ko/taggers/decimal.py | 10 +++- .../ko/taggers/fraction.py | 60 ++++++++++++++----- .../ko/taggers/time.py | 28 ++++++--- .../ko/taggers/tokenize_and_classify.py | 2 +- .../ko/verbalizers/fraction.py | 4 +- .../ko/verbalizers/time.py | 33 +++++++--- .../ko/verbalizers/verbalize.py | 4 +- 7 files changed, 104 insertions(+), 37 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py index dd6f05bb3..ecb92df1d 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py @@ -16,7 +16,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, NEMO_SPACE +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_SPACE, GraphFst from 
nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path @@ -47,8 +47,12 @@ def __init__(self, cardinal: GraphFst): integer_part = pynutil.insert("integer_part: \"") + cardinals + pynutil.insert("\"") fractional_part = pynutil.insert("fractional_part: \"") + decimal_part + pynutil.insert("\"") - graph_decimal_regular = integer_part + decimal_point + pynutil.insert(NEMO_SPACE) + fractional_part # Regular decimal like 1.5 - graph_deicimal_larger = get_quantity(graph_decimal_regular) # If decimal is used to express big numbers like 15000 -> "1.5만" + graph_decimal_regular = ( + integer_part + decimal_point + pynutil.insert(NEMO_SPACE) + fractional_part + ) # Regular decimal like 1.5 + graph_deicimal_larger = get_quantity( + graph_decimal_regular + ) # If decimal is used to express big numbers like 15000 -> "1.5만" self.decimal = graph_decimal_regular | graph_deicimal_larger self.just_decimal = cardinals + pynini.cross("점", ".") + decimal_part diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/fraction.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/fraction.py index d0250d2e9..f7a11e046 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/fraction.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/fraction.py @@ -16,7 +16,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, NEMO_SPACE +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_SPACE, GraphFst class FractionFst(GraphFst): @@ -50,10 +50,18 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst): graph_mixed_number = ( pynutil.insert("integer_part: \"") + ( - decimal | (decimal + connecting_word) | (root_word + decimal) | (cardinal + root_word + decimal) - | (root_word + decimal + connecting_word) | (cardinal + root_word + decimal + connecting_word) - | cardinal | (cardinal + connecting_word) | (root_word + 
cardinal) | (cardinal + root_word + cardinal) - | (root_word + cardinal + connecting_word) | (cardinal + root_word + cardinal + connecting_word) + decimal + | (decimal + connecting_word) + | (root_word + decimal) + | (cardinal + root_word + decimal) + | (root_word + decimal + connecting_word) + | (cardinal + root_word + decimal + connecting_word) + | cardinal + | (cardinal + connecting_word) + | (root_word + cardinal) + | (cardinal + root_word + cardinal) + | (root_word + cardinal + connecting_word) + | (cardinal + root_word + cardinal + connecting_word) ) + pynutil.insert("\"") ) @@ -61,8 +69,14 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst): graph_denominator = ( pynutil.insert("denominator: \"") + ( - (decimal | (cardinal + root_word + decimal) | (root_word + decimal) - | cardinal | (cardinal + root_word + cardinal) | (root_word + cardinal)) + ( + decimal + | (cardinal + root_word + decimal) + | (root_word + decimal) + | cardinal + | (cardinal + root_word + cardinal) + | (root_word + cardinal) + ) + pynini.closure(pynutil.delete(NEMO_SPACE), 0, 1) ) + pynutil.insert("\"") @@ -71,21 +85,39 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst): graph_numerator = ( pynutil.insert("numerator: \"") + ( - (decimal | (cardinal + root_word + decimal) | (root_word + decimal) - | cardinal | (cardinal + root_word + cardinal) | (root_word + cardinal)) + ( + decimal + | (cardinal + root_word + decimal) + | (root_word + decimal) + | cardinal + | (cardinal + root_word + cardinal) + | (root_word + cardinal) + ) + pynini.closure(pynutil.delete(NEMO_SPACE)) ) + pynutil.insert("\"") ) - graph_fraction_sign = (graph_sign + pynutil.insert(NEMO_SPACE) + graph_denominator + pynutil.insert(NEMO_SPACE) + fraction_word + graph_numerator) - graph_fraction_no_sign = (graph_denominator + pynutil.insert(NEMO_SPACE) + fraction_word + graph_numerator) + graph_fraction_sign = ( + graph_sign + + pynutil.insert(NEMO_SPACE) + + graph_denominator + + pynutil.insert(NEMO_SPACE) 
+ + fraction_word + + graph_numerator + ) + graph_fraction_no_sign = graph_denominator + pynutil.insert(NEMO_SPACE) + fraction_word + graph_numerator # Only fraction like "1/3" or "- 1/3" - graph_fractions = (graph_fraction_sign | graph_fraction_no_sign) + graph_fractions = graph_fraction_sign | graph_fraction_no_sign # Mixed number fraction like "2 1/3" or "-2 1/3" graph_mixed_number_fraction = ( - pynini.closure((graph_sign + pynutil.insert(" ")), 0, 1) + pynutil.add_weight(graph_mixed_number, 1.1) - + pynutil.insert(NEMO_SPACE) + graph_denominator + pynutil.insert(NEMO_SPACE) + fraction_word + graph_numerator + pynini.closure((graph_sign + pynutil.insert(" ")), 0, 1) + + pynutil.add_weight(graph_mixed_number, 1.1) + + pynutil.insert(NEMO_SPACE) + + graph_denominator + + pynutil.insert(NEMO_SPACE) + + fraction_word + + graph_numerator ) final_graph = graph_fractions | graph_mixed_number_fraction diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py index 5f2000d89..b9ca9dbfe 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py @@ -16,8 +16,9 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space +from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path + class TimeFst(GraphFst): """ @@ -28,6 +29,7 @@ class TimeFst(GraphFst): e.g. 두시반 -> time { hours: "2" minutes: "30" } e.g. 
오후 두시반 -> time { prefix: "오후" hours: "2" minutes: "30" } """ + def __init__(self): super().__init__(name="time", kind="classify") @@ -38,18 +40,24 @@ def __init__(self): # Special expression for 30 minute graph_half = pynini.cross("반", "30") - hour_component = (pynutil.insert("hours: \"") + (graph_hours + pynutil.delete("시")) + pynutil.insert("\"")) + hour_component = pynutil.insert("hours: \"") + (graph_hours + pynutil.delete("시")) + pynutil.insert("\"") - minute_component = (pynutil.insert("minutes: \"") + ((graph_minutes + pynutil.delete("분")) | graph_half) + pynutil.insert("\"")) + minute_component = ( + pynutil.insert("minutes: \"") + + ((graph_minutes + pynutil.delete("분")) | graph_half) + + pynutil.insert("\"") + ) - second_component = (pynutil.insert("seconds: \"") + (graph_minutes + pynutil.delete("초")) + pynutil.insert("\"")) + second_component = ( + pynutil.insert("seconds: \"") + (graph_minutes + pynutil.delete("초")) + pynutil.insert("\"") + ) hour = pynini.closure(hour_component, 0, 1) minute = pynini.closure(delete_space + minute_component, 0, 1) - second = pynini.closure(delete_space + second_component , 0, 1) + second = pynini.closure(delete_space + second_component, 0, 1) graph_regular = hour + minute + second - + # 오전 = AM, 오후 = PM prefix_words = pynini.accep("오전") | pynini.accep("오후") prefix_tag = pynutil.insert("prefix: \"") + prefix_words + pynutil.insert("\"") @@ -58,6 +66,10 @@ def __init__(self): suffix_words = pynini.accep("전") | pynini.accep("후") suffix_tag = pynutil.insert("suffix: \"") + suffix_words + pynutil.insert("\"") - final_graph = pynini.closure(delete_space + prefix_tag, 0, 1) + graph_regular + pynini.closure(delete_space + suffix_tag, 0, 1) + final_graph = ( + pynini.closure(delete_space + prefix_tag, 0, 1) + + graph_regular + + pynini.closure(delete_space + suffix_tag, 0, 1) + ) - self.fst = self.add_tokens(final_graph).optimize() \ No newline at end of file + self.fst = self.add_tokens(final_graph).optimize() diff --git 
a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 98ce064f9..5325be602 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -21,8 +21,8 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import INPUT_LOWER_CASED, GraphFst, generator_main from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.decimal import DecimalFst -from nemo_text_processing.inverse_text_normalization.ko.taggers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.fraction import FractionFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.time import TimeFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/fraction.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/fraction.py index 4e04f65e3..7b0845dc1 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/fraction.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/fraction.py @@ -53,7 +53,9 @@ def __init__(self): regular_graph = ( pynini.closure((sign_component + pynutil.delete(NEMO_SPACE)), 0, 1) - + pynini.closure(mixed_number_component + pynutil.delete(NEMO_SPACE) + pynutil.insert(NEMO_NON_BREAKING_SPACE)) + + pynini.closure( + mixed_number_component + pynutil.delete(NEMO_SPACE) + pynutil.insert(NEMO_NON_BREAKING_SPACE) + ) + numerator_component + pynutil.delete(NEMO_SPACE) + pynutil.insert("/") diff --git 
a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py index 11e174b82..a1d264caa 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py @@ -17,9 +17,9 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( NEMO_DIGIT, + NEMO_NOT_QUOTE, GraphFst, delete_space, - NEMO_NOT_QUOTE ) @@ -32,6 +32,7 @@ class TimeFst(GraphFst): e.g. 두시반 -> time { hours: "2" minutes: "30" } e.g. 오후 두시반 -> time { prefix: "오후" hours: "2" minutes: "30" } """ + def __init__(self): super().__init__(name="time", kind="verbalize") @@ -39,32 +40,48 @@ def __init__(self): minutes_component = pynutil.delete("minutes: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") seconds_component = pynutil.delete("seconds: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") suffix_component = pynutil.delete("suffix: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") - prefix_component = pynutil.delete("prefix: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") + prefix_component = pynutil.delete("prefix: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"") # Add a leading zero to single-digit minutes/seconds single_digit = NEMO_DIGIT leading_zero = pynutil.insert("0") + single_digit add_leading_zero = pynini.union(single_digit @ leading_zero, pynini.closure(NEMO_DIGIT, 2)) - + minutes = minutes_component @ add_leading_zero seconds = seconds_component @ add_leading_zero - + # Defining all the possible combinations path_h = hours_component + pynutil.insert(":00") path_m = minutes path_s = seconds path_hm = hours_component + delete_space + pynutil.insert(":") + minutes - path_hs = hours_component + delete_space + pynutil.insert(":") + pynutil.insert("00") + delete_space + pynutil.insert(":") + seconds + path_hs = ( + hours_component + 
+ delete_space + + pynutil.insert(":") + + pynutil.insert("00") + + delete_space + + pynutil.insert(":") + + seconds + ) path_ms = minutes + delete_space + pynutil.insert(":") + seconds - path_hms = hours_component + delete_space + pynutil.insert(":") + minutes + delete_space + pynutil.insert(":") + seconds + path_hms = ( + hours_component + + delete_space + + pynutil.insert(":") + + minutes + + delete_space + + pynutil.insert(":") + + seconds + ) time_graph = pynini.union(path_h, path_m, path_s, path_hm, path_hs, path_ms, path_hms) # Adding prefix and suffix space optional_prefix_out = pynini.closure(delete_space + prefix_component, 0, 1) optional_suffix_out = pynini.closure(delete_space + pynutil.insert(" ") + suffix_component, 0, 1) - + final_graph = optional_prefix_out + time_graph + optional_suffix_out - self.fst = self.delete_tokens(delete_space + final_graph).optimize() \ No newline at end of file + self.fst = self.delete_tokens(delete_space + final_graph).optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 917b519a0..f227bcf7c 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -16,8 +16,8 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.decimal import DecimalFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.fraction import FractionFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst from 
nemo_text_processing.inverse_text_normalization.ko.verbalizers.time import TimeFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst @@ -46,5 +46,5 @@ def __init__(self): time = TimeFst() time_graph = time.fst - graph = cardinal_graph | ordinal_graph | decimal_graph | fraction_graph | time_graph + graph = cardinal_graph | ordinal_graph | decimal_graph | fraction_graph | time_graph self.fst = graph From 736ca34b50160da2af9ed67d6216ab1cf5b5a690 Mon Sep 17 00:00:00 2001 From: Hyunmin Lee Date: Thu, 28 Aug 2025 16:46:28 -0700 Subject: [PATCH 23/29] Changes to time ITN and draft for date ITN Signed-off-by: Hyunmin Lee --- .../ko/data/months.tsv | 12 +++ .../ko/data/time/time_minutes_seconds.tsv | 60 -------------- .../ko/taggers/date.py | 78 +++++++++++++++++++ .../ko/taggers/time.py | 47 +++++++++-- .../ko/taggers/tokenize_and_classify.py | 5 ++ .../ko/verbalizers/date.py | 50 ++++++++++++ .../ko/verbalizers/verbalize.py | 8 +- .../test_cases_date.txt | 16 ++++ .../test_cases_time.txt | 4 - tests/nemo_text_processing/ko/test_date.py | 32 ++++++++ ..._sparrowhawk_inverse_text_normalization.sh | 5 ++ 11 files changed, 244 insertions(+), 73 deletions(-) create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/months.tsv delete mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/time/time_minutes_seconds.tsv create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/date.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/date.py create mode 100644 tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_date.txt create mode 100644 tests/nemo_text_processing/ko/test_date.py diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/months.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/months.tsv new file mode 100644 index 000000000..52039ef35 --- /dev/null +++ 
b/nemo_text_processing/inverse_text_normalization/ko/data/months.tsv @@ -0,0 +1,12 @@ +일 1 +이 2 +삼 3 +사 4 +오 5 +유 6 +칠 7 +팔 8 +구 9 +시 10 +십일 11 +십이 12 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/time/time_minutes_seconds.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/time/time_minutes_seconds.tsv deleted file mode 100644 index efd37f03f..000000000 --- a/nemo_text_processing/inverse_text_normalization/ko/data/time/time_minutes_seconds.tsv +++ /dev/null @@ -1,60 +0,0 @@ -영 0 -일 1 -이 2 -삼 3 -사 4 -오 5 -육 6 -칠 7 -팔 8 -구 9 -십 10 -십일 11 -십이 12 -십삼 13 -십사 14 -십오 15 -십육 16 -십칠 17 -십팔 18 -십구 19 -이십 20 -이십일 21 -이십이 22 -이십삼 23 -이십사 24 -이십오 25 -이십육 26 -이십칠 27 -이십팔 28 -이십구 29 -삼십 30 -삼십일 31 -삼십이 32 -삼십삼 33 -삼십사 34 -삼십오 35 -삼십육 36 -삼십칠 37 -삼십팔 38 -삼십구 39 -사십 40 -사십일 41 -사십이 42 -사십삼 43 -사십사 44 -사십오 45 -사십육 46 -사십칠 47 -사십팔 48 -사십구 49 -오십 50 -오십일 51 -오십이 52 -오십삼 53 -오십사 54 -오십오 55 -오십육 56 -오십칠 57 -오십팔 58 -오십구 59 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/date.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/date.py new file mode 100644 index 000000000..7a58f518a --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/date.py @@ -0,0 +1,78 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, NEMO_SPACE +from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path + + +class DateFst(GraphFst): + """ + Finite state transducer for classifying date, + e.g. 이천십이년 일월 오일 -> date { year: "2012" month: "1" day: "5" } + e.g. 오월 -> date { month: "5" } + e.g. 칠일 -> date { day: "7" } + """ + + def __init__(self, cardinal: GraphFst): + super().__init__(name="date", kind="classify") + + cardinal = cardinal.just_cardinals + month = pynini.string_file(get_abs_path("data/months.tsv")) + + spacing = pynini.closure(pynini.accep(NEMO_SPACE), 0, 1) + + year_suffix = pynini.cross("년", "") + month_suffix = pynini.cross("월", "") + day_suffix = pynini.cross("일", "") + + year_component = ( + pynutil.insert("year: \"") + + cardinal + + pynini.closure(year_suffix, 0, 1) + + pynutil.insert("\"") + ) + + month_component = ( + pynutil.insert("month: \"") + + spacing + + month + + pynini.closure(month_suffix, 0, 1) + + pynutil.insert("\"") + ) + + day_component = ( + pynutil.insert("day: \"") + + spacing + + cardinal + + day_suffix + + spacing + + pynutil.insert("\"") + ) + + graph_component = year_component | month_component | day_component + graph_date = ( + pynini.closure(year_component, 0, 1) + + pynini.closure((pynutil.insert(NEMO_SPACE)) + month_component, 0, 1) + + pynini.closure((pynutil.insert(NEMO_SPACE)) + day_component, 0, 1) + ) + + final_graph = graph_component | graph_date + + final_graph = self.add_tokens(final_graph) + self.fst = final_graph.optimize() + diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py index b9ca9dbfe..63c85487f 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py @@ -16,7 +16,7 @@ import pynini 
from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, NEMO_SPACE from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path @@ -33,23 +33,56 @@ class TimeFst(GraphFst): def __init__(self): super().__init__(name="time", kind="classify") + # 1-9 in cardinals for minutes and seconds + cardinal_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) + cardinal_zero = pynini.cross("영", "0") + + graph_tens_prefix = pynini.union( + pynini.cross("이", "2"), + pynini.cross("삼", "3"), + pynini.cross("사", "4"), + pynini.cross("오", "5") + ) + # Graphing 10-19 + graph_ten = pynini.union( + pynini.cross("십", "10"), + pynini.cross("십", "1") + cardinal_digit + ).optimize() + # Graphing 20-59 + graph_tens = ( + (graph_tens_prefix + pynini.cross("십", "0")) + | (graph_tens_prefix + pynini.cross("십", "") + cardinal_digit) + ) + + graph_0_to_59 = pynini.union( + cardinal_zero, + cardinal_digit, + graph_ten, + graph_tens + ).optimize() + # 1-12 for hours graph_hours = pynini.string_file(get_abs_path("data/time/time_hours.tsv")) - # 0-59 for minutes, seconds - graph_minutes = pynini.string_file(get_abs_path("data/time/time_minutes_seconds.tsv")) # Special expression for 30 minute graph_half = pynini.cross("반", "30") - hour_component = pynutil.insert("hours: \"") + (graph_hours + pynutil.delete("시")) + pynutil.insert("\"") + # Adding space if there are one + spacing = pynini.closure(pynini.accep(NEMO_SPACE), 0, 1) + + hour_suffix = pynini.cross("시", "") + minute_suffix = pynini.cross("분", "") + second_suffix = pynini.cross("초", "") + + hour_component = pynutil.insert("hours: \"") + (graph_hours + spacing + hour_suffix) + pynutil.insert("\"") minute_component = ( pynutil.insert("minutes: \"") - + ((graph_minutes + pynutil.delete("분")) | graph_half) + + ((graph_0_to_59 + spacing + 
minute_suffix) | graph_half) + pynutil.insert("\"") ) second_component = ( - pynutil.insert("seconds: \"") + (graph_minutes + pynutil.delete("초")) + pynutil.insert("\"") + pynutil.insert("seconds: \"") + (graph_0_to_59 + spacing + second_suffix) + pynutil.insert("\"") ) hour = pynini.closure(hour_component, 0, 1) @@ -59,7 +92,7 @@ def __init__(self): graph_regular = hour + minute + second # 오전 = AM, 오후 = PM - prefix_words = pynini.accep("오전") | pynini.accep("오후") + prefix_words = (pynini.accep("오전") + spacing) | (pynini.accep("오후") + spacing) | (pynini.accep("새벽") + spacing) | (pynini.accep("아침") + spacing) prefix_tag = pynutil.insert("prefix: \"") + prefix_words + pynutil.insert("\"") # 전 = before, 후 = after diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 5325be602..3994b232f 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -24,6 +24,7 @@ from nemo_text_processing.inverse_text_normalization.ko.taggers.fraction import FractionFst from nemo_text_processing.inverse_text_normalization.ko.taggers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.time import TimeFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.date import DateFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst @@ -74,6 +75,9 @@ def __init__( time = TimeFst() time_graph = time.fst + date = DateFst(cardinal) + date_graph = date.fst + word_graph = WordFst().fst classify = ( @@ -82,6 +86,7 @@ def __init__( | pynutil.add_weight(decimal_graph, 1.1) | pynutil.add_weight(fraction_graph, 1.0) | pynutil.add_weight(time_graph, 1.0) + | pynutil.add_weight(date_graph, 1.1) | pynutil.add_weight(word_graph, 100) ) diff --git 
a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/date.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/date.py new file mode 100644 index 000000000..83d3611f8 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/date.py @@ -0,0 +1,50 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, NEMO_SPACE + + +class DateFst(GraphFst): + """ + Finite state transducer for classifying date, + e.g. 이천십이년 일월 오일 -> date { year: "2012" month: "1" day: "5" } + e.g. 오월 -> date { month: "5" } + e.g. 
칠일 -> date { day: "7" } + """ + + def __init__(self): + super().__init__(name="date", kind="verbalize") + + year_component = ( + pynutil.delete("year: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.insert("년") + pynutil.delete("\"") + ) + month_component = ( + pynutil.delete("month: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.insert("월") + pynutil.delete("\"") + ) + day_component = ( + pynutil.delete("day: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.insert("일") + pynutil.delete("\"") + ) + + graph = ( + pynini.closure(pynutil.delete(NEMO_SPACE) + year_component, 0, 1) + + pynini.closure(pynutil.delete(NEMO_SPACE) + month_component, 0, 1) + + pynini.closure(pynutil.delete(NEMO_SPACE) + day_component, 0, 1) + ) + + final_graph = self.delete_tokens(graph) + self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index f227bcf7c..56a109bae 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -19,6 +19,7 @@ from nemo_text_processing.inverse_text_normalization.ko.verbalizers.fraction import FractionFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.time import TimeFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.date import DateFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst @@ -46,5 +47,8 @@ def __init__(self): time = TimeFst() time_graph = time.fst - graph = cardinal_graph | ordinal_graph | decimal_graph | fraction_graph | time_graph - self.fst = graph + date = DateFst() + date_graph = date.fst + + graph = cardinal_graph | ordinal_graph | decimal_graph | fraction_graph | time_graph | date_graph + self.fst = graph \ No 
newline at end of file diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_date.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_date.txt new file mode 100644 index 000000000..ecad6dc19 --- /dev/null +++ b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_date.txt @@ -0,0 +1,16 @@ +이천이십사년팔월이십팔일~2024년8월28일 +이천이십삼년 구월 오일~2023년 9월 5일 +천구백구십구년십이월삼십일일~1999년12월31일 +이천년 이월 이십구일~2000년 2월 29일 +이천십년시월십일~2010년10월10일 +이천이십일년유월십육일~2021년6월16일 +이천삼십년삼월십사일~2030년3월14일 +천구백팔십팔년 오월 이십일~1988년 5월 20일 +이천일년 칠월 구일~2001년 7월 9일 +이천십팔년사월삼십일~2018년4월30일 +삼천년팔월십오일~3000년8월15일 +이천구년 일월이십일~2009년 1월20일 +이천삼십오년~2035년 +오월~5월 +이십사일~24일 +구천구백구십구년삼월일일~9999년3월1일 \ No newline at end of file diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_time.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_time.txt index fe0615dec..450039132 100644 --- a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_time.txt +++ b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_time.txt @@ -1,13 +1,9 @@ 두시~2:00 열두시~12:00 -삼십분~30 -오초~05 두시 삼십분~2:30 세시 삼분~3:03 두시 반~2:30 열두시 반~12:30 -삼십분 오초~30:05 -삼분 오초~03:05 두시 오초~2:00:05 두시 삼십분 오초~2:30:05 오전두시~오전2:00 diff --git a/tests/nemo_text_processing/ko/test_date.py b/tests/nemo_text_processing/ko/test_date.py new file mode 100644 index 000000000..f26513a15 --- /dev/null +++ b/tests/nemo_text_processing/ko/test_date.py @@ -0,0 +1,32 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from parameterized import parameterized + +from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer +from nemo_text_processing.text_normalization.normalize import Normalizer + +from ..utils import CACHE_DIR, parse_test_case_file + + +class TestOrdinal: + inverse_normalizer = InverseNormalizer(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) + + @parameterized.expand(parse_test_case_file('ko/data_inverse_text_normalization/test_cases_date.txt')) + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_denorm(self, test_input, expected): + pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False) + assert pred == expected diff --git a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh index a08d792e7..854aeafe7 100644 --- a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh +++ b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh @@ -47,6 +47,11 @@ testITNTime() { runtest $input } +testITNDate() { + input=$TEST_DIR/data_inverse_text_normalization/test_cases_date.txt + runtest $input +} + # Remove all command-line arguments shift $# From 9f01195ba2e927e3280fb0752cf7e1b39ac471a1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Aug 2025 23:48:19 +0000 Subject: [PATCH 24/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, 
see https://pre-commit.ci --- .../ko/taggers/date.py | 23 +++---------- .../ko/taggers/time.py | 33 ++++++++----------- .../ko/taggers/tokenize_and_classify.py | 2 +- .../ko/verbalizers/date.py | 4 +-- .../ko/verbalizers/verbalize.py | 4 +-- 5 files changed, 22 insertions(+), 44 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/date.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/date.py index 7a58f518a..b9de5c299 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/date.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/date.py @@ -16,7 +16,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, NEMO_SPACE +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_SPACE, GraphFst from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path @@ -41,28 +41,14 @@ def __init__(self, cardinal: GraphFst): day_suffix = pynini.cross("일", "") year_component = ( - pynutil.insert("year: \"") - + cardinal - + pynini.closure(year_suffix, 0, 1) - + pynutil.insert("\"") + pynutil.insert("year: \"") + cardinal + pynini.closure(year_suffix, 0, 1) + pynutil.insert("\"") ) month_component = ( - pynutil.insert("month: \"") - + spacing - + month - + pynini.closure(month_suffix, 0, 1) - + pynutil.insert("\"") + pynutil.insert("month: \"") + spacing + month + pynini.closure(month_suffix, 0, 1) + pynutil.insert("\"") ) - day_component = ( - pynutil.insert("day: \"") - + spacing - + cardinal - + day_suffix - + spacing - + pynutil.insert("\"") - ) + day_component = pynutil.insert("day: \"") + spacing + cardinal + day_suffix + spacing + pynutil.insert("\"") graph_component = year_component | month_component | day_component graph_date = ( @@ -75,4 +61,3 @@ def __init__(self, cardinal: GraphFst): final_graph = self.add_tokens(final_graph) self.fst = final_graph.optimize() - diff --git 
a/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py index 63c85487f..d804f5999 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py @@ -16,7 +16,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, NEMO_SPACE +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_SPACE, GraphFst, delete_space from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path @@ -36,30 +36,18 @@ def __init__(self): # 1-9 in cardinals for minutes and seconds cardinal_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) cardinal_zero = pynini.cross("영", "0") - + graph_tens_prefix = pynini.union( - pynini.cross("이", "2"), - pynini.cross("삼", "3"), - pynini.cross("사", "4"), - pynini.cross("오", "5") + pynini.cross("이", "2"), pynini.cross("삼", "3"), pynini.cross("사", "4"), pynini.cross("오", "5") ) # Graphing 10-19 - graph_ten = pynini.union( - pynini.cross("십", "10"), - pynini.cross("십", "1") + cardinal_digit - ).optimize() + graph_ten = pynini.union(pynini.cross("십", "10"), pynini.cross("십", "1") + cardinal_digit).optimize() # Graphing 20-59 - graph_tens = ( - (graph_tens_prefix + pynini.cross("십", "0")) - | (graph_tens_prefix + pynini.cross("십", "") + cardinal_digit) + graph_tens = (graph_tens_prefix + pynini.cross("십", "0")) | ( + graph_tens_prefix + pynini.cross("십", "") + cardinal_digit ) - graph_0_to_59 = pynini.union( - cardinal_zero, - cardinal_digit, - graph_ten, - graph_tens - ).optimize() + graph_0_to_59 = pynini.union(cardinal_zero, cardinal_digit, graph_ten, graph_tens).optimize() # 1-12 for hours graph_hours = pynini.string_file(get_abs_path("data/time/time_hours.tsv")) @@ -92,7 +80,12 @@ def __init__(self): graph_regular = hour + minute + 
second # 오전 = AM, 오후 = PM - prefix_words = (pynini.accep("오전") + spacing) | (pynini.accep("오후") + spacing) | (pynini.accep("새벽") + spacing) | (pynini.accep("아침") + spacing) + prefix_words = ( + (pynini.accep("오전") + spacing) + | (pynini.accep("오후") + spacing) + | (pynini.accep("새벽") + spacing) + | (pynini.accep("아침") + spacing) + ) prefix_tag = pynutil.insert("prefix: \"") + prefix_words + pynutil.insert("\"") # 전 = before, 후 = after diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 3994b232f..e57d43dd3 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -20,11 +20,11 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import INPUT_LOWER_CASED, GraphFst, generator_main from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.date import DateFst from nemo_text_processing.inverse_text_normalization.ko.taggers.decimal import DecimalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.fraction import FractionFst from nemo_text_processing.inverse_text_normalization.ko.taggers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.time import TimeFst -from nemo_text_processing.inverse_text_normalization.ko.taggers.date import DateFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/date.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/date.py index 83d3611f8..88ed973df 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/date.py +++ 
b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/date.py @@ -16,7 +16,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, NEMO_SPACE +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, NEMO_SPACE, GraphFst class DateFst(GraphFst): @@ -39,7 +39,7 @@ def __init__(self): day_component = ( pynutil.delete("day: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.insert("일") + pynutil.delete("\"") ) - + graph = ( pynini.closure(pynutil.delete(NEMO_SPACE) + year_component, 0, 1) + pynini.closure(pynutil.delete(NEMO_SPACE) + month_component, 0, 1) diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 56a109bae..51a3a8600 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -15,11 +15,11 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.date import DateFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.decimal import DecimalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.fraction import FractionFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.time import TimeFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.date import DateFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst @@ -51,4 +51,4 @@ def __init__(self): date_graph = date.fst graph = cardinal_graph | ordinal_graph 
| decimal_graph | fraction_graph | time_graph | date_graph - self.fst = graph \ No newline at end of file + self.fst = graph From 2d51a3904320e9cd4d7d64f1ccc00db0fc3ad054 Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Tue, 23 Sep 2025 05:00:52 -0700 Subject: [PATCH 25/29] Adding money to the Korean ITN Signed-off-by: hmlee245 --- .../ko/data/currency.tsv | 8 +++ .../ko/taggers/money.py | 59 +++++++++++++++++++ .../ko/taggers/tokenize_and_classify.py | 5 ++ .../ko/verbalizers/money.py | 49 +++++++++++++++ .../ko/verbalizers/time.py | 3 +- .../ko/verbalizers/verbalize.py | 6 +- .../test_cases_money.txt | 30 ++++++++++ tests/nemo_text_processing/ko/test_money.py | 32 ++++++++++ ..._sparrowhawk_inverse_text_normalization.sh | 5 ++ 9 files changed, 195 insertions(+), 2 deletions(-) create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/currency.tsv create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/money.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py create mode 100644 tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_money.txt create mode 100644 tests/nemo_text_processing/ko/test_money.py diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/currency.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/currency.tsv new file mode 100644 index 000000000..516cf5c0a --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/currency.tsv @@ -0,0 +1,8 @@ +달러 $ +유로 € +엔 ¥ +파운드 £ +위안 ¥ +페소 $ +루피 ₹ +원 ₩ \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py new file mode 100644 index 000000000..49a8b216c --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py @@ -0,0 +1,59 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( + NEMO_DIGIT, + GraphFst, + convert_space, + delete_extra_space +) +from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path + + +class MoneyFst(GraphFst): + """ + Finite state transducer for classifying money + e.g. 오만 삼천원 -> money { integer_part: "53000" currency: "원" } + + Args: + cardinal: CardinalFst + """ + + def __init__(self, cardinal: GraphFst): + super().__init__(name="money", kind="classify") + + cardinals = cardinal.just_cardinals + currency = pynini.string_file(get_abs_path("data/currency.tsv")) + + graph_unit = ( + pynutil.insert('currency: "') + + currency + + pynutil.insert('"') + ) + + # Main graph for integer money amounts + # Structure: + + + graph_integer = ( + pynutil.insert('integer_part: "') + + cardinals + + pynutil.insert('"') + + delete_extra_space # Handles optional spacing + + graph_unit + ) + + final_graph = self.add_tokens(graph_integer) + self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 3994b232f..7a3f2a01e 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ 
b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -25,6 +25,7 @@ from nemo_text_processing.inverse_text_normalization.ko.taggers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.time import TimeFst from nemo_text_processing.inverse_text_normalization.ko.taggers.date import DateFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.money import MoneyFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst @@ -78,6 +79,9 @@ def __init__( date = DateFst(cardinal) date_graph = date.fst + money = MoneyFst(cardinal) + money_graph = money.fst + word_graph = WordFst().fst classify = ( @@ -87,6 +91,7 @@ def __init__( | pynutil.add_weight(fraction_graph, 1.0) | pynutil.add_weight(time_graph, 1.0) | pynutil.add_weight(date_graph, 1.1) + | pynutil.add_weight(money_graph, 1.1) | pynutil.add_weight(word_graph, 100) ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py new file mode 100644 index 000000000..eb8ce7257 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py @@ -0,0 +1,49 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_CHAR, GraphFst, delete_space + + +class MoneyFst(GraphFst): + """ + Finite state transducer for classifying money + e.g. 오만 삼천원 -> money { integer_part: "53000" currency: "원" } + + Args: + cardinal: CardinalFst + """ + + def __init__(self): + super().__init__(name="money", kind="verbalize") + integer = ( + pynutil.delete("integer_part:") + + delete_space + + pynutil.delete('"') + + pynini.closure(NEMO_CHAR - " ", 1) + + pynutil.delete('"') + ) + + unit = ( + pynutil.delete("currency:") + + delete_space + + pynutil.delete('"') + + pynini.closure(NEMO_CHAR - " ", 1) + + pynutil.delete('"') + ) + graph = unit + delete_space + integer + delete_tokens = self.delete_tokens(graph) + self.fst = delete_tokens.optimize() \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py index a1d264caa..e1e3755b1 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py @@ -20,6 +20,7 @@ NEMO_NOT_QUOTE, GraphFst, delete_space, + NEMO_SPACE ) @@ -81,7 +82,7 @@ def __init__(self): # Adding prefix and suffix space optional_prefix_out = pynini.closure(delete_space + prefix_component, 0, 1) - optional_suffix_out = pynini.closure(delete_space + pynutil.insert(" ") + suffix_component, 0, 1) + optional_suffix_out = pynini.closure(delete_space + pynutil.insert(NEMO_SPACE) + suffix_component, 0, 1) final_graph = optional_prefix_out + time_graph + optional_suffix_out self.fst = self.delete_tokens(delete_space + final_graph).optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 56a109bae..01fdd3e1f 100644 --- 
a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -20,6 +20,7 @@ from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.time import TimeFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.date import DateFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.money import MoneyFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst @@ -50,5 +51,8 @@ def __init__(self): date = DateFst() date_graph = date.fst - graph = cardinal_graph | ordinal_graph | decimal_graph | fraction_graph | time_graph | date_graph + money = MoneyFst() + money_graph = money.fst + + graph = cardinal_graph | ordinal_graph | decimal_graph | fraction_graph | time_graph | date_graph | money_graph self.fst = graph \ No newline at end of file diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_money.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_money.txt new file mode 100644 index 000000000..14cdea536 --- /dev/null +++ b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_money.txt @@ -0,0 +1,30 @@ +오천 원~₩5,000 +만오천원~₩15,000 +십이만 삼천 원~₩123,000 +백 원~₩100 +천백십일 원~₩1,111 +육십만 원~₩600,000 +백만 원~₩1,000,000 +삼백오십만 원~₩3,500,000 +천이백만 원~₩12,000,000 +일억 원~₩100,000,000 +십이억 오천만 원~₩1,250,000,000 +백억 원~₩10,000,000,000 +오천억~₩500,000,000,000 +일조 원~₩1,000,000,000,000 +삼조 오천억 원~₩3,500,000,000,000 +영원~₩0 +구십구 원~₩99 +이공이오 원~₩2,025 +만 원~₩10,000 +일만 원~₩10,000 +십오 달러~$15 +이십불~$20 +천오백 불~$1,500 +백만 달러~$1,000,000 +오십 유로~€50 +천 엔~¥1,000 +만 엔~¥10,000 +백 파운드~£100 +이십 위안~¥20 +구천구백구십구원~₩9,999 \ No newline at end of file diff --git a/tests/nemo_text_processing/ko/test_money.py b/tests/nemo_text_processing/ko/test_money.py new file mode 100644 index 
000000000..881a1ee4e --- /dev/null +++ b/tests/nemo_text_processing/ko/test_money.py @@ -0,0 +1,32 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from parameterized import parameterized + +from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer +from nemo_text_processing.text_normalization.normalize import Normalizer + +from ..utils import CACHE_DIR, parse_test_case_file + + +class TestOrdinal: + inverse_normalizer = InverseNormalizer(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) + + @parameterized.expand(parse_test_case_file('ko/data_inverse_text_normalization/test_cases_money.txt')) + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_denorm(self, test_input, expected): + pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False) + assert pred == expected diff --git a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh index 854aeafe7..2843a88c0 100644 --- a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh +++ b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh @@ -52,6 +52,11 @@ testITNDate() { runtest $input } +testITNMoney() { + input=$TEST_DIR/data_inverse_text_normalization/test_cases_money.txt + runtest $input +} + # Remove all command-line 
arguments shift $# From 19dbc2455910972174f6aa9ee7e08cdf435b0666 Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Tue, 23 Sep 2025 05:09:04 -0700 Subject: [PATCH 26/29] Adding money to the Korean ITN Signed-off-by: hmlee245 --- .../inverse_text_normalization/ko/taggers/money.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py index 49a8b216c..f02049e33 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py @@ -19,7 +19,8 @@ NEMO_DIGIT, GraphFst, convert_space, - delete_extra_space + delete_extra_space, + NEMO_SPACE ) from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path From e5252d1f9631d1628eda5327407268e0dc61bcba Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 26 Sep 2025 19:59:22 +0000 Subject: [PATCH 27/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../inverse_text_normalization/ko/taggers/money.py | 8 ++------ .../ko/taggers/tokenize_and_classify.py | 3 +-- .../inverse_text_normalization/ko/verbalizers/money.py | 4 ++-- .../inverse_text_normalization/ko/verbalizers/time.py | 2 +- .../ko/verbalizers/verbalize.py | 3 +-- 5 files changed, 7 insertions(+), 13 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py index f02049e33..d4dcdc3aa 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py @@ -17,10 +17,10 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( NEMO_DIGIT, + NEMO_SPACE, GraphFst, convert_space, 
delete_extra_space, - NEMO_SPACE ) from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path @@ -40,11 +40,7 @@ def __init__(self, cardinal: GraphFst): cardinals = cardinal.just_cardinals currency = pynini.string_file(get_abs_path("data/currency.tsv")) - graph_unit = ( - pynutil.insert('currency: "') - + currency - + pynutil.insert('"') - ) + graph_unit = pynutil.insert('currency: "') + currency + pynutil.insert('"') # Main graph for integer money amounts # Structure: + + diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index e54d8e3a8..2dc4661dd 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -23,10 +23,9 @@ from nemo_text_processing.inverse_text_normalization.ko.taggers.date import DateFst from nemo_text_processing.inverse_text_normalization.ko.taggers.decimal import DecimalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.fraction import FractionFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.money import MoneyFst from nemo_text_processing.inverse_text_normalization.ko.taggers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.time import TimeFst -from nemo_text_processing.inverse_text_normalization.ko.taggers.date import DateFst -from nemo_text_processing.inverse_text_normalization.ko.taggers.money import MoneyFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py index eb8ce7257..2a09c6bb9 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py +++ 
b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py @@ -36,7 +36,7 @@ def __init__(self): + pynini.closure(NEMO_CHAR - " ", 1) + pynutil.delete('"') ) - + unit = ( pynutil.delete("currency:") + delete_space @@ -46,4 +46,4 @@ def __init__(self): ) graph = unit + delete_space + integer delete_tokens = self.delete_tokens(graph) - self.fst = delete_tokens.optimize() \ No newline at end of file + self.fst = delete_tokens.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py index e1e3755b1..4b63ade99 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/time.py @@ -18,9 +18,9 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( NEMO_DIGIT, NEMO_NOT_QUOTE, + NEMO_SPACE, GraphFst, delete_space, - NEMO_SPACE ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 1fb3b3133..ce5ad8cd1 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -18,10 +18,9 @@ from nemo_text_processing.inverse_text_normalization.ko.verbalizers.date import DateFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.decimal import DecimalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.fraction import FractionFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.money import MoneyFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.time import TimeFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.date import DateFst 
-from nemo_text_processing.inverse_text_normalization.ko.verbalizers.money import MoneyFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst From 46ee84891676133759aba0cad73d547b8db4bab2 Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Mon, 13 Oct 2025 20:16:26 -0700 Subject: [PATCH 28/29] Addition of telephone class, fixing time, money, date Signed-off-by: hmlee245 --- .../ko/data/currency.tsv | 1 + .../ko/data/time/time_hours.tsv | 13 ++++- .../ko/taggers/money.py | 31 +++++----- .../ko/taggers/telephone.py | 57 +++++++++++++++++++ .../ko/taggers/time.py | 26 ++++++--- .../ko/taggers/tokenize_and_classify.py | 5 ++ .../ko/verbalizers/money.py | 11 ++-- .../ko/verbalizers/telephone.py | 34 +++++++++++ .../ko/verbalizers/verbalize.py | 22 ++++++- .../test_cases_money.txt | 43 +++++++------- .../test_cases_telephone.txt | 11 ++++ ..._sparrowhawk_inverse_text_normalization.sh | 5 ++ .../nemo_text_processing/ko/test_telephone.py | 32 +++++++++++ 13 files changed, 239 insertions(+), 52 deletions(-) create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/telephone.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/telephone.py create mode 100644 tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_telephone.txt create mode 100644 tests/nemo_text_processing/ko/test_telephone.py diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/currency.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/currency.tsv index 516cf5c0a..fd2127530 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/data/currency.tsv +++ b/nemo_text_processing/inverse_text_normalization/ko/data/currency.tsv @@ -1,4 +1,5 @@ 달러 $ +불 $ 유로 € 엔 ¥ 파운드 £ diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/time/time_hours.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/time/time_hours.tsv index 24b980aa1..8044e4006 100644 --- 
a/nemo_text_processing/inverse_text_normalization/ko/data/time/time_hours.tsv +++ b/nemo_text_processing/inverse_text_normalization/ko/data/time/time_hours.tsv @@ -9,4 +9,15 @@ 아홉 9 열 10 열한 11 -열두 12 \ No newline at end of file +열두 12 +열세 13 +열네 14 +열다섯 15 +열여섯 16 +열일곱 17 +열여덟 18 +열아홉 19 +스무 20 +스물한 21 +스물두 22 +스물세 23 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py index f02049e33..d150b8e7f 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py @@ -16,10 +16,7 @@ from pynini.lib import pynutil from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( - NEMO_DIGIT, GraphFst, - convert_space, - delete_extra_space, NEMO_SPACE ) from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path @@ -28,7 +25,7 @@ class MoneyFst(GraphFst): """ Finite state transducer for classifying money - e.g. 오만 삼천원 -> money { integer_part: "53000" currency: "원" } + e.g. 
오만 삼천원 -> money { integer_part: "53000" currency: "₩" } Args: cardinal: CardinalFst @@ -40,21 +37,25 @@ def __init__(self, cardinal: GraphFst): cardinals = cardinal.just_cardinals currency = pynini.string_file(get_abs_path("data/currency.tsv")) + # Accepting space if there are one between integer and currency + spacing = pynini.closure(pynini.accep(NEMO_SPACE), 0, 1) + + graph_integer = ( + pynutil.insert("integer_part: \"") + + cardinals + + pynutil.insert("\"") + + spacing + ) + graph_unit = ( - pynutil.insert('currency: "') + pynutil.insert(" currency: \"") + currency - + pynutil.insert('"') + + pynutil.insert("\"") ) - # Main graph for integer money amounts - # Structure: + + - graph_integer = ( - pynutil.insert('integer_part: "') - + cardinals - + pynutil.insert('"') - + delete_extra_space # Handles optional spacing - + graph_unit + graph_final = ( + graph_integer + graph_unit ) - final_graph = self.add_tokens(graph_integer) + final_graph = self.add_tokens(graph_final) self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/telephone.py new file mode 100644 index 000000000..b270def3d --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/telephone.py @@ -0,0 +1,57 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_extra_space, NEMO_SPACE +from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path + + +class TelephoneFst(GraphFst): + """ + Finite state transducer for classifying a generic 3-4-4 telephone number. + e.g. 공일공에 일이삼사에 오육칠팔 -> telephone { number_part: "010-1234-5678" } + + """ + + def __init__(self): + super().__init__(name="telephone", kind="classify") + graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv")) + graph_zero_alt = pynini.cross("공", "0") + graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) + + digit = graph_digit | graph_zero | graph_zero_alt + + separator = pynini.cross(pynini.union(" ", "에"), "-").optimize() + + digit_block_3 = digit + digit + digit + digit_block_4 = digit_block_3 + digit + + optional_separator = pynini.closure(separator, 0, 1) + spacing = pynini.closure(pynini.accep(NEMO_SPACE), 0, 1) + + phone_number_graph = ( + pynutil.insert('number_part: "') + + digit_block_3 + + optional_separator + + digit_block_4 + + optional_separator + + digit_block_4 + + pynutil.insert('"') + ) + + graph = phone_number_graph + final_graph = self.add_tokens(graph) + self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py index d804f5999..96531bde8 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py @@ -16,6 +16,7 @@ import pynini from pynini.lib import pynutil +from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_SPACE, GraphFst, delete_space from nemo_text_processing.inverse_text_normalization.ko.utils import 
get_abs_path @@ -65,7 +66,7 @@ def __init__(self): minute_component = ( pynutil.insert("minutes: \"") - + ((graph_0_to_59 + spacing + minute_suffix) | graph_half) + + pynini.union((graph_0_to_59 + spacing + minute_suffix) | graph_half) + pynutil.insert("\"") ) @@ -80,11 +81,11 @@ def __init__(self): graph_regular = hour + minute + second # 오전 = AM, 오후 = PM - prefix_words = ( - (pynini.accep("오전") + spacing) - | (pynini.accep("오후") + spacing) - | (pynini.accep("새벽") + spacing) - | (pynini.accep("아침") + spacing) + prefix_words = pynini.union( + (pynini.accep("오전") + spacing), + (pynini.accep("오후") + spacing), + (pynini.accep("새벽") + spacing), + (pynini.accep("아침") + spacing) ) prefix_tag = pynutil.insert("prefix: \"") + prefix_words + pynutil.insert("\"") @@ -92,10 +93,19 @@ def __init__(self): suffix_words = pynini.accep("전") | pynini.accep("후") suffix_tag = pynutil.insert("suffix: \"") + suffix_words + pynutil.insert("\"") - final_graph = ( + time_graph = ( pynini.closure(delete_space + prefix_tag, 0, 1) + graph_regular + pynini.closure(delete_space + suffix_tag, 0, 1) ) - self.fst = self.add_tokens(final_graph).optimize() + cardinal = CardinalFst() + cardinal_graph = cardinal.fst + + #Adding cardinal graph to prevent processing out of range numbers + final_graph = pynini.union( + time_graph, + cardinal_graph + ) + + self.fst = self.add_tokens(final_graph).optimize() \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index e54d8e3a8..f4f46805d 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -27,6 +27,7 @@ from nemo_text_processing.inverse_text_normalization.ko.taggers.time import TimeFst from nemo_text_processing.inverse_text_normalization.ko.taggers.date import 
DateFst from nemo_text_processing.inverse_text_normalization.ko.taggers.money import MoneyFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.telephone import TelephoneFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst @@ -83,6 +84,9 @@ def __init__( money = MoneyFst(cardinal) money_graph = money.fst + telephone = TelephoneFst() + telephone_graph = telephone.fst + word_graph = WordFst().fst classify = ( @@ -93,6 +97,7 @@ def __init__( | pynutil.add_weight(time_graph, 1.0) | pynutil.add_weight(date_graph, 1.1) | pynutil.add_weight(money_graph, 1.1) + | pynutil.add_weight(telephone_graph, 1.1) | pynutil.add_weight(word_graph, 100) ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py index eb8ce7257..4a569c3d9 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py @@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_CHAR, GraphFst, delete_space +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_CHAR, GraphFst, delete_space, NEMO_SPACE class MoneyFst(GraphFst): @@ -33,7 +33,7 @@ def __init__(self): pynutil.delete("integer_part:") + delete_space + pynutil.delete('"') - + pynini.closure(NEMO_CHAR - " ", 1) + + pynini.closure(NEMO_CHAR - NEMO_SPACE, 1) + pynutil.delete('"') ) @@ -41,9 +41,12 @@ def __init__(self): pynutil.delete("currency:") + delete_space + pynutil.delete('"') - + pynini.closure(NEMO_CHAR - " ", 1) + + pynini.closure(NEMO_CHAR - NEMO_SPACE, 1) + pynutil.delete('"') ) - graph = unit + delete_space + integer + + optional_space = pynini.closure(pynutil.delete(NEMO_SPACE), 0, 1).optimize() + + graph = unit + optional_space + integer delete_tokens = self.delete_tokens(graph) 
self.fst = delete_tokens.optimize() \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/telephone.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/telephone.py new file mode 100644 index 000000000..3ac213b59 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/telephone.py @@ -0,0 +1,34 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst + + +class TelephoneFst(GraphFst): + """ + Finite state transducer for classifying a generic 3-4-4 telephone number. + e.g. 
공일공에 일이삼사에 오육칠팔 -> telephone { number: "010-1234-5678" } + + """ + + def __init__(self): + super().__init__(name="telephone", kind="verbalize") + + number_part = pynutil.delete('number_part: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('"') + delete_tokens = self.delete_tokens(number_part) + self.fst = delete_tokens.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 1fb3b3133..d966fcf9e 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import pynini +from pynini.lib import pynutil from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst @@ -22,6 +24,7 @@ from nemo_text_processing.inverse_text_normalization.ko.verbalizers.time import TimeFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.date import DateFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.money import MoneyFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.telephone import TelephoneFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst @@ -55,5 +58,20 @@ def __init__(self): money = MoneyFst() money_graph = money.fst - graph = cardinal_graph | ordinal_graph | decimal_graph | fraction_graph | time_graph | date_graph | money_graph - self.fst = graph + telephone = TelephoneFst() + telephone_graph = telephone.fst + + word = WordFst() + word_graph = word.fst + + graph = pynini.union(cardinal_graph, + ordinal_graph, + decimal_graph, + 
fraction_graph, + time_graph, + date_graph, + money_graph, + telephone_graph, + word_graph + ) + self.fst = graph \ No newline at end of file diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_money.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_money.txt index 14cdea536..09c6b2841 100644 --- a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_money.txt +++ b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_money.txt @@ -1,30 +1,29 @@ -오천 원~₩5,000 -만오천원~₩15,000 -십이만 삼천 원~₩123,000 +오천 원~₩5000 +만오천원~₩15000 +십이만삼천 원~₩123000 백 원~₩100 -천백십일 원~₩1,111 -육십만 원~₩600,000 -백만 원~₩1,000,000 -삼백오십만 원~₩3,500,000 -천이백만 원~₩12,000,000 -일억 원~₩100,000,000 -십이억 오천만 원~₩1,250,000,000 -백억 원~₩10,000,000,000 -오천억~₩500,000,000,000 -일조 원~₩1,000,000,000,000 -삼조 오천억 원~₩3,500,000,000,000 +천백십일 원~₩1111 +육십만 원~₩600000 +백만 원~₩1000000 +삼백오십만 원~₩3500000 +천이백만 원~₩12000000 +일억 원~₩100000000 +십이억오천만 원~₩1250000000 +백억 원~₩10000000000 +오천억원~₩500000000000 +일조 원~₩1000000000000 +삼조오천억 원~₩3500000000000 영원~₩0 구십구 원~₩99 -이공이오 원~₩2,025 -만 원~₩10,000 -일만 원~₩10,000 +만 원~₩10000 +일만 원~₩10000 십오 달러~$15 이십불~$20 -천오백 불~$1,500 -백만 달러~$1,000,000 +천오백 불~$1500 +백만 달러~$1000000 오십 유로~€50 -천 엔~¥1,000 -만 엔~¥10,000 +천 엔~¥1000 +만 엔~¥10000 백 파운드~£100 이십 위안~¥20 -구천구백구십구원~₩9,999 \ No newline at end of file +구천구백구십구원~₩9999 \ No newline at end of file diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_telephone.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_telephone.txt new file mode 100644 index 000000000..6702d4ddd --- /dev/null +++ b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_telephone.txt @@ -0,0 +1,11 @@ +공일공에 일이삼사에 오육칠팔~010-1234-5678 +영일영 구팔칠육 오사삼이~010-9876-5432 +공이에 삼사오육에 칠팔구공~02-3456-7890 +공삼일에 구팔칠에 육오사삼~031-987-6543 +천오백팔십팔에 이공공공~1588-2000 +천오백칠십칠에 구천번~1577-9000 +일일구~119 +일일이~112 +공일공 일이삼사 오육칠팔~010-1234-5678 +공이 삼사오에 
육칠팔구~02-345-6789 +공일공일이삼사오육칠팔~010-1234-5678 \ No newline at end of file diff --git a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh index 2843a88c0..816678de3 100644 --- a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh +++ b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh @@ -57,6 +57,11 @@ testITNMoney() { runtest $input } +testITNTelephone() { + input=$TEST_DIR/data_inverse_text_normalization/test_cases_telephone.txt + runtest $input +} + # Remove all command-line arguments shift $# diff --git a/tests/nemo_text_processing/ko/test_telephone.py b/tests/nemo_text_processing/ko/test_telephone.py new file mode 100644 index 000000000..4a3684896 --- /dev/null +++ b/tests/nemo_text_processing/ko/test_telephone.py @@ -0,0 +1,32 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest +from parameterized import parameterized + +from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer +from nemo_text_processing.text_normalization.normalize import Normalizer + +from ..utils import CACHE_DIR, parse_test_case_file + + +class TestTelephone: + inverse_normalizer = InverseNormalizer(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) + + @parameterized.expand(parse_test_case_file('ko/data_inverse_text_normalization/test_cases_telephone.txt')) + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_denorm(self, test_input, expected): + pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False) + assert pred == expected From 8e3100461de7faba807487b0ff02ff9f271bbd1e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 14 Oct 2025 03:26:08 +0000 Subject: [PATCH 29/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../ko/taggers/money.py | 26 +++++------------- .../ko/taggers/telephone.py | 10 +++---- .../ko/taggers/time.py | 13 ++++----- .../ko/taggers/tokenize_and_classify.py | 4 +-- .../ko/verbalizers/money.py | 7 ++++- .../ko/verbalizers/verbalize.py | 27 +++++++++---------- 6 files changed, 36 insertions(+), 51 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py index d150b8e7f..f890e477e 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py @@ -15,10 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( - GraphFst, - NEMO_SPACE -) +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_SPACE, GraphFst from
nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path @@ -40,22 +37,11 @@ def __init__(self, cardinal: GraphFst): # Accepting space if there are one between integer and currency spacing = pynini.closure(pynini.accep(NEMO_SPACE), 0, 1) - graph_integer = ( - pynutil.insert("integer_part: \"") - + cardinals - + pynutil.insert("\"") - + spacing - ) - - graph_unit = ( - pynutil.insert(" currency: \"") - + currency - + pynutil.insert("\"") - ) - - graph_final = ( - graph_integer + graph_unit - ) + graph_integer = pynutil.insert("integer_part: \"") + cardinals + pynutil.insert("\"") + spacing + + graph_unit = pynutil.insert(" currency: \"") + currency + pynutil.insert("\"") + + graph_final = graph_integer + graph_unit final_graph = self.add_tokens(graph_final) self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/telephone.py index b270def3d..fe499a838 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/telephone.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/telephone.py @@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_extra_space, NEMO_SPACE +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_SPACE, GraphFst, delete_extra_space from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path @@ -37,16 +37,16 @@ def __init__(self): separator = pynini.cross(pynini.union(" ", "에"), "-").optimize() digit_block_3 = digit + digit + digit - digit_block_4 = digit_block_3 + digit + digit_block_4 = digit_block_3 + digit optional_separator = pynini.closure(separator, 0, 1) spacing = pynini.closure(pynini.accep(NEMO_SPACE), 0, 1) phone_number_graph = ( - pynutil.insert('number_part: "') + - digit_block_3 + pynutil.insert('number_part: "') + + 
digit_block_3 + optional_separator - + digit_block_4 + + digit_block_4 + optional_separator + digit_block_4 + pynutil.insert('"') diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py index 96531bde8..923a78c1c 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py @@ -16,8 +16,8 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_SPACE, GraphFst, delete_space +from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path @@ -85,7 +85,7 @@ def __init__(self): (pynini.accep("오전") + spacing), (pynini.accep("오후") + spacing), (pynini.accep("새벽") + spacing), - (pynini.accep("아침") + spacing) + (pynini.accep("아침") + spacing), ) prefix_tag = pynutil.insert("prefix: \"") + prefix_words + pynutil.insert("\"") @@ -102,10 +102,7 @@ def __init__(self): cardinal = CardinalFst() cardinal_graph = cardinal.fst - #Adding cardinal graph to prevent processing out of range numbers - final_graph = pynini.union( - time_graph, - cardinal_graph - ) + # Adding cardinal graph to prevent processing out of range numbers + final_graph = pynini.union(time_graph, cardinal_graph) - self.fst = self.add_tokens(final_graph).optimize() \ No newline at end of file + self.fst = self.add_tokens(final_graph).optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 2999504c6..3f5943b15 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ 
b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -25,10 +25,8 @@ from nemo_text_processing.inverse_text_normalization.ko.taggers.fraction import FractionFst from nemo_text_processing.inverse_text_normalization.ko.taggers.money import MoneyFst from nemo_text_processing.inverse_text_normalization.ko.taggers.ordinal import OrdinalFst -from nemo_text_processing.inverse_text_normalization.ko.taggers.time import TimeFst -from nemo_text_processing.inverse_text_normalization.ko.taggers.date import DateFst -from nemo_text_processing.inverse_text_normalization.ko.taggers.money import MoneyFst from nemo_text_processing.inverse_text_normalization.ko.taggers.telephone import TelephoneFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.time import TimeFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py index 49ec64b7d..45e4c7e2c 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/money.py @@ -15,7 +15,12 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_CHAR, GraphFst, delete_space, NEMO_SPACE +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( + NEMO_CHAR, + NEMO_SPACE, + GraphFst, + delete_space, +) class MoneyFst(GraphFst): diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index acefdb40f..59d1c9809 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -22,10 +22,8 @@ from 
nemo_text_processing.inverse_text_normalization.ko.verbalizers.fraction import FractionFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.money import MoneyFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.time import TimeFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.date import DateFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.money import MoneyFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.telephone import TelephoneFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.time import TimeFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst @@ -65,14 +63,15 @@ def __init__(self): word = WordFst() word_graph = word.fst - graph = pynini.union(cardinal_graph, - ordinal_graph, - decimal_graph, - fraction_graph, - time_graph, - date_graph, - money_graph, - telephone_graph, - word_graph - ) - self.fst = graph \ No newline at end of file + graph = pynini.union( + cardinal_graph, + ordinal_graph, + decimal_graph, + fraction_graph, + time_graph, + date_graph, + money_graph, + telephone_graph, + word_graph, + ) + self.fst = graph