From 775851e80edab0195157fa03cfb5535ee3dde8e6 Mon Sep 17 00:00:00 2001
From: Szabolcs <szabolcs@birosign.com>
Date: Sun, 21 Jul 2019 20:41:03 +0200
Subject: [PATCH 1/9] Number spelling based on the CLDR's RBNF rules

A pure Python engine for parsing RBNF rules.
The rules are incomplete in many cases, and fractional
number spelling is hardly supported.

Based on an earlier discussion:
https://github.com/python-babel/babel/pull/114
and referenced in
https://github.com/python-babel/babel/issues/179
---
 babel/numbers.py              |  21 +
 babel/rbnf.py                 | 713 ++++++++++++++++++++++++++++++++++
 scripts/import_cldr.py        |  48 ++-
 tests/test_number_spelling.py | 183 +++++++++
 4 files changed, 964 insertions(+), 1 deletion(-)
 create mode 100644 babel/rbnf.py
 create mode 100644 tests/test_number_spelling.py

diff --git a/babel/numbers.py b/babel/numbers.py
index 6e15fd3a8..d9a0c0462 100644
--- a/babel/numbers.py
+++ b/babel/numbers.py
@@ -24,6 +24,7 @@
 import warnings
 
 from babel.core import default_locale, Locale, get_global
+from babel.rbnf import RuleBasedNumberFormat
 
 try:
     # Python 2
@@ -662,6 +663,26 @@ def __init__(self, message, suggestions=None):
         self.suggestions = suggestions
 
 
+def spell_number(number, locale=LC_NUMERIC, **kwargs):
+    """Return the number spelled out in words for a specific locale.
+
+    :param number: the number to spell out
+    :param locale: the `Locale` object or locale identifier
+    :param kwargs: forwarded unchanged to `RuleBasedNumberFormat.format`
+    """
+    speller = RuleBasedNumberFormat.negotiate(locale)
+    return speller.format(number, **kwargs)
+
+
+def get_rbnf_rules(locale=LC_NUMERIC):
+    """Return the names of the public RBNF rulesets available for a locale.
+
+    :param locale: the `Locale` object or locale identifier
+    """
+    speller = RuleBasedNumberFormat.negotiate(locale)
+    return speller.available_rulesets
+
+
 def parse_number(string, locale=LC_NUMERIC):
     """Parse localized number string into an integer.
 
diff --git a/babel/rbnf.py b/babel/rbnf.py
new file mode 100644
index 000000000..2a5d24275
--- /dev/null
+++ b/babel/rbnf.py
@@ -0,0 +1,713 @@
+# -*- coding: utf-8 -*-
+"""
+babel.rbnf
+~~~~~~~~~~
+
+Locale dependent spelling of numbers.
+
+Documentation:
+-   http://www.unicode.org/reports/tr35/tr35-47/tr35-numbers.html#Rule-Based_Number_Formatting
+-   http://www.icu-project.org/apiref/icu4c/classRuleBasedNumberFormat.html
+
+Examples
+-   http://userguide.icu-project.org/formatparse/numbers/rbnf-examples
+-   http://source.icu-project.org/repos/icu/trunk/icu4j/demos/src/com/ibm/icu/dev/demo/rbnf/RbnfSampleRuleSets.java
+
+    
+"""
+# Dev notes
+#
+# Reloading cldr:
+# python ./scripts/import_cldr.py ./cldr/cldr-core-35.1/common/ -f
+# 
+# Tokenization is inspired by Ka-Ping Yee's tokenize library
+
+# Undocumented syntax (←%rule-name←←)
+# Trac ticket filed for CLDR update PL rbnf
+#     http://unicode.org/cldr/trac/ticket/10544
+# Maybe the syntax needs to be supported:
+#     http://bugs.icu-project.org/trac/ticket/13264
+# Original request for Hebrew (currently not used in Hebrew):
+#     http://bugs.icu-project.org/trac/ticket/4039
+
+from __future__ import unicode_literals
+
+import re
+import sys
+import math
+import decimal
+import collections
+import warnings
+
+from babel.core import Locale, get_global
+
+TEXT_TOKEN = 1  # literal text, copied to the output as is
+INTEGRAL_TOKEN = 2  # ←...←
+REMAINDER_TOKEN = 3  # →...→
+PREVIOUS_TOKEN = 4  # →→→
+SUBSTITUTION_TOKEN = 5  # =...=
+PLURAL_TOKEN = 6  # $(...)$
+OPT_START = 7  # [
+OPT_END = 8  # ]
+
+regex = [  # `tokenize` tries these in order, the first match wins
+    (PLURAL_TOKEN,          r"\$\((.+)\)\$"),
+    (INTEGRAL_TOKEN,        r"←([^←[]*)←(←?)"),
+    (PREVIOUS_TOKEN,        r"→→→"),
+    (REMAINDER_TOKEN,       r"→([^→[]*)→"),
+    (SUBSTITUTION_TOKEN,    r"=([^=[]+)="),
+    (OPT_START,             r"\["),
+    (OPT_END,               r"\]"),
+    (TEXT_TOKEN,            r"[^[\]=→←]+"),
+]
+
+INTERNAL_REF = 1  # empty descriptor
+PRIVATE_REF = 2  # descriptor starting with %%
+PUBLIC_REF = 3  # descriptor starting with %
+PLURAL_REF = 4  # body of a $(...)$ token
+DECIMAL_REF = 5  # descriptor starting with 0 or #
+
+REFERENCE_TOKENS = (INTEGRAL_TOKEN, REMAINDER_TOKEN, SUBSTITUTION_TOKEN)
+
+NEGATIVE_NUMBER_RULE = '-x'
+IMPROPER_FRACTION_RULE = 'x.x'
+PROPER_FRACTION_RULE = '0.x'
+MASTER_RULE = 'x.0'
+INFINITY_RULE = 'Inf'
+NOT_A_NUMBER_RULE = 'NaN'
+SPECIAL_FRACTION_RULE = 'x,x'  # there are other options but not existent in CLDR
+# NOTE(review): presumably related to locale.number_symbols['decimal'] - confirm
+# any other rule value is a number, which makes the rule a normal rule
+
+
+class RBNFError(Exception): pass  # base class of the errors below
+class TokenizationError(RBNFError): pass  # raised by Rule._parse
+class RulesetNotFound(RBNFError): pass  # raised by RuleBasedNumberFormat.format
+class RuleNotFound(RBNFError): pass  # raised by Ruleset.apply
+
+TokenInfo = collections.namedtuple('TokenInfo', 'type reference optional')
+
+# compile each pattern once at import time
+regex_comp = [(t, re.compile(r)) for t, r in regex]
+
+
+def tokenize(text):
+    """
+    Split the text of a rule into tokens (generator of `TokenInfo`).
+
+    The text is parsed by matching a list of regular expressions
+    against the beginning of the text. If a regex matches,
+    a token is generated and we continue with the rest of
+    the text. `ValueError` is raised if no regex matches.
+
+    Some of the tokens are optional if they are in square
+    brackets. No tokens are generated for the regular expressions
+    matching the beginning and the end of the optional section.
+    Instead all the tokens inside the optional section are
+    flagged as optional.
+
+    Some of the tokens reference other rulesets by name;
+    this information is stored in the token along with the type
+    of reference.
+
+    """
+    # remove unnecessary syntax (only used in the non-xml form)
+    if text.endswith(";"): text = text[:-1]
+    if text.startswith("'"): text = text[1:]
+
+    optional = False
+
+    while text:
+        stop = True
+        # try the patterns in their order of definition
+        for tok, regex in regex_comp:
+            # `regex` is the compiled pattern here (shadows the module level list)
+            match = regex.match(text)
+            if match:
+                stop = False
+                text = text[match.end():]
+                if tok == OPT_START: optional = True
+                elif tok == OPT_END: optional = False
+                else:
+                    token = _gen_token(tok, match, optional)
+                    if token:
+                        yield token
+                break  # always start searching with the first regex
+        if stop:
+            raise ValueError(text)
+
+
+def _gen_token(tok, match, optional):  # returns a TokenInfo, or None for any other token type
+    # remove this if CLDR is updated based on ticket
+    # http://unicode.org/cldr/trac/ticket/10544
+    if tok == INTEGRAL_TOKEN and match.group(2) == '←':
+        warnings.warn('Unsupported syntax ←...←←', SyntaxWarning)
+
+    if tok in REFERENCE_TOKENS:  # reference is a (reference type, name) pair
+        reference = _parse_reference(match.group(1))
+        return TokenInfo(tok, reference, optional)
+
+    # currently only `en` has this
+    if tok == PLURAL_TOKEN:  # the plural body is stored unparsed
+        return TokenInfo(tok, (PLURAL_REF, match.group(1)), optional)
+
+    if tok == PREVIOUS_TOKEN:  # carries no reference
+        return TokenInfo(tok, None, optional)
+
+    if tok == TEXT_TOKEN:  # reference holds the literal text
+        return TokenInfo(tok, match.group(0), optional)
+
+
+def _parse_reference(string):  # returns a (reference type, name or pattern) pair
+    if string == "":
+        return INTERNAL_REF, ""
+    if string.startswith('%%'):  # has to be tested before the single % prefix
+        return PRIVATE_REF, string[2:]
+    if string.startswith('%'):
+        return PUBLIC_REF, string[1:]
+    if string[0] in '0#':  # decimal format pattern, kept as is
+        return DECIMAL_REF, string
+    warnings.warn('Reference parsing error: %s' % string, SyntaxWarning)
+    return INTERNAL_REF, ""  # defaults to this
+
+
+def untokenize_ICU():
+    """
+    Placeholder, does nothing yet.
+    TODO implement ICU style representation (or rather Ruleset.format_icu())
+    """
+
+
+class RuleBasedNumberFormat(object):
+    """
+    RuleBasedNumberFormat's behavior consists of one or more rule sets
+
+    The first ruleset in a locale is the default ruleset.
+    The substitution descriptor (i.e., the text between the token characters)
+    may take one of three forms:
+    :a rule set name:
+        Perform the mathematical operation on the number, and format the result
+        using the named rule set.
+    :a DecimalFormat pattern:
+        Perform the mathematical operation on the number, and format the
+        result using a DecimalFormat with the specified pattern. The
+        pattern must begin with 0 or #.
+    :nothing:
+        Perform the mathematical operation on the number, and format the
+        result using the rule set containing the current rule, except:
+
+        -   You can't have an empty substitution descriptor with
+            a == substitution.
+        -   If you omit the substitution descriptor in a >> substitution
+            in a fraction rule, format the result one digit at a time
+            using the rule set containing the current rule.
+        -   If you omit the substitution descriptor in a << substitution
+            in a rule in a fraction rule set, format the result using
+            the default rule set for this formatter.
+    """
+    group_types = ('SpelloutRules', 'OrdinalRules', 'NumberingSystemRules')
+    # spell number should go for SpelloutRules
+    # make interface for the other two groups
+
+    def __init__(self, locale, group='SpelloutRules'):
+        self._locale = locale  # its `_data['rbnf_rules']` holds the rulesets
+        self._group = group  # key of the ruleset group to use
+
+    @property
+    def rulesets(self):
+        return self._locale._data['rbnf_rules'][self._group]
+
+    @property
+    def available_rulesets(self):
+        """list the names of the available public rulesets"""
+        return [r.name for r in self.rulesets if not r.private]
+
+
+    def format(self, number, ordinal=False, year=False, ruleset=None, **kwargs):
+        """spell an actual number (int/float/decimal)
+
+        Search the rulesets for an entry point,
+        default is `spellout-numbering`.
+
+        If year is True: use spellout-numbering-year
+        If ordinal is True: use spellout-ordinal
+        If year and ordinal both True: raise ValueError
+        An explicit `ruleset` name replaces the default names above.
+        TODO
+        If no `spellout-ordinal`:
+            if has `spellout-ordinal-*`: use first one, issue warning
+
+        """
+        if ordinal and year:
+            raise ValueError('both ordinal and year is not possible')
+        if ordinal:
+            search = ruleset or 'spellout-ordinal'
+        elif year:
+            search = ruleset or 'spellout-numbering-year'  # the CLDR ruleset name
+        else:
+            search = ruleset or 'spellout-numbering'
+
+        ruleset = self.get_ruleset(search)
+
+        if ruleset is None:
+            raise RulesetNotFound(search)
+
+        return ruleset.apply(number, self)
+
+
+    def get_ruleset(self, name):  # returns None if there is no such ruleset
+        for r in self.rulesets:
+            if r.name == name:
+                return r
+
+
+    @classmethod
+    def negotiate(cls, locale):
+        """
+        Negotiate proper RBNF rules based on global data item `rbnf_locales`
+        Caching is not necessary, the Locale object does that pretty well
+        """
+        loc = Locale.negotiate([str(Locale.parse(locale))], get_global('rbnf_locales'))
+        return cls(loc)
+
+
+class Ruleset(object):
+    """
+    Each rule set consists of a name, a colon, and a list of rules.
+    (in the ICU syntax, CLDR differs because of XML)
+
+    If the rule's rule descriptor is left out, the base value is one plus the
+    preceding rule's base value (or zero if this is the first rule in the list)
+    in a normal rule set.  In a fraction rule set, the base value is the same as
+    the preceding rule's base value.
+
+    A rule set may be either a regular rule set or a fraction rule set, depending
+    on whether it is used to format a number's integral part (or the whole number)
+    or a number's fractional part. Using a rule set to format a rule's fractional
+    part makes it a fraction rule set.
+
+    Which rule is used to format a number is defined according to one of the
+    following algorithms:
+
+    REGULAR (NON-FRACTION) PROCESSING
+    ---------------------------------
+    If the rule set is a regular rule set, do the following:
+    
+    MASTER_RULE
+    If the rule set includes a master rule (and the number was passed in as a
+    double), use the master rule.  (If the number being formatted was passed
+    in as a long, the master rule is ignored.)
+    
+    NEGATIVE_NUMBER_RULE
+    If the number is negative, use the negative-number rule.
+    
+    IMPROPER_FRACTION_RULE
+    If the number has a fractional part and is greater than 1, use
+    the improper fraction rule.
+    
+    PROPER_FRACTION_RULE
+    If the number has a fractional part and is between 0 and 1, use
+    the proper fraction rule.
+
+    Binary-search the rule list for the rule with the highest base value
+    less than or equal to the number. If that rule has two substitutions,
+    its base value is not an even multiple of its divisor, and the number
+    is an even multiple of the rule's divisor, use the rule that precedes
+    it in the rule list. Otherwise, use the rule itself.
+    
+    FRACTION PROCESSING
+    -------------------
+    If the rule set is a fraction rule set, do the following:
+
+    Ignore negative-number and fraction rules.
+    
+    For each rule in the list, multiply the number being formatted (which
+    will always be between 0 and 1) by the rule's base value. Keep track
+    of the distance between the result and the nearest integer.
+    
+    Use the rule that produced the result closest to zero in the above
+    calculation. In the event of a tie or a direct hit, use the first
+    matching rule encountered. (The idea here is to try each rule's base
+    value as a possible denominator of a fraction. Whichever denominator
+    produces the fraction closest in value to the number being formatted
+    wins.)
+
+    If the rule following the matching rule has the same base value,
+    use it if the numerator of the fraction is anything other than 1; if
+    the numerator is 1, use the original matching rule. (This is to allow
+    singular and plural forms of the rule text without a lot of extra hassle.)
+
+    ----
+
+    A rule's body consists of a string of characters terminated by a semicolon.
+    The rule may include zero, one, or two substitution tokens, and a range of
+    text in brackets. The brackets denote optional text (and may also include
+    one or both substitutions). The exact meanings of the substitution tokens,
+    and under what conditions optional text is omitted, depend on the syntax
+    of the substitution token and the context. The rest of the text in a rule
+    body is literal text that is output when the rule matches the number
+    being formatted.
+
+    A substitution token begins and ends with a token character. The token
+    character and the context together specify a mathematical operation to
+    be performed on the number being formatted. An optional substitution
+    descriptor specifies how the value resulting from that operation is
+    used to fill in the substitution. The position of the substitution
+    token in the rule body specifies the location of the resultant text
+    in the original rule text.
+
+    The meanings of the substitution token characters are as follows:
+    
+    →→  REMAINDER_TOKEN
+        :in normal rule:
+            Divide the number by the rule's divisor and format the remainder
+        :in negative-number rule:
+            Find the absolute value of the number and format the result
+        :in fraction or master rule:
+            Isolate the number's fractional part and format it.
+        :in rule in fraction rule set:
+            Not allowed.
+    
+    →→→  PREVIOUS_TOKEN
+        :in normal rule:
+            Divide the number by the rule's divisor and format the
+            remainder, but bypass the normal rule-selection process
+            and just use the rule that precedes this one in this
+            rule list.
+        :in all other rules:
+            Not allowed.
+    
+    ←←  INTEGRAL_TOKEN
+        :in normal rule:
+            Divide the number by the rule's divisor and format the quotient
+        :in negative-number rule:
+            Not allowed.
+        :in fraction or master rule:
+            Isolate the number's integral part and format it.
+        :in rule in fraction rule set:
+            Multiply the number by the rule's base value and format the result.
+    
+    ==  SUBSTITUTION_TOKEN
+        :in all rule sets:
+            Format the number unchanged
+    
+    []  OPT_START, OPT_END
+        :in normal rule:
+            Omit the optional text if the number is an even
+            multiple of the rule's divisor
+        :in negative-number rule:
+            Not allowed.
+        :in improper-fraction rule:
+            Omit the optional text if the number is between 0 and 1
+            (same as specifying both an x.x rule and a 0.x rule)
+        :in master rule:
+            Omit the optional text if the number is an integer
+            (same as specifying both an x.x rule and an x.0 rule)
+            !!! contradicts the above as it says the master rule is ignored
+        :in proper-fraction rule:
+            Not allowed.
+        :in rule in fraction rule set:
+            Omit the optional text if multiplying the number by the
+            rule's base value yields 1.
+    
+    $(cardinal,plural syntax)$  PLURAL_TOKEN
+        :in all rule sets:
+            This provides the ability to choose a word based on the
+            number divided by the radix to the power of the exponent
+            of the base value for the specified locale, which is
+            normally equivalent to the ←← value. This uses the cardinal
+            plural rules from PluralFormat. All strings used in the
+            plural format are treated as the same base value for parsing.
+    
+    $(ordinal,plural syntax)$  PLURAL_TOKEN
+        :in all rule sets:
+            This provides the ability to choose a word based on the
+            number divided by the radix to the power of the exponent
+            of the base value for the specified locale, which is
+            normally equivalent to the ←← value. This uses the ordinal
+            plural rules from PluralFormat. All strings used in the
+            plural format are treated as the same base value for parsing.
+    
+    INFINITY_RULE = 'Inf'
+    
+    NOT_A_NUMBER_RULE = 'NaN'
+    
+    SPECIAL_FRACTION_RULE = 'x,x'  # there are other options but not existent in CLDR
+    """
+    def __init__(self, name, private=False):
+        self.name = name  # looked up by RuleBasedNumberFormat.get_ruleset
+        self.private = private  # private rulesets are left out of available_rulesets
+        self.rules = []  # Rule objects, appended by the CLDR import script
+
+
+    def apply(self, number, parent, fractional=False):
+        """Select the rule of this ruleset that matches `number` and apply it."""
+        number = decimal.Decimal(str(number))
+        # str() avoids unnecessary precision, Decimal is exact in fraction rules
+
+        context = {
+            'search_at': parent,
+            'ruleset': self,
+            'fractional': fractional,
+            'omit_optional': False,  # no default value is defined in the spec
+            SUBSTITUTION_TOKEN: number,
+            'remainder_as_fractional': False  # format remainder as  fractional rule?
+        }
+        integral, remainder = divmod(number, 1)
+
+        # fractional rule (ruleset in fractional processing)
+        # the value should always be between 0 and 1
+        # not yet tested, it needs clarification
+        if fractional:
+            index = self.get_rule_fractional(remainder)
+            if index is None:
+                raise RuleNotFound("rule for fractional processing of %s" % remainder)
+            rule = self.rules[index]
+            context[INTEGRAL_TOKEN] = rule.value * remainder  # here remainder == number
+            context['omit_optional'] = rule.value * number == 1
+            return rule.apply(number, context)
+
+        # negative number rule
+        if number < 0:
+            rule =  self.get_rule_special(NEGATIVE_NUMBER_RULE)
+            if rule is None:
+                raise RuleNotFound("negative number rule (%s)" % NEGATIVE_NUMBER_RULE)
+            context[REMAINDER_TOKEN] = abs(number)
+            return rule.apply(number, context)
+
+        # master and fraction rules
+        if remainder != 0:
+            context[REMAINDER_TOKEN] = number - integral
+            context[INTEGRAL_TOKEN] = integral
+            context['remainder_as_fractional'] = True
+
+            # search for master rule
+            rule = self.get_rule_special(MASTER_RULE, strict=True)
+
+            # no master rule found
+            if rule is None:
+                if integral == 0:
+                    rule = self.get_rule_special(PROPER_FRACTION_RULE)
+                    if rule is None:
+                        raise RuleNotFound("proper fraction rule (%s)" % PROPER_FRACTION_RULE)
+
+                else:
+                    rule = self.get_rule_special(IMPROPER_FRACTION_RULE)
+                    if rule is None:
+                        raise RuleNotFound("improper fraction rule (%s)" % IMPROPER_FRACTION_RULE)
+                    context['omit_optional'] = 0 < number < 1  # NOTE(review): always False, integral != 0 here
+
+            return rule.apply(number, context)
+
+        # normal rule
+        index = self.get_rule_integral(integral)
+        if index is None:
+            raise RuleNotFound("normal rule for %s" % integral)
+        rule = self.rules[index]
+        i, r = divmod(integral, rule.divisor)
+        context[REMAINDER_TOKEN] = r
+        context[INTEGRAL_TOKEN] = i
+        context[PREVIOUS_TOKEN] = index-1  # get rule using ruleset
+        context['omit_optional'] = r != 0  # only if not even multiple (TODO no need to store separately)
+        return rule.apply(number, context)
+
+
+    def get_rule_special(self, val, strict=False):  # returns a Rule, or None in strict mode
+        if val in Rule.specials:
+            for r in self.rules:
+                if r.value == val:
+                    return r
+
+        # return last rule if no match occurred and strict is false
+        if not strict:
+            return self.rules[-1]
+
+
+    def get_rule_integral(self, val):
+        """
+        Return the index of the rule with the highest base value
+        less than or equal to the number (linear scan of the rule list).
+
+        If that rule has two substitutions,
+        its base value is not an even multiple of its divisor, and the number
+        is an even multiple of the rule's divisor, use the rule that precedes
+        it in the rule list. Otherwise, use the rule itself.
+        """
+        # automatically return last rule if no range matched
+        ret = len(self.rules)-1
+
+        for i in range(len(self.rules)-1):
+            if self.rules[i].value in Rule.specials:
+                continue
+
+            if self.rules[i].value <= val < self.rules[i+1].value:
+                ret = i
+                break
+
+        # need to have at least one normal rule? (otherwise ret could be None)
+        rule = self.rules[ret]
+        if rule.substitutions == 2 and \
+                rule.value % rule.divisor != 0 and \
+                val % rule.divisor == 0:
+            ret -= 1  # roll back only when the base value is NOT a multiple
+
+        return ret
+
+
+    def get_rule_fractional(self, val):
+        """If the rule set is a fraction rule set, do the following:
+
+        Ignore negative-number and fraction rules.
+
+        For each rule in the list, multiply the number being formatted (which
+        will always be between 0 and 1) by the rule's base value. Keep track
+        of the distance between the result and the nearest integer.
+
+        Use the rule that produced the result closest to zero in the above
+        calculation. In the event of a tie or a direct hit, use the first
+        matching rule encountered. (The idea here is to try each rule's base
+        value as a possible denominator of a fraction. Whichever denominator
+        produces the fraction closest in value to the number being formatted
+        wins.)
+
+        If the rule following the matching rule has the same base value,
+        use it if the numerator of the fraction is anything other than 1; if
+        the numerator is 1, use the original matching rule. (This is to allow
+        singular and plural forms of the rule text without a lot of extra hassle.)
+
+        Return the index of the selected rule, or None if no rule is usable.
+        ??? what is the numerator of what fraction here, is it not the closest integer
+        """
+        dists = []
+        for i, rule in enumerate(self.rules):
+            if rule.value in Rule.specials or rule.value == 0:  # ignore specials and 0 rules
+                continue
+            d = abs(round(val*rule.value) - val*rule.value)
+            dists.append((i, d))
+        if not dists:  # min() would raise ValueError on an empty list
+            return None  # the caller turns this into RuleNotFound
+        bst = min(dists, key=lambda x: x[1])[0]  # first rule closest to an integer
+
+        # there is a following rule
+        if len(self.rules) > bst+1 and \
+                self.rules[bst].value == self.rules[bst+1].value and \
+                val*self.rules[bst].value > 1:
+            bst += 1
+
+        return bst
+
+
+    def __repr__(self):  # debugging dump: name, private flag and every rule
+        return 'Ruleset %s %s\n%s\n' % (self.name, self.private, '\n'.join(['\t'+str(r) for r in self.rules]))
+
+
+class Rule(object):
+    """
+    base value, a divisor, rule text, and zero, one, or two substitutions.
+    """
+    specials = (
+        NEGATIVE_NUMBER_RULE, IMPROPER_FRACTION_RULE,
+        PROPER_FRACTION_RULE, MASTER_RULE, INFINITY_RULE,
+        NOT_A_NUMBER_RULE, SPECIAL_FRACTION_RULE,
+    )
+
+
+    def __init__(self, value, text, radix=None):
+        """
+        value : one of `specials`, or the base value (anything `int()` accepts)
+        text : the rule body, tokenized right away (may raise TokenizationError)
+        radix : base of the divisor (int or numeric string), 10 if not given
+        """
+        if value in self.specials:
+            self.value = value
+        else:
+            try:
+                self.value = int(value)
+            except (TypeError, ValueError):  # NOTE: self.value stays unset then
+                warnings.warn("Unknown rule value: [%s]" % value, SyntaxWarning)
+        self.text = text
+        self._radix = radix
+        self._parse(text)
+
+
+    def apply(self, number, context):
+        """Render this rule for `number`; `context` is the dict built by
+        Ruleset.apply (substitution values, flags and where to find rulesets)."""
+        from .numbers import format_decimal
+        res = []
+        for t in self.tokens:
+            if t.optional and not context['omit_optional']:  # True keeps optional text
+                continue
+
+            if t.type == TEXT_TOKEN:
+                res.append(t.reference)
+
+            elif t.type in REFERENCE_TOKENS:
+                ref_type, ref = t.reference
+                ruleset = None
+                if ref_type == INTERNAL_REF:
+                    ruleset = context['ruleset']
+                elif ref_type in (PUBLIC_REF, PRIVATE_REF):  # currently no distinction
+                    ruleset = context['search_at'].get_ruleset(ref)
+                elif ref_type == DECIMAL_REF:  # NOTE(review): ←/→ presumably need context[t.type]
+                    loc = context['search_at']._locale
+                    x = format_decimal(number, format=ref, locale=loc)
+                    res.append(x)
+
+                if ruleset:
+                    if t.type == REMAINDER_TOKEN and context['remainder_as_fractional']:
+                        fractional = True
+                    else:
+                        fractional = context['fractional']
+                    res.append(ruleset.apply(
+                        context[t.type],  # number
+                        context['search_at'],  # parent
+                        fractional,
+                    ))
+
+            elif t.type == PREVIOUS_TOKEN:
+                rule = context['ruleset'].rules[context[PREVIOUS_TOKEN]]
+                res.append(rule.apply(
+                    context[REMAINDER_TOKEN],  # number
+                    context,  # ???
+                ))
+
+            else:
+                raise ValueError('unknown token %s' % (t,))  # t is a tuple, wrap it
+
+        return ''.join(res)
+
+
+    @property
+    def divisor(self):
+        """highest power of the radix that is less than or equal to the base value"""
+        if isinstance(self.value, int):
+            radix, power = self.radix, 1
+            # exact integer arithmetic: a ratio of Decimal.ln() values can round below the exponent
+            while radix > 1 and power * radix <= self.value:
+                power *= radix
+            return power
+
+
+    @property
+    def radix(self):
+        return int(self._radix) if self._radix else 10  # the XML attribute is a string
+
+
+    @property
+    def substitutions(self):
+        return len([t for t in self.tokens if t.type in REFERENCE_TOKENS])
+
+
+    def _parse(self, text):
+        try:
+            self.tokens = list(tokenize(text))
+        except ValueError:
+            raise TokenizationError(self.text)
+
+
+    def __repr__(self):
+        return 'Rule %s (%s) - %s\n%s\n' % (
+            self.value, self.text,
+            self.radix,
+            '\n'.join(['\t\t'+str(t) for t in self.tokens]))
diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py
index 5fda2deb4..531d8528b 100755
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -33,7 +33,7 @@
 BABEL_PACKAGE_ROOT = os.path.join(CHECKOUT_ROOT, "babel")
 sys.path.insert(0, CHECKOUT_ROOT)
 
-from babel import dates, numbers
+from babel import dates, numbers, rbnf
 from babel.dates import split_interval_pattern
 from babel.localedata import Alias
 from babel.plural import PluralRule
@@ -225,6 +225,7 @@ def parse_global(srcdir, sup):
     all_currencies = collections.defaultdict(set)
     currency_fractions = global_data.setdefault('currency_fractions', {})
     territory_languages = global_data.setdefault('territory_languages', {})
+    rbnf_locales = global_data.setdefault('rbnf_locales', [])
     bcp47_timezone = parse(os.path.join(srcdir, 'bcp47', 'timezone.xml'))
     sup_windows_zones = parse(os.path.join(sup_dir, 'windowsZones.xml'))
     sup_metadata = parse(os.path.join(sup_dir, 'supplementalMetadata.xml'))
@@ -328,6 +329,14 @@ def parse_global(srcdir, sup):
                 'official_status': language.attrib.get('officialStatus'),
             }
         territory_languages[territory.attrib['type']] = languages
+
+    # To help the negotiation in `babel.numbers.spell_number`
+    # add all locales with rbnf rules to a list under `rbnf_locales`
+    filenames = os.listdir(os.path.join(srcdir, 'rbnf'))
+    filenames.remove('root.xml')
+    # TODO parse root.xml for global data (how to fall back?)
+    global_data['rbnf_locales'] = [os.path.splitext(f)[0] for f in filenames]
+
     return global_data
 
 
@@ -443,6 +452,13 @@ def _process_local_datas(sup, srcdir, destdir, force=False, dump_json=False):
                 unsupported_number_systems_string,
             ))
 
+        # not every locale has rbnf rules, so the file may be missing
+        # there could be a separate iteration for rbnf rule files
+        rbnf_filename = os.path.join(srcdir, 'rbnf', filename)
+        if os.path.isfile(rbnf_filename):
+            rbnf_tree = parse(rbnf_filename)
+            parse_rbnf_rules(data, rbnf_tree)
+
         write_datafile(data_filename, data, dump_json=dump_json)
 
 
@@ -981,6 +997,36 @@ def parse_measurement_systems(data, tree):
             _import_type_text(measurement_systems, measurement_system, type=type)
 
 
+def parse_rbnf_rules(data, tree):
+    """
+    Store the RBNF rulesets of `tree` in `data['rbnf_rules']`, based on:
+    http://www.unicode.org/reports/tr35/tr35-47/tr35-numbers.html#Rule-Based_Number_Formatting
+    """
+    rbnf_rules = data.setdefault('rbnf_rules', {})
+
+    # rules that cannot be tokenized are logged and left out
+
+    for ruleset_grouping in tree.findall('.//rbnf/rulesetGrouping'):
+        group_name = ruleset_grouping.attrib['type']
+        rbnf_rules[group_name] = []  # TODO check for overwrite
+        for ruleset in ruleset_grouping.findall('ruleset'):
+            ruleset_name = ruleset.attrib['type']
+            private = ruleset.attrib.get('access') == 'private'
+            ruleset_obj = rbnf.Ruleset(ruleset_name, private)
+            for rule in ruleset.findall('rbnfrule'):
+                radix = rule.attrib.get('radix')
+                try:
+                    rule_obj = rbnf.Rule(rule.attrib['value'], rule.text, radix)
+                except rbnf.TokenizationError:
+                    log('%s: Unable to parse rule "%s%s: %s "' % (
+                        data['locale_id'], rule.attrib['value'],
+                        '' if radix is None else ('/%s' % radix),
+                        rule.text,
+                    ))
+                    continue  # never append a stale or unbound rule_obj
+                ruleset_obj.rules.append(rule_obj)
+            rbnf_rules[group_name].append(ruleset_obj)
+
 
 if __name__ == '__main__':
     main()
diff --git a/tests/test_number_spelling.py b/tests/test_number_spelling.py
new file mode 100644
index 000000000..bcfcf8644
--- /dev/null
+++ b/tests/test_number_spelling.py
@@ -0,0 +1,183 @@
+import unittest
+import pytest
+
+from babel import numbers
+from babel import rbnf
+from babel.core import get_global
+from babel.localedata import locale_identifiers
+
+soft_hyphen = '\xad'
+
+class TestRuleEngine(unittest.TestCase):
+    """
+    Test everything related to the rules engine
+    """
+    def test_basic(self):
+        x = rbnf.RuleBasedNumberFormat.negotiate('hu_HU')
+        assert str(x._locale) == 'hu'
+        assert 'spellout-numbering' in x.available_rulesets
+
+
+    def test_negotiation(self):
+        valid_ruleset_groups = ("SpelloutRules", "OrdinalRules", "NumberingSystemRules")
+        
+        for lid in locale_identifiers():
+            loc = rbnf.RuleBasedNumberFormat.negotiate(lid)._locale
+            if loc is None:
+                # generate warning if necessary
+                pass
+            else:
+                # test groups
+                for k in loc._data['rbnf_rules']:
+                    assert k in valid_ruleset_groups
+
+
+    def test_tokenization(self):
+
+        x = list(rbnf.tokenize("text[opt];"))
+        res = [
+            rbnf.TokenInfo(type=1, reference='text', optional=False),
+            rbnf.TokenInfo(type=1, reference='opt', optional=True),
+        ]
+        assert x == res
+
+
+    def test_xml_parsing(self):
+        """
+        all the rules should be able to go through the parser and tokenizer
+        made up some rules and run the tokenizer on them
+
+        TODO
+        read data from all the locales that have rbnf_rules defined
+        all the raw rules should be in a specific structure based
+        on the XML specification
+        """
+        assert True
+
+
+class TestSpelling(unittest.TestCase):
+    """
+    Locale specific tests
+    """
+    def test_hu_HU_cardinal(self):
+        def _spell(x):
+            return numbers.spell_number(x, locale='hu_HU').replace(soft_hyphen, '')
+
+        assert _spell(0) == "nulla"
+        assert _spell(1) == "egy"
+        assert _spell(2) == "kettő"
+        assert _spell(3) == "három"
+        assert _spell(10) == "tíz"
+        assert _spell(20) == "húsz"
+        # assert _spell('-0') == "mínusz nulla"
+        # assert _spell(123.25) == "százhuszonhárom egész huszonöt század"
+        assert _spell(-12) == "mínusz tizenkettő"
+        # assert _spell(23457829) == "huszonhárommillió-négyszázötvenhétezer-nyolcszázhuszonkilenc"
+        assert _spell(1950) == "ezerkilencszázötven"
+        # only soft hyphens in the rules !!!
+        # assert _spell(2001) == "kétezer-egy"
+        # assert _spell('1999.2386') == "ezerkilencszázkilencvenkilenc egész kétezer-háromszáznyolcvanhat tízezred"
+        # assert _spell(-.199923862) == "mínusz nulla egész százkilencvenkilencezer-kilencszázhuszonnégy milliomod"
+        # assert _spell(-.199923862) == "kerekítve mínusz nulla egész ezerkilencszázkilencvenkilenc tízezred"
+        # assert _spell(.4326752) == "nulla egész negyvenhárom század"
+
+
+    def test_hu_HU_ordinal(self):
+        def _spell(x):
+            return numbers.spell_number(x, locale='hu_HU', ordinal=True).replace(soft_hyphen, '')
+
+        assert _spell(0) == "nulla"
+        # assert _spell(0) == "nulladik"
+        assert _spell(1) == "első"
+        assert _spell(2) == "második"
+        assert _spell(3) == "harmadik"
+        assert _spell(10) == "tizedik"
+        assert _spell(20) == "huszadik"
+        assert _spell(30) == "harmincadik"
+        assert _spell(-12) == "mínusz tizenkettedik"
+        # assert _spell(23457829) == "huszonhárommilliónégyszázötvenhétezernyolcszázhuszonkilencedik"  # wrong multiple cldr errors
+        # assert _spell(23457829) == "huszonhárommillió-négyszázötvenhétezer-nyolcszázhuszonkilencedik"
+        assert _spell(1100) == "ezerszázadik"
+        assert _spell(1950) == "ezerkilencszázötvenedik"
+        # assert _spell(2001) == "kétezer-egyedik"
+
+
+    def test_en_GB_cardinal(self):
+        def _spell(x):
+            return numbers.spell_number(x, locale='en_GB').replace(soft_hyphen, '')
+
+        assert _spell(0) == "zero"
+        assert _spell(1) == "one"
+        assert _spell(2) == "two"
+        assert _spell(3) == "three"
+        # assert _spell('-0') == "minus zero"
+        # assert _spell(123.25) == "one hundred and twenty-three point twenty-five hundredths"
+        assert _spell(-12) == "minus twelve"
+        assert _spell(23457829) == "twenty-three million four hundred fifty-seven thousand eight hundred twenty-nine"
+        # assert _spell(23457829) == "twenty-three million four hundred and fifty-seven thousand eight hundred and twenty-nine"
+        assert _spell(1950) == "one thousand nine hundred fifty"
+        # assert _spell(1950) == "one thousand nine hundred and fifty"
+        assert _spell(2001) == "two thousand one"
+        # assert _spell('1999.238') == "one thousand nine hundred and ninety-nine point two hundred and thirty-eight thousandths"
+        # assert _spell(-.199923862, precision=3, state_rounded=True) == "approximately minus zero point two tenths"
+        # assert _spell(-.1) == "minus zero point one tenth" # float to string conversion preserves precision
+
+
+    def test_en_GB_ordinal(self):
+        def _spell(x):
+            return numbers.spell_number(x, locale='en_GB', ordinal=True).replace(soft_hyphen, '')
+
+        assert _spell(0) == "zeroth"
+        assert _spell(1) == "first"
+        assert _spell(2) == "second"
+        assert _spell(3) == "third"
+        assert _spell(4) == "fourth"
+        assert _spell(5) == "fifth"
+        assert _spell(6) == "sixth"
+        assert _spell(7) == "seventh"
+        assert _spell(8) == "eighth"
+        assert _spell(9) == "ninth"
+        assert _spell(10) == "tenth"
+        assert _spell(11) == "eleventh"
+        assert _spell(12) == "twelfth"
+        assert _spell(13) == "thirteenth"
+        assert _spell(20) == "twentieth"
+        assert _spell(30) == "thirtieth"
+        assert _spell(40) == "fortieth"
+        # assert _spell(40) == "fourtieth"
+        assert _spell(-12) == "minus twelfth"
+        # assert _spell(23457829) == "twenty-three million four hundred fifty-seven thousand eight hundred twenty-ninth"  # apostrophes
+        # assert _spell(23457829) == "twenty-three million four hundred and fifty-seven thousand eight hundred and twenty-ninth"
+        assert _spell(1950) == "one thousand nine hundred fiftieth"
+        # assert _spell(1950) == "one thousand nine hundred and fiftieth"
+        assert _spell(2001) == "two thousand first"
+
+
+
+# def test_hu_HU_error():
+#     with pytest.raises(exceptions.TooBigToSpell) as excinfo:
+#         _spell(10**66, ordinal=True)
+
+#     with pytest.raises(exceptions.PrecisionError) as excinfo:
+#         _spell(.4326752, locale='hu_HU', precision=7)
+
+#     with pytest.raises(exceptions.PrecisionError) as excinfo:
+#         _spell(.4326752)
+
+#     with pytest.raises(exceptions.NoFractionOrdinalsAllowed) as excinfo:
+#         _spell('1999.23862', ordinal=True)
+
+# def test_en_GB_error():
+#     with pytest.raises(exceptions.TooBigToSpell) as excinfo:
+#         _spell(10**24, ordinal=True, locale='en_GB')
+
+#     with pytest.raises(exceptions.PrecisionError) as excinfo:
+#         _spell(.4326752, locale='en_GB', precision=4)
+
+#     with pytest.raises(exceptions.PrecisionError) as excinfo:
+#         _spell(.4326752, locale='en_GB')
+
+#     with pytest.raises(exceptions.NoFractionOrdinalsAllowed) as excinfo:
+#         _spell('1999.23', ordinal=True, locale='en_GB')
+
+

From 47051732a957532ccd51730452ebbf2958ce30fc Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Tue, 31 Dec 2019 12:22:05 +0200
Subject: [PATCH 2/9] rbnf: correct radix reading

---
 babel/rbnf.py          | 20 +++++---------------
 scripts/import_cldr.py |  7 ++++---
 2 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/babel/rbnf.py b/babel/rbnf.py
index 2a5d24275..4b28fd037 100644
--- a/babel/rbnf.py
+++ b/babel/rbnf.py
@@ -619,16 +619,12 @@ def __init__(self, value, text, radix=None):
         if value in self.specials:
             self.value = value
         else:
-            try:
-                self.value = int(value)
-            except:
-                warnings.warn("Unknown rule value: [%s]" % value, SyntaxWarning)
+            self.value = int(value)
 
         self.text = text
-        self._radix = radix
-        
-        self._parse(text)
+        self.radix = int(radix or 10)
 
+        self._parse(text)
 
     def apply(self, number, context):
         """
@@ -685,14 +681,8 @@ def divisor(self):
         if isinstance(self.value, int):
             if self.value == 0:
                 return 1
-            exp = decimal.Decimal(self.value).ln()/decimal.Decimal(self.radix).ln()
-            return int(self.radix**math.floor(exp))
-
-    
-    @property
-    def radix(self):
-        return self._radix or 10
-
+            exp = decimal.Decimal(self.value).ln() / decimal.Decimal(self.radix).ln()
+            return int(self.radix ** math.floor(exp))
 
     @property
     def substitutions(self):
diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py
index 531d8528b..bd6967b2f 100755
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -225,7 +225,6 @@ def parse_global(srcdir, sup):
     all_currencies = collections.defaultdict(set)
     currency_fractions = global_data.setdefault('currency_fractions', {})
     territory_languages = global_data.setdefault('territory_languages', {})
-    rbnf_locales = global_data.setdefault('rbnf_locales', [])
     bcp47_timezone = parse(os.path.join(srcdir, 'bcp47', 'timezone.xml'))
     sup_windows_zones = parse(os.path.join(sup_dir, 'windowsZones.xml'))
     sup_metadata = parse(os.path.join(sup_dir, 'supplementalMetadata.xml'))
@@ -1015,8 +1014,11 @@ def parse_rbnf_rules(data, tree):
             ruleset_obj = rbnf.Ruleset(ruleset_name, private)
             for rule in ruleset.findall('rbnfrule'):
                 radix = rule.attrib.get('radix')
+                if radix == "1,000":  # HACK: work around misspelled radix in mt.xml
+                    radix = "1000"
                 try:
                     rule_obj = rbnf.Rule(rule.attrib['value'], rule.text, radix)
+                    ruleset_obj.rules.append(rule_obj)
                 except rbnf.TokenizationError as e:
                     log('%s: Unable to parse rule "%s%s: %s "' % (
                         data['locale_id'],
@@ -1024,8 +1026,7 @@ def parse_rbnf_rules(data, tree):
                         rule.text,
                         '' if radix is None else ('/%s' % radix),
                     ))
-                ruleset_obj.rules.append(rule_obj)
-            rbnf_rules[group_name].append(ruleset_obj)        
+            rbnf_rules[group_name].append(ruleset_obj)
 
 
 if __name__ == '__main__':

From 1e3c5251499781ff5848f35627b2b3b40df6b65a Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Tue, 31 Dec 2019 12:51:03 +0200
Subject: [PATCH 3/9] rbnf: light clean up

---
 babel/rbnf.py                 | 125 ++++++++++++++++------------------
 tests/test_number_spelling.py |  17 ++---
 2 files changed, 62 insertions(+), 80 deletions(-)

diff --git a/babel/rbnf.py b/babel/rbnf.py
index 4b28fd037..60540ccb2 100644
--- a/babel/rbnf.py
+++ b/babel/rbnf.py
@@ -33,7 +33,6 @@
 from __future__ import unicode_literals
 
 import re
-import sys
 import math
 import decimal
 import collections
@@ -50,15 +49,18 @@
 OPT_START = 7
 OPT_END = 8
 
-regex = [
-    (PLURAL_TOKEN,          r"\$\((.+)\)\$"),
-    (INTEGRAL_TOKEN,        r"←([^←[]*)←(←?)"),
-    (PREVIOUS_TOKEN,        r"→→→"),
-    (REMAINDER_TOKEN,       r"→([^→[]*)→"),
-    (SUBSTITUTION_TOKEN,    r"=([^=[]+)="),
-    (OPT_START,             r"\["),
-    (OPT_END,               r"\]"),
-    (TEXT_TOKEN,            r"[^[\]=→←]+"),
+token_regexes = [
+    (t, re.compile(r))
+    for (t, r) in [
+        (PLURAL_TOKEN, r"\$\((.+)\)\$"),
+        (INTEGRAL_TOKEN, r"←([^←[]*)←(←?)"),
+        (PREVIOUS_TOKEN, r"→→→"),
+        (REMAINDER_TOKEN, r"→([^→[]*)→"),
+        (SUBSTITUTION_TOKEN, r"=([^=[]+)="),
+        (OPT_START, r"\["),
+        (OPT_END, r"\]"),
+        (TEXT_TOKEN, r"[^[\]=→←]+"),
+    ]
 ]
 
 INTERNAL_REF = 1
@@ -76,6 +78,8 @@
 INFINITY_RULE = 'Inf'
 NOT_A_NUMBER_RULE = 'NaN'
 SPECIAL_FRACTION_RULE = 'x,x'  # there are other options but not existent in CLDR
+
+
 # locale.number_symbols['decimal']
 # normal rule means a number is specified
 
@@ -86,9 +90,6 @@ class RulesetNotFound(RBNFError): pass
 class RuleNotFound(RBNFError): pass
 
 TokenInfo = collections.namedtuple('TokenInfo', 'type reference optional')
-        
-# compile regex
-regex_comp = [(t, re.compile(r)) for t, r in regex]
 
 
 def tokenize(text):
@@ -97,37 +98,41 @@ def tokenize(text):
     
     Text parsed by matching a list of regular expressions
     against the beginning of the text. If the regex match
-    a token is generated and we continue with the rest of
+    a token is generated, and we continue with the rest of
     the text.
 
-    Some of the tokens are optional if they are in squared
-    brackets. From regular expressions for the begining and
+    Some tokens are optional if they are in squared
+    brackets. From regular expressions for the beginning and
     end of the optional section no tokens are generated.
-    Instead all the tokens inside the optional section are
-    flaged as optional.
+    Instead, all the tokens inside the optional section are
+    flagged as optional.
     
-    Some of them tokens are referencing other rulesets by name
-    this information is stored in the token along with the type
+    Some of the tokens are referencing other rulesets by name.
+    This information is stored in the token along with the type
     of reference.
 
     """
-    # remove uneccesarry syntax (only used in the non-xml form)
-    if text.endswith(";"): text = text[:-1]
-    if text.startswith("'"): text = text[1:]
+    # remove unnecessary syntax (only used in the non-xml form)
+    if text.endswith(";"):
+        text = text[:-1]
+    if text.startswith("'"):
+        text = text[1:]
 
     optional = False
 
     while text:
         stop = True
         # print("TEXT: ", text)
-        for tok, regex in regex_comp:
+        for tok, regex in token_regexes:
             # print(token, regex)
             match = regex.match(text)
             if match:
                 stop = False
                 text = text[match.end():]
-                if tok == OPT_START: optional = True
-                elif tok == OPT_END: optional = False
+                if tok == OPT_START:
+                    optional = True
+                elif tok == OPT_END:
+                    optional = False
                 else:
                     token = _gen_token(tok, match, optional)
                     if token:
@@ -138,7 +143,7 @@ def tokenize(text):
 
 
 def _gen_token(tok, match, optional):
-    # remove this if CLCR is updated based on ticket
+    # remove this if CLDR is updated based on ticket
     # http://unicode.org/cldr/trac/ticket/10544
     if tok == INTEGRAL_TOKEN and match.group(2) == '←':
         warnings.warn('Unsupported syntax ←...←←', SyntaxWarning)
@@ -171,13 +176,6 @@ def _parse_reference(string):
     return INTERNAL_REF, ""  # defaults to this
 
 
-def untokenize_ICU():
-    """
-    TODO implement ICU style representation
-    rather make Ruleset.format_icu()
-    """
-
-
 class RuleBasedNumberFormat(object):
     """
     RuleBasedNumberFormat's behavior consists of one or more rule sets
@@ -206,6 +204,7 @@ class RuleBasedNumberFormat(object):
             the default rule set for this formatter.
     """
     group_types = ('SpelloutRules', 'OrdinalRules', 'NumberingSystemRules')
+
     # spell number should go for Spelloutrules
     # make interface for the other two groups
 
@@ -434,15 +433,15 @@ class Ruleset(object):
     
     SPECIAL_FRACTION_RULE = 'x,x'  # there are other options but not existent in CLDR
     """
+
     def __init__(self, name, private=False):
         self.name = name
         self.private = private
         self.rules = []
 
-
     def apply(self, number, parent, fractional=False):
         number = decimal.Decimal(str(number))
-        # str is needed to avoid unecessary precision
+        # str is needed to avoid unnecessary precision
         # decimal is necessary for exact representation in fraction rules
 
         context = {
@@ -451,7 +450,7 @@ def apply(self, number, parent, fractional=False):
             'fractional': fractional,
             'omit_optional': False,  # no default value is defined in the spec
             SUBSTITUTION_TOKEN: number,
-            'remainder_as_fractional': False  # format remainder as  fractional rule?
+            'remainder_as_fractional': False  # format remainder as fractional rule?
         }
         integral, remainder = divmod(number, 1)
 
@@ -469,7 +468,7 @@ def apply(self, number, parent, fractional=False):
 
         # negative number rule
         if number < 0:
-            rule =  self.get_rule_special(NEGATIVE_NUMBER_RULE)
+            rule = self.get_rule_special(NEGATIVE_NUMBER_RULE)
             if rule is None:
                 raise RuleNotFound("negative number rule (%s)" % NEGATIVE_NUMBER_RULE)
             context[REMAINDER_TOKEN] = abs(number)
@@ -507,22 +506,20 @@ def apply(self, number, parent, fractional=False):
         i, r = divmod(integral, rule.divisor)
         context[REMAINDER_TOKEN] = r
         context[INTEGRAL_TOKEN] = i
-        context[PREVIOUS_TOKEN] = index-1  # get rule using ruleset
-        context['omit_optional'] = r != 0  # only if not even multiple (TODO no need to store separatelly)
+        context[PREVIOUS_TOKEN] = index - 1  # get rule using ruleset
+        context['omit_optional'] = r != 0  # only if not even multiple (TODO no need to store separately)
         return rule.apply(number, context)
 
-
     def get_rule_special(self, val, strict=False):
         if val in Rule.specials:
             for r in self.rules:
                 if r.value == val:
                     return r
-        
-        # return last rule if no match occured and strict is false
+
+        # return last rule if no match occurred and strict is false
         if not strict:
             return self.rules[-1]
 
-
     def get_rule_integral(self, val):
         """
         Binary-search the rule list for the rule with the highest base value
@@ -534,13 +531,13 @@ def get_rule_integral(self, val):
         it in the rule list. Otherwise, use the rule itself.
         """
         # automatically return last rule if no range matched
-        ret = len(self.rules)-1
+        ret = len(self.rules) - 1
 
-        for i in range(len(self.rules)-1):
+        for i in range(len(self.rules) - 1):
             if self.rules[i].value in Rule.specials:
                 continue
-            
-            if self.rules[i].value <= val < self.rules[i+1].value:
+
+            if self.rules[i].value <= val < self.rules[i + 1].value:
                 ret = i
                 break
 
@@ -553,16 +550,15 @@ def get_rule_integral(self, val):
 
         return ret
 
-
     def get_rule_fractional(self, val):
         """If the rule set is a fraction rule set, do the following:
 
         Ignore negative-number and fraction rules.
-        
+
         For each rule in the list, multiply the number being formatted (which
         will always be between 0 and 1) by the rule's base value. Keep track
         of the distance between the result and the nearest integer.
-        
+
         Use the rule that produced the result closest to zero in the above
         calculation. In the event of a tie or a direct hit, use the first
         matching rule encountered. (The idea here is to try each rule's base
@@ -582,35 +578,33 @@ def get_rule_fractional(self, val):
         for i, rule in enumerate(self.rules):
             if rule.value in Rule.specials or rule.value == 0:  # ignore specials and 0 rules
                 continue
-            d = abs(round(val*rule.value) - val*rule.value)
+            d = abs(round(val * rule.value) - val * rule.value)
             dists.append((i, d))
 
         # get the index of the closest 0 match
         bst = min(dists, key=lambda x: x[1])[0]
 
         # there is a following rule
-        if len(self.rules) > bst+1 and \
-                self.rules[bst].value == self.rules[bst+1].value and \
-                val*self.rules[bst].value > 1:
+        if len(self.rules) > bst + 1 and \
+                self.rules[bst].value == self.rules[bst + 1].value and \
+                val * self.rules[bst].value > 1:
             bst += 1
 
         return bst
 
-
     def __repr__(self):
-        return 'Ruleset %s %s\n%s\n' % (self.name, self.private, '\n'.join(['\t'+str(r) for r in self.rules]))
+        return 'Ruleset %s %s\n%s\n' % (self.name, self.private, '\n'.join(['\t' + str(r) for r in self.rules]))
 
 
 class Rule(object):
     """
     base value, a divisor, rule text, and zero, one, or two substitutions.
     """
-    specials = (
+    specials = {
         NEGATIVE_NUMBER_RULE, IMPROPER_FRACTION_RULE,
         PROPER_FRACTION_RULE, MASTER_RULE, INFINITY_RULE,
         NOT_A_NUMBER_RULE, SPECIAL_FRACTION_RULE,
-    )
-
+    }
 
     def __init__(self, value, text, radix=None):
         """
@@ -647,8 +641,7 @@ def apply(self, number, context):
                     ruleset = context['search_at'].get_ruleset(ref)
                 elif ref_type == DECIMAL_REF:
                     loc = context['search_at']._locale
-                    x = numbers.format_decimal(number, format=ref, locale=loc)
-                    res.append(x)
+                    res.append(format_decimal(number, format=ref, locale=loc))
 
                 if ruleset:
                     if t.type == REMAINDER_TOKEN and context['remainder_as_fractional']:
@@ -671,10 +664,8 @@ def apply(self, number, context):
             else:
                 raise ValueError('unknown token %s', t)
 
-
         return ''.join(res)
 
-
     @property
     def divisor(self):
         """it is highest exponent of radix less then or equal to the rules's base"""
@@ -688,16 +679,14 @@ def divisor(self):
     def substitutions(self):
         return len([t for t in self.tokens if t.type in REFERENCE_TOKENS])
 
-
     def _parse(self, text):
         try:
             self.tokens = [t for t in tokenize(text)]
         except ValueError:
-            raise TokenizationError(self.text)
-
+            raise TokenizationError(text)
 
     def __repr__(self):
         return 'Rule %s (%s) - %s\n%s\n' % (
             self.value, self.text,
             self.radix,
-            '\n'.join(['\t\t'+str(t) for t in self.tokens]))
+            '\n'.join(['\t\t' + str(t) for t in self.tokens]))
diff --git a/tests/test_number_spelling.py b/tests/test_number_spelling.py
index bcfcf8644..3a8024e93 100644
--- a/tests/test_number_spelling.py
+++ b/tests/test_number_spelling.py
@@ -1,26 +1,25 @@
 import unittest
+
 import pytest
 
 from babel import numbers
 from babel import rbnf
-from babel.core import get_global
 from babel.localedata import locale_identifiers
 
 soft_hyphen = '\xad'
 
+
 class TestRuleEngine(unittest.TestCase):
     """
     Test everything related to the rules engine
     """
+
     def test_basic(self):
         x = rbnf.RuleBasedNumberFormat.negotiate('hu_HU')
         assert str(x._locale) == 'hu'
         assert 'spellout-numbering' in x.available_rulesets
 
-
     def test_negotiation(self):
-        valid_ruleset_groups = ("SpelloutRules", "OrdinalRules", "NumberingSystemRules")
-        
         for lid in locale_identifiers():
             loc = rbnf.RuleBasedNumberFormat.negotiate(lid)._locale
             if loc is None:
@@ -29,8 +28,7 @@ def test_negotiation(self):
             else:
                 # test groups
                 for k in loc._data['rbnf_rules']:
-                    assert k in valid_ruleset_groups
-
+                    assert k in rbnf.RuleBasedNumberFormat.group_types
 
     def test_tokenization(self):
 
@@ -41,7 +39,6 @@ def test_tokenization(self):
         ]
         assert x == res
 
-
     def test_xml_parsing(self):
         """
         all the rules should be able to go through the parser and tokenizer
@@ -59,6 +56,7 @@ class TestSpelling(unittest.TestCase):
     """
     Locale specific tests
     """
+
     def test_hu_HU_cardinal(self):
         def _spell(x):
             return numbers.spell_number(x, locale='hu_HU').replace(soft_hyphen, '')
@@ -81,7 +79,6 @@ def _spell(x):
         # assert _spell(-.199923862) == "kerekítve mínusz nulla egész ezerkilencszázkilencvenkilenc tízezred"
         # assert _spell(.4326752) == "nulla egész negyvenhárom század"
 
-
     def test_hu_HU_ordinal(self):
         def _spell(x):
             return numbers.spell_number(x, locale='hu_HU', ordinal=True).replace(soft_hyphen, '')
@@ -101,7 +98,6 @@ def _spell(x):
         assert _spell(1950) == "ezerkilencszázötvenedik"
         # assert _spell(2001) == "kétezer-egyedik"
 
-
     def test_en_GB_cardinal(self):
         def _spell(x):
             return numbers.spell_number(x, locale='en_GB').replace(soft_hyphen, '')
@@ -122,7 +118,6 @@ def _spell(x):
         # assert _spell(-.199923862, precision=3, state_rounded=True) == "approximately minus zero point two tenths"
         # assert _spell(-.1) == "minus zero point one tenth" # float to string conversion preserves precision
 
-
     def test_en_GB_ordinal(self):
         def _spell(x):
             return numbers.spell_number(x, locale='en_GB', ordinal=True).replace(soft_hyphen, '')
@@ -179,5 +174,3 @@ def _spell(x):
 
 #     with pytest.raises(exceptions.NoFractionOrdinalsAllowed) as excinfo:
 #         _spell('1999.23', ordinal=True, locale='en_GB')
-
-

From a6b6d2d5bfb36fd9675c27fcb9b22541c0f5c424 Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Tue, 31 Dec 2019 12:40:47 +0200
Subject: [PATCH 4/9] rbnf: make spell_number API less kwargsy

---
 babel/numbers.py              |  6 +--
 babel/rbnf.py                 | 98 ++++++++++++++++++++++-------------
 tests/test_number_spelling.py | 18 +++----
 3 files changed, 75 insertions(+), 47 deletions(-)

diff --git a/babel/numbers.py b/babel/numbers.py
index d9a0c0462..12e82d219 100644
--- a/babel/numbers.py
+++ b/babel/numbers.py
@@ -663,15 +663,15 @@ def __init__(self, message, suggestions=None):
         self.suggestions = suggestions
 
 
-def spell_number(number, locale=LC_NUMERIC, **kwargs):
+def spell_number(number, locale=LC_NUMERIC, ruleset=None):
     """Return value spelled out for a specific locale
     
     :param number: the number to format
     :param locale: the `Locale` object or locale identifier
-    :param kwargs: optional locale specific parameters
+    :param ruleset: the ruleset to use; defaults to regular numbers.
     """
     speller = RuleBasedNumberFormat.negotiate(locale)
-    return speller.format(number, **kwargs)
+    return speller.format(number, ruleset=ruleset)
 
 
 def get_rbnf_rules(locale=LC_NUMERIC):
diff --git a/babel/rbnf.py b/babel/rbnf.py
index 60540ccb2..fbcec253a 100644
--- a/babel/rbnf.py
+++ b/babel/rbnf.py
@@ -84,10 +84,25 @@
 # normal rule means a number is specified
 
 
-class RBNFError(Exception): pass
-class TokenizationError(RBNFError): pass
-class RulesetNotFound(RBNFError): pass
-class RuleNotFound(RBNFError): pass
+class RBNFError(Exception):
+    pass
+
+
+class TokenizationError(RBNFError):
+    pass
+
+
+class RulesetNotFound(RBNFError):
+    pass
+
+
+class RuleNotFound(RBNFError):
+    pass
+
+
+class RulesetSubstitutionWarning(UserWarning):
+    pass
+
 
 TokenInfo = collections.namedtuple('TokenInfo', 'type reference optional')
 
@@ -221,45 +236,56 @@ def available_rulesets(self):
         """list available public rulesets"""
         return [r.name for r in self.rulesets if not r.private]
 
-
-    def format(self, number, ordinal=False, year=False, ruleset=None, **kwargs):
-        """spell an actual number (int/float/decimal)
-        
-        Search available_rulesets for an entry point
-        default is `spellout-numbering`.
-
-        If year is True: use spellout-numbering-year
-        If ordinal is True: use spellout-ordinal
-        If year and ordinal both True: raise error
-        
-        TODO
-        If no `spellout-ordinal`:
-            if has `spellout-ordinal-*`: use first one, issue warning
-
+    def _find_matching_ruleset(self, prefix):
+        available_rulesets = self.available_rulesets
+        if prefix in available_rulesets:
+            return (prefix, True)
+        # Sorting here avoids use of more specific ("spellout-ordinal-sinokorean-count")
+        # rulesets when a shorter one might be available.
+        for ruleset in sorted(available_rulesets):
+            if ruleset.startswith(prefix):
+                return (ruleset, False)
+        return (None, False)
+
+    def match_ruleset(self, ruleset):
         """
-        if ordinal and year:
-            raise ValueError('both ordinal and year is not possible')
-        if ordinal:
-            search = ruleset or 'spellout-ordinal'
-        elif year:
-            search = ruleset or 'spellout-year'
-        else:
-            search = ruleset or 'spellout-numbering'
-
-        ruleset = self.get_ruleset(search)
-
-        if ruleset is None:
-            raise RulesetNotFound(search)
-
-        return ruleset.apply(number, self)
+        Try to find a matching ruleset given a ruleset name or alias ("year", "ordinal").
+        """
+        if ruleset == "year":
+            ruleset = "spellout-numbering-year"
+        elif ruleset == "ordinal":
+            ruleset, exact_match = self._find_matching_ruleset("spellout-ordinal")
+            if not ruleset:
+                raise RulesetNotFound("No ordinal ruleset is available for %s" % (
+                    self._locale,
+                ))
+            if not exact_match:
+                warnings.warn("Using non-specific ordinal ruleset %s" % ruleset, RulesetSubstitutionWarning)
+        ruleset_obj = self.get_ruleset(ruleset)
+        if not ruleset_obj:
+            raise RulesetNotFound("Ruleset %r is not one of the ones available for %s: %r" % (
+                ruleset,
+                self._locale,
+                self.available_rulesets,
+            ))
+        return ruleset_obj
+
+    def format(self, number, ruleset=None):
+        """Format a number (int/float/decimal) with spelling rules.
+
+        Ruleset may be an actual ruleset name for the locale,
+        or one of the aliases "year" or "ordinal".
+        """
+        if not ruleset:
+            ruleset = "spellout-numbering"
 
+        return self.match_ruleset(ruleset).apply(number, self)
 
     def get_ruleset(self, name):
         for r in self.rulesets:
             if r.name == name:
                 return r
 
-
     @classmethod
     def negotiate(cls, locale):
         """
@@ -267,6 +293,8 @@ def negotiate(cls, locale):
         Caching is not necessary the Locale object does that pretty well
         """
         loc = Locale.negotiate([str(Locale.parse(locale))], get_global('rbnf_locales'))
+        if not loc:
+            raise RulesetNotFound("No RBNF rules available for %s" % locale)
         return cls(loc)
 
 
diff --git a/tests/test_number_spelling.py b/tests/test_number_spelling.py
index 3a8024e93..65815b40b 100644
--- a/tests/test_number_spelling.py
+++ b/tests/test_number_spelling.py
@@ -21,14 +21,14 @@ def test_basic(self):
 
     def test_negotiation(self):
         for lid in locale_identifiers():
-            loc = rbnf.RuleBasedNumberFormat.negotiate(lid)._locale
-            if loc is None:
+            try:
+                loc = rbnf.RuleBasedNumberFormat.negotiate(lid)._locale
+            except rbnf.RulesetNotFound:
                 # generate warning if necessary
-                pass
-            else:
-                # test groups
-                for k in loc._data['rbnf_rules']:
-                    assert k in rbnf.RuleBasedNumberFormat.group_types
+                continue
+            # test groups
+            for k in loc._data['rbnf_rules']:
+                assert k in rbnf.RuleBasedNumberFormat.group_types
 
     def test_tokenization(self):
 
@@ -81,7 +81,7 @@ def _spell(x):
 
     def test_hu_HU_ordinal(self):
         def _spell(x):
-            return numbers.spell_number(x, locale='hu_HU', ordinal=True).replace(soft_hyphen, '')
+            return numbers.spell_number(x, locale='hu_HU', ruleset="ordinal").replace(soft_hyphen, '')
 
         assert _spell(0) == "nulla"
         # assert _spell(0) == "nulladik"
@@ -120,7 +120,7 @@ def _spell(x):
 
     def test_en_GB_ordinal(self):
         def _spell(x):
-            return numbers.spell_number(x, locale='en_GB', ordinal=True).replace(soft_hyphen, '')
+            return numbers.spell_number(x, locale='en_GB', ruleset="ordinal").replace(soft_hyphen, '')
 
         assert _spell(0) == "zeroth"
         assert _spell(1) == "first"

From ba6bbc2a737dabcbb504f31fa3c2bfc0c8aac69d Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Tue, 31 Dec 2019 13:11:36 +0200
Subject: [PATCH 5/9] rbnf: store divisor and substitutions in Rule to avoid
 recomputation

---
 babel/rbnf.py | 37 +++++++++++++++----------------------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/babel/rbnf.py b/babel/rbnf.py
index fbcec253a..6df8d5932 100644
--- a/babel/rbnf.py
+++ b/babel/rbnf.py
@@ -191,6 +191,17 @@ def _parse_reference(string):
     return INTERNAL_REF, ""  # defaults to this
 
 
+def compute_divisor(value, radix):
+    # Highest power of radix <= value, by integer steps (a rounded log ratio may floor low)
+    if isinstance(value, int):
+        divisor = 1
+        while radix > 1 and divisor * radix <= value:
+            divisor *= radix
+        return divisor
+    else:
+        return None
+
+
 class RuleBasedNumberFormat(object):
     """
     RuleBasedNumberFormat's behavior consists of one or more rule sets
@@ -638,15 +649,16 @@ def __init__(self, value, text, radix=None):
         """
         divisor : iterator of literal, back_sub, fwd_sub, lit_exact elements parsed from rule 
         """
+        self.radix = int(radix or 10)
         if value in self.specials:
             self.value = value
         else:
             self.value = int(value)
 
+        self.divisor = compute_divisor(self.value, self.radix)
         self.text = text
-        self.radix = int(radix or 10)
-
-        self._parse(text)
+        self.tokens = list(tokenize(text))
+        self.substitutions = len([t for t in self.tokens if t.type in REFERENCE_TOKENS])
 
     def apply(self, number, context):
         """
@@ -694,25 +706,6 @@ def apply(self, number, context):
 
         return ''.join(res)
 
-    @property
-    def divisor(self):
-        """it is highest exponent of radix less then or equal to the rules's base"""
-        if isinstance(self.value, int):
-            if self.value == 0:
-                return 1
-            exp = decimal.Decimal(self.value).ln() / decimal.Decimal(self.radix).ln()
-            return int(self.radix ** math.floor(exp))
-
-    @property
-    def substitutions(self):
-        return len([t for t in self.tokens if t.type in REFERENCE_TOKENS])
-
-    def _parse(self, text):
-        try:
-            self.tokens = [t for t in tokenize(text)]
-        except ValueError:
-            raise TokenizationError(text)
-
     def __repr__(self):
         return 'Rule %s (%s) - %s\n%s\n' % (
             self.value, self.text,

From efb57245f57ae4cf554ba372c94a7d7d86ca7ed7 Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Tue, 31 Dec 2019 13:15:05 +0200
Subject: [PATCH 6/9] rbnf: eagerly evaluate self.rulesets to avoid alias
 lookup every time

---
 babel/rbnf.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/babel/rbnf.py b/babel/rbnf.py
index 6df8d5932..29d9ea4cf 100644
--- a/babel/rbnf.py
+++ b/babel/rbnf.py
@@ -237,10 +237,7 @@ class RuleBasedNumberFormat(object):
     def __init__(self, locale, group='SpelloutRules'):
         self._locale = locale
         self._group = group
-
-    @property
-    def rulesets(self):
-        return self._locale._data['rbnf_rules'][self._group]
+        self.rulesets = self._locale._data['rbnf_rules'][self._group]
 
     @property
     def available_rulesets(self):

From 777eea610df35aab4806006041d6d1d2bc9f6960 Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Tue, 31 Dec 2019 13:16:45 +0200
Subject: [PATCH 7/9] rbnf: correctly dump rulesets/rules to JSON file

---
 scripts/import_cldr.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py
index bd6967b2f..d3e05940b 100755
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -152,6 +152,8 @@ def _compact_dict(dict):
 def debug_repr(obj):
     if isinstance(obj, PluralRule):
         return obj.abstract
+    if isinstance(obj, (rbnf.Ruleset, rbnf.Rule)):
+        return vars(obj)
     return repr(obj)
 
 

From 8113d1bf9492a5c78239eec24479c4930e418b9d Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Fri, 28 Jan 2022 17:25:28 +0200
Subject: [PATCH 8/9] rbnf: replace .format & friends with f-strings

---
 babel/rbnf.py | 48 +++++++++++++++++++++---------------------------
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/babel/rbnf.py b/babel/rbnf.py
index 29d9ea4cf..77f771240 100644
--- a/babel/rbnf.py
+++ b/babel/rbnf.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 babel.rbnf
 ~~~~~~~~~~
@@ -30,7 +29,6 @@
 # Original request for Hebrew (currently not used in Hebrew):
 #     http://bugs.icu-project.org/trac/ticket/4039
 
-from __future__ import unicode_literals
 
 import re
 import math
@@ -187,7 +185,7 @@ def _parse_reference(string):
         return PUBLIC_REF, string[1:]
     if string[0] in '0#':
         return DECIMAL_REF, string
-    warnings.warn('Reference parsing error: %s' % string, SyntaxWarning)
+    warnings.warn(f'Reference parsing error: {string}', SyntaxWarning)
     return INTERNAL_REF, ""  # defaults to this
 
 
@@ -202,7 +200,7 @@ def compute_divisor(value, radix):
         return None
 
 
-class RuleBasedNumberFormat(object):
+class RuleBasedNumberFormat:
     """
     RuleBasedNumberFormat's behavior consists of one or more rule sets
 
@@ -264,18 +262,15 @@ def match_ruleset(self, ruleset):
         elif ruleset == "ordinal":
             ruleset, exact_match = self._find_matching_ruleset("spellout-ordinal")
             if not ruleset:
-                raise RulesetNotFound("No ordinal ruleset is available for %s" % (
-                    self._locale,
-                ))
+                raise RulesetNotFound(f"No ordinal ruleset is available for {self._locale}")
             if not exact_match:
-                warnings.warn("Using non-specific ordinal ruleset %s" % ruleset, RulesetSubstitutionWarning)
+                warnings.warn(f"Using non-specific ordinal ruleset {ruleset}", RulesetSubstitutionWarning)
         ruleset_obj = self.get_ruleset(ruleset)
         if not ruleset_obj:
-            raise RulesetNotFound("Ruleset %r is not one of the ones available for %s: %r" % (
-                ruleset,
-                self._locale,
-                self.available_rulesets,
-            ))
+            raise RulesetNotFound(
+                f"Ruleset {ruleset!r} is not one of the ones available for "
+                f"{self._locale}: {self.available_rulesets!r}"
+            )
         return ruleset_obj
 
     def format(self, number, ruleset=None):
@@ -302,11 +297,11 @@ def negotiate(cls, locale):
         """
         loc = Locale.negotiate([str(Locale.parse(locale))], get_global('rbnf_locales'))
         if not loc:
-            raise RulesetNotFound("No RBNF rules available for %s" % locale)
+            raise RulesetNotFound(f"No RBNF rules available for {locale}")
         return cls(loc)
 
 
-class Ruleset(object):
+class Ruleset:
     """
     Each rule set consists of a name, a colon, and a list of rules.
     (in the ICU syntax, CLDR differs because of XML)
@@ -496,7 +491,7 @@ def apply(self, number, parent, fractional=False):
         if fractional:
             index = self.get_rule_fractional(remainder)
             if index is None:
-                raise RuleNotFound("rule for fractional processing of %s" % remainder)
+                raise RuleNotFound(f"rule for fractional processing of {remainder}")
             rule = self.rules[index]
             context[INTEGRAL_TOKEN] = rule.value * remainder  # here remainder == number
             context['omit_optional'] = rule.value * number == 1
@@ -506,7 +501,7 @@ def apply(self, number, parent, fractional=False):
         if number < 0:
             rule = self.get_rule_special(NEGATIVE_NUMBER_RULE)
             if rule is None:
-                raise RuleNotFound("negative number rule (%s)" % NEGATIVE_NUMBER_RULE)
+                raise RuleNotFound(f"negative number rule ({NEGATIVE_NUMBER_RULE})")
             context[REMAINDER_TOKEN] = abs(number)
             return rule.apply(number, context)
 
@@ -524,12 +519,12 @@ def apply(self, number, parent, fractional=False):
                 if integral == 0:
                     rule = self.get_rule_special(PROPER_FRACTION_RULE)
                     if rule is None:
-                        raise RuleNotFound("proper fraction rule (%s)" % PROPER_FRACTION_RULE)
+                        raise RuleNotFound(f"proper fraction rule ({PROPER_FRACTION_RULE})")
 
                 else:
                     rule = self.get_rule_special(IMPROPER_FRACTION_RULE)
                     if rule is None:
-                        raise RuleNotFound("improper fraction rule (%s)" % IMPROPER_FRACTION_RULE)
+                        raise RuleNotFound(f"improper fraction rule ({IMPROPER_FRACTION_RULE})")
                     context['omit_optional'] = 0 < number < 1  # between 0 and 1
 
             return rule.apply(number, context)
@@ -537,7 +532,7 @@ def apply(self, number, parent, fractional=False):
         # normal rule
         index = self.get_rule_integral(integral)
         if index is None:
-            raise RuleNotFound("normal rule for %s" % integral)
+            raise RuleNotFound(f"normal rule for {integral}")
         rule = self.rules[index]
         i, r = divmod(integral, rule.divisor)
         context[REMAINDER_TOKEN] = r
@@ -629,10 +624,11 @@ def get_rule_fractional(self, val):
         return bst
 
     def __repr__(self):
-        return 'Ruleset %s %s\n%s\n' % (self.name, self.private, '\n'.join(['\t' + str(r) for r in self.rules]))
+        rules_str = '\n'.join(['\t' + str(r) for r in self.rules])
+        return f'Ruleset {self.name} {self.private}\n{rules_str}\n'
 
 
-class Rule(object):
+class Rule:
     """
     base value, a divisor, rule text, and zero, one, or two substitutions.
     """
@@ -699,12 +695,10 @@ def apply(self, number, context):
                 ))
 
             else:
-                raise ValueError('unknown token %s', t)
+                raise ValueError(f'unknown token {t}', t)
 
         return ''.join(res)
 
     def __repr__(self):
-        return 'Rule %s (%s) - %s\n%s\n' % (
-            self.value, self.text,
-            self.radix,
-            '\n'.join(['\t\t' + str(t) for t in self.tokens]))
+        tokens_str = '\n'.join(['\t\t' + str(t) for t in self.tokens])
+        return f'Rule {self.value} ({self.text}) - {self.radix}\n{tokens_str}\n'

From 7e9c9bcf2a63c87670bc7fd1bb9291bd731bd024 Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Tue, 31 Dec 2019 12:08:30 +0200
Subject: [PATCH 9/9] Add smoke test for all RBNF-enabled locales and rulesets

---
 setup.cfg                     | 1 +
 tests/conftest.py             | 4 ++++
 tests/test_number_spelling.py | 9 +++++++++
 3 files changed, 14 insertions(+)

diff --git a/setup.cfg b/setup.cfg
index 12585f0d7..c7f0a0644 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -6,6 +6,7 @@ norecursedirs = venv* .* _* scripts {args}
 doctest_optionflags = ELLIPSIS NORMALIZE_WHITESPACE ALLOW_UNICODE IGNORE_EXCEPTION_DETAIL
 markers =
     all_locales: parameterize test with all locales
+    all_rbnf_locales: parameterize test with all locales with RBNF rules
 
 [bdist_wheel]
 universal = 1
diff --git a/tests/conftest.py b/tests/conftest.py
index 5b14b1ca7..86c0adf45 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -16,3 +16,7 @@ def pytest_generate_tests(metafunc):
                 from babel.localedata import locale_identifiers
                 metafunc.parametrize("locale", list(locale_identifiers()))
                 break
+            if mark.name == "all_rbnf_locales":
+                from babel.core import get_global
+                metafunc.parametrize("locale", list(get_global('rbnf_locales')))
+                break
diff --git a/tests/test_number_spelling.py b/tests/test_number_spelling.py
index 65815b40b..6c98ea6af 100644
--- a/tests/test_number_spelling.py
+++ b/tests/test_number_spelling.py
@@ -148,6 +148,15 @@ def _spell(x):
         assert _spell(2001) == "two thousand first"
 
 
+@pytest.mark.all_rbnf_locales
+@pytest.mark.parametrize('ruleset', (None, 'year', 'ordinal'))
+def test_spelling_smoke(locale, ruleset):
+    try:
+        assert numbers.spell_number(2020, locale=locale, ruleset=ruleset)
+    except rbnf.RulesetNotFound:  # Not all locales have all rulesets: report a skip, not a pass.
+        pytest.skip(f'Locale {locale} has no ruleset {ruleset}')
+    except RecursionError:  # Some combinations currently fail with this :(
+        pytest.xfail(f'Locale {locale}, ruleset {ruleset}')
 
 # def test_hu_HU_error():
 #     with pytest.raises(exceptions.TooBigToSpell) as excinfo: