From 014c5c96cc85b87822680cea871bde5c57e35719 Mon Sep 17 00:00:00 2001 From: Ruben Ignacio Guzman Zamora Date: Sat, 25 Jan 2025 05:50:42 -0500 Subject: [PATCH] Improve currency symbol detection --- .gitignore | 1 + lib/monetize/parser.rb | 75 ++++++++++++++++++++++++++++++------------ spec/monetize_spec.rb | 19 ++++++++--- 3 files changed, 70 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 555aadd1bd..670448391b 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ test/tmp test/version_tmp tmp .ruby-version +.ruby-gemset diff --git a/lib/monetize/parser.rb b/lib/monetize/parser.rb index 8d88c09a67..7350350f61 100644 --- a/lib/monetize/parser.rb +++ b/lib/monetize/parser.rb @@ -2,7 +2,7 @@ module Monetize class Parser - CURRENCY_SYMBOLS = { + INITIAL_CURRENCY_SYMBOLS = { '$' => 'USD', '€' => 'EUR', '£' => 'GBP', @@ -28,8 +28,10 @@ class Parser 'S$' => 'SGD', 'HK$'=> 'HKD', 'NT$'=> 'TWD', - '₱' => 'PHP', - } + '₱' => 'PHP' + }.freeze + # FIXME: These ignored symbols may be ambiguous or conflict with other symbols + IGNORED_SYMBOLS = ['kr', 'NIO$', 'UM', 'L', 'oz t', "so'm", 'CUC$'].freeze MULTIPLIER_SUFFIXES = { 'K' => 3, 'M' => 6, 'B' => 9, 'T' => 12 } MULTIPLIER_SUFFIXES.default = 0 @@ -37,6 +39,19 @@ class Parser DEFAULT_DECIMAL_MARK = '.'.freeze + def self.currency_symbols + @@currency_symbols ||= Money::Currency.table.reduce(INITIAL_CURRENCY_SYMBOLS.dup) do |memo, (_, currency)| + symbol = currency[:symbol] + symbol = currency[:disambiguate_symbol] if symbol && memo.key?(symbol) + + next memo if is_invalid_currency_symbol?(symbol) + + memo[symbol] = currency[:iso_code] unless memo.value?(currency[:iso_code]) + + memo + end.freeze + end + def initialize(input, fallback_currency = Money.default_currency, options = {}) @input = input.to_s.strip @fallback_currency = fallback_currency @@ -65,6 +80,17 @@ def parse private + def self.is_invalid_currency_symbol?(symbol) + currency_symbol_blank?(symbol) || + symbol.include?('.') || # 
Ignore symbols with dots because they can be confused with decimal marks + IGNORED_SYMBOLS.include?(symbol) || + MULTIPLIER_REGEXP.match?("1#{symbol}") # Ignore symbols that can be confused with multipliers + end + + def self.currency_symbol_blank?(symbol) + symbol.nil? || symbol.empty? + end + def to_big_decimal(value) BigDecimal(value) rescue ::ArgumentError => err @@ -74,11 +100,8 @@ def to_big_decimal(value) attr_reader :input, :fallback_currency, :options def parse_currency - computed_currency = nil - computed_currency = input[/[A-Z]{2,3}/] - computed_currency = nil unless Monetize::Parser::CURRENCY_SYMBOLS.value?(computed_currency) - computed_currency ||= compute_currency if assume_from_symbol? - + computed_currency = compute_currency_from_iso_code + computed_currency ||= compute_currency_from_symbol if assume_from_symbol? computed_currency || fallback_currency || Money.default_currency end @@ -99,9 +122,18 @@ def apply_sign(negative, amount) negative ? amount * -1 : amount end - def compute_currency + def compute_currency_from_iso_code + computed_currency = input[/[A-Z]{2,4}/] + + return unless computed_currency + + computed_currency if self.class.currency_symbols.value?(computed_currency) + end + + def compute_currency_from_symbol match = input.match(currency_symbol_regex) - CURRENCY_SYMBOLS[match.to_s] if match + + self.class.currency_symbols[match.to_s] if match end def extract_major_minor(num, currency) @@ -127,20 +159,19 @@ def minor_has_correct_dp_for_currency_subunit?(minor, currency) def extract_major_minor_with_single_delimiter(num, currency, delimiter) if expect_whole_subunits? 
possible_major, possible_minor = split_major_minor(num, delimiter) + if minor_has_correct_dp_for_currency_subunit?(possible_minor, currency) - split_major_minor(num, delimiter) - else - extract_major_minor_with_tentative_delimiter(num, delimiter) + return [possible_major, possible_minor] end else - if delimiter == currency.decimal_mark - split_major_minor(num, delimiter) - elsif Monetize.enforce_currency_delimiters && delimiter == currency.thousands_separator - [num.gsub(delimiter, ''), 0] - else - extract_major_minor_with_tentative_delimiter(num, delimiter) + return split_major_minor(num, delimiter) if delimiter == currency.decimal_mark + + if Monetize.enforce_currency_delimiters && delimiter == currency.thousands_separator + return [num.gsub(delimiter, ''), 0] end end + + extract_major_minor_with_tentative_delimiter(num, delimiter) end def extract_major_minor_with_tentative_delimiter(num, delimiter) @@ -165,7 +196,9 @@ def extract_major_minor_with_tentative_delimiter(num, delimiter) end def extract_multiplier - if (matches = MULTIPLIER_REGEXP.match(input)) + matches = MULTIPLIER_REGEXP.match(input) + + if matches multiplier_suffix = matches[2].upcase [MULTIPLIER_SUFFIXES[multiplier_suffix], "#{$1}#{$3}"] else @@ -180,7 +213,7 @@ def extract_sign(input) end def regex_safe_symbols - CURRENCY_SYMBOLS.keys.map { |key| Regexp.escape(key) }.join('|') + self.class.currency_symbols.keys.map { |key| Regexp.escape(key) }.join('|') end def split_major_minor(num, delimiter) diff --git a/spec/monetize_spec.rb b/spec/monetize_spec.rb index 00aad80025..2f02d4150f 100644 --- a/spec/monetize_spec.rb +++ b/spec/monetize_spec.rb @@ -56,10 +56,13 @@ Monetize.assume_from_symbol = false end - Monetize::Parser::CURRENCY_SYMBOLS.each_pair do |symbol, iso_code| + Monetize::Parser.currency_symbols.each_pair do |symbol, iso_code| context iso_code do let(:currency) { Money::Currency.find(iso_code) } - let(:amount) { 5_95 } + let(:amount) do + # FIXME: The exponent > 3 (e.g. 
BTC) causes problems when converting a float to a string + (currency.exponent > 3)? (595 * currency.subunit_to_unit) : 595 + end let(:amount_in_units) { amount.to_f / currency.subunit_to_unit } it 'ensures correct amount calculations for test' do @@ -109,13 +112,21 @@ end it 'parses formatted inputs without currency detection when overridden' do - expect(Monetize.parse("#{symbol}5.95", nil, assume_from_symbol: false)).to eq Money.new(amount, 'USD') + if Monetize::Parser.currency_symbols.value?(symbol) + currency_iso_code = symbol + amount_str = currency.exponent == 0 ? '595' : '5.95' + else + currency_iso_code = 'USD' + amount_str = '5.95' + end + + expect(Monetize.parse("#{symbol}#{amount_str}", nil, assume_from_symbol: false)).to eq Money.new(595, currency_iso_code) end end end it 'should assume default currency if not a recognised symbol' do - expect(Monetize.parse('L9.99')).to eq Money.new(999, 'USD') + expect(Monetize.parse('NRS9.99')).to eq Money.new(999, 'USD') end it 'should use provided currency over symbol' do