From 9d48c33dceffededc6e8600d104d66fd4e32eba4 Mon Sep 17 00:00:00 2001 From: Ruben Ignacio Guzman Zamora Date: Sat, 25 Jan 2025 05:50:42 -0500 Subject: [PATCH 1/4] Improve currency symbol detection --- .gitignore | 1 + lib/monetize/parser.rb | 87 ++++++++++++++++++++++++++++++------------ spec/monetize_spec.rb | 19 +++++++-- 3 files changed, 78 insertions(+), 29 deletions(-) diff --git a/.gitignore b/.gitignore index 2e57a9e2c..f4a763408 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ test/version_tmp tmp .ruby-version .tool-versions +.ruby-gemset diff --git a/lib/monetize/parser.rb b/lib/monetize/parser.rb index bace626ad..f88eef507 100644 --- a/lib/monetize/parser.rb +++ b/lib/monetize/parser.rb @@ -2,7 +2,7 @@ module Monetize class Parser - CURRENCY_SYMBOLS = { + INITIAL_CURRENCY_SYMBOLS = { '$' => 'USD', '€' => 'EUR', '£' => 'GBP', @@ -28,16 +28,35 @@ class Parser 'S$' => 'SGD', 'HK$'=> 'HKD', 'NT$'=> 'TWD', - '₱' => 'PHP', - } - - CURRENCY_SYMBOL_REGEX = /(? 'PHP' + }.freeze + + # FIXME: This ignored symbols could be ambiguous or conflict with other symbols + IGNORED_SYMBOLS = ['kr', 'NIO$', 'UM', 'L', 'oz t', "so'm", 'CUC$'].freeze + MULTIPLIER_SUFFIXES = { 'K' => 3, 'M' => 6, 'B' => 9, 'T' => 12 } MULTIPLIER_SUFFIXES.default = 0 - MULTIPLIER_REGEXP = Regexp.new(format('^(.*?\d)(%s)\b([^\d]*)$', MULTIPLIER_SUFFIXES.keys.join('|')), 'i') + MULTIPLIER_REGEXP = /^(.*?\d)(#{MULTIPLIER_SUFFIXES.keys.join('|')})\b([^\d]*)$/i DEFAULT_DECIMAL_MARK = '.'.freeze + def self.currency_symbols + @@currency_symbols ||= Money::Currency.table.reduce(INITIAL_CURRENCY_SYMBOLS.dup) do |memo, (_, currency)| + symbol = currency[:symbol] + symbol = currency[:disambiguate_symbol] if memo.key?(symbol) + + next memo if is_invalid_currency_symbol?(symbol) + + memo[symbol] = currency[:iso_code] unless memo.value?(currency[:iso_code]) + + memo + end.freeze + end + + def self.currency_symbol_regex + @@currency_symbol_regex ||= /(? err @@ -75,11 +105,8 @@ def to_big_decimal(value) attr_reader :input, :fallback_currency, :options def parse_currency - computed_currency = nil - computed_currency = input[/[A-Z]{2,3}/] - computed_currency = nil unless Monetize::Parser::CURRENCY_SYMBOLS.value?(computed_currency) - computed_currency ||= compute_currency if assume_from_symbol? - + computed_currency = compute_currency_from_iso_code + computed_currency ||= compute_currency_from_symbol if assume_from_symbol? computed_currency || fallback_currency || Money.default_currency end @@ -100,9 +127,18 @@ def apply_sign(negative, amount) negative ? amount * -1 : amount end - def compute_currency - match = input.match(CURRENCY_SYMBOL_REGEX) - CURRENCY_SYMBOLS[match.to_s] if match + def compute_currency_from_iso_code + computed_currency = input[/[A-Z]{2,4}/] + + return unless computed_currency + + computed_currency if self.class.currency_symbols.value?(computed_currency) + end + + def compute_currency_from_symbol + match = input.match(self.class.currency_symbol_regex) + + self.class.currency_symbols[match.to_s] if match end def extract_major_minor(num, currency) @@ -127,21 +163,20 @@ def minor_has_correct_dp_for_currency_subunit?(minor, currency) def extract_major_minor_with_single_delimiter(num, currency, delimiter) if expect_whole_subunits? - _possible_major, possible_minor = split_major_minor(num, delimiter) + possible_major, possible_minor = split_major_minor(num, delimiter) + if minor_has_correct_dp_for_currency_subunit?(possible_minor, currency) - split_major_minor(num, delimiter) - else - extract_major_minor_with_tentative_delimiter(num, delimiter) + return [possible_major, possible_minor] end else - if delimiter == currency.decimal_mark - split_major_minor(num, delimiter) - elsif Monetize.enforce_currency_delimiters && delimiter == currency.thousands_separator - [num.gsub(delimiter, ''), 0] - else - extract_major_minor_with_tentative_delimiter(num, delimiter) + return split_major_minor(num, delimiter) if delimiter == currency.decimal_mark + + if Monetize.enforce_currency_delimiters && delimiter == currency.thousands_separator + return [num.gsub(delimiter, ''), 0] end end + + extract_major_minor_with_tentative_delimiter(num, delimiter) end def extract_major_minor_with_tentative_delimiter(num, delimiter) @@ -166,7 +201,9 @@ def extract_major_minor_with_tentative_delimiter(num, delimiter) end def extract_multiplier - if (matches = MULTIPLIER_REGEXP.match(input)) + matches = MULTIPLIER_REGEXP.match(input) + + if matches multiplier_suffix = matches[2].upcase [MULTIPLIER_SUFFIXES[multiplier_suffix], "#{$1}#{$3}"] else diff --git a/spec/monetize_spec.rb b/spec/monetize_spec.rb index 00aad8002..2f02d4150 100644 --- a/spec/monetize_spec.rb +++ b/spec/monetize_spec.rb @@ -56,10 +56,13 @@ Monetize.assume_from_symbol = false end - Monetize::Parser::CURRENCY_SYMBOLS.each_pair do |symbol, iso_code| + Monetize::Parser.currency_symbols.each_pair do |symbol, iso_code| context iso_code do let(:currency) { Money::Currency.find(iso_code) } - let(:amount) { 5_95 } + let(:amount) do + # FIXME: The exponent > 3 (e.g. BTC) causes problems when converting to string from float + (currency.exponent > 3)? (595 * currency.subunit_to_unit) : 595 + end let(:amount_in_units) { amount.to_f / currency.subunit_to_unit } it 'ensures correct amount calculations for test' do @@ -109,13 +112,21 @@ end it 'parses formatted inputs without currency detection when overridden' do - expect(Monetize.parse("#{symbol}5.95", nil, assume_from_symbol: false)).to eq Money.new(amount, 'USD') + if Monetize::Parser.currency_symbols.value?(symbol) + currency_iso_code = symbol + amount_str = currency.exponent == 0 ? '595' : '5.95' + else + currency_iso_code = 'USD' + amount_str = '5.95' + end + + expect(Monetize.parse("#{symbol}#{amount_str}", nil, assume_from_symbol: false)).to eq Money.new(595, currency_iso_code) end end end it 'should assume default currency if not a recognised symbol' do - expect(Monetize.parse('L9.99')).to eq Money.new(999, 'USD') + expect(Monetize.parse('NRS9.99')).to eq Money.new(999, 'USD') end it 'should use provided currency over symbol' do From a25761c73fbb4bf146473dd575d5a615beb2b692 Mon Sep 17 00:00:00 2001 From: Ruben Ignacio Guzman Zamora Date: Tue, 12 Aug 2025 11:33:53 -0500 Subject: [PATCH 2/4] Add currency getter method in parser class --- lib/monetize/parser.rb | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/lib/monetize/parser.rb b/lib/monetize/parser.rb index f88eef507..f18dd7f90 100644 --- a/lib/monetize/parser.rb +++ b/lib/monetize/parser.rb @@ -63,9 +63,11 @@ def initialize(input, fallback_currency = Money.default_currency, options = {}) @options = options end - def parse - currency = Money::Currency.wrap(parse_currency) + def currency + @currency ||= Money::Currency.wrap(parse_currency) + end + def parse multiplier_exp, input = extract_multiplier num = input.gsub(/(?:^#{currency.symbol}|[^\d.,'-]+)/, '') @@ -74,9 +76,9 @@ def parse num.chop! if num =~ /[\.|,]$/ - major, minor = extract_major_minor(num, currency) + num = extract_major_minor(num).join(DEFAULT_DECIMAL_MARK) - amount = to_big_decimal([major, minor].join(DEFAULT_DECIMAL_MARK)) + amount = to_big_decimal(num) amount = apply_multiplier(multiplier_exp, amount) amount = apply_sign(negative, amount) @@ -141,7 +143,7 @@ def compute_currency_from_symbol self.class.currency_symbols[match.to_s] if match end - def extract_major_minor(num, currency) + def extract_major_minor(num) used_delimiters = num.scan(/[^\d]/).uniq case used_delimiters.length @@ -151,21 +153,21 @@ def extract_major_minor(num, currency) thousands_separator, decimal_mark = used_delimiters split_major_minor(num.gsub(thousands_separator, ''), decimal_mark) when 1 - extract_major_minor_with_single_delimiter(num, currency, used_delimiters.first) + extract_major_minor_with_single_delimiter(num, used_delimiters.first) else fail ParseError, 'Invalid amount' end end - def minor_has_correct_dp_for_currency_subunit?(minor, currency) + def minor_has_correct_dp_for_currency_subunit?(minor) minor.length == currency.subunit_to_unit.to_s.length - 1 end - def extract_major_minor_with_single_delimiter(num, currency, delimiter) + def extract_major_minor_with_single_delimiter(num, delimiter) if expect_whole_subunits? possible_major, possible_minor = split_major_minor(num, delimiter) - if minor_has_correct_dp_for_currency_subunit?(possible_minor, currency) + if minor_has_correct_dp_for_currency_subunit?(possible_minor) return [possible_major, possible_minor] end else From b9419c8ff293a2ed6f210939f257d278205c5e82 Mon Sep 17 00:00:00 2001 From: Ruben Ignacio Guzman Zamora Date: Tue, 12 Aug 2025 22:28:05 -0500 Subject: [PATCH 3/4] extract_sign method return 1 or -1 instead boolean value --- lib/monetize/parser.rb | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/lib/monetize/parser.rb b/lib/monetize/parser.rb index f18dd7f90..18192c2f2 100644 --- a/lib/monetize/parser.rb +++ b/lib/monetize/parser.rb @@ -72,7 +72,7 @@ def parse num = input.gsub(/(?:^#{currency.symbol}|[^\d.,'-]+)/, '') - negative, num = extract_sign(num) + sign, num = extract_sign(num) num.chop! if num =~ /[\.|,]$/ @@ -80,7 +80,7 @@ def parse amount = to_big_decimal(num) amount = apply_multiplier(multiplier_exp, amount) - amount = apply_sign(negative, amount) + amount = sign * amount [amount, currency] end @@ -125,10 +125,6 @@ def apply_multiplier(multiplier_exp, amount) amount * 10**multiplier_exp end - def apply_sign(negative, amount) - negative ? amount * -1 : amount - end - def compute_currency_from_iso_code computed_currency = input[/[A-Z]{2,4}/] @@ -214,7 +210,7 @@ def extract_multiplier end def extract_sign(input) - result = (input =~ /^-+(.*)$/ || input =~ /^(.*)-+$/) ? [true, $1] : [false, input] + result = (input =~ /^-+(.*)$/ || input =~ /^(.*)-+$/) ? [-1, $1] : [1, input] fail ParseError, 'Invalid amount (hyphen)' if result[1].include?('-') result end From 4f532ba74d3d37506a4e341e6683129fb014cbf4 Mon Sep 17 00:00:00 2001 From: Ruben Ignacio Guzman Zamora Date: Tue, 12 Aug 2025 22:37:36 -0500 Subject: [PATCH 4/4] Refactor split_major_minor method --- lib/monetize/parser.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/monetize/parser.rb b/lib/monetize/parser.rb index 18192c2f2..acb8f4354 100644 --- a/lib/monetize/parser.rb +++ b/lib/monetize/parser.rb @@ -217,9 +217,12 @@ def extract_sign(input) def split_major_minor(num, delimiter) splits = num.split(delimiter) + fail ParseError, 'Invalid amount (multiple delimiters)' if splits.length > 2 - [splits[0], splits[1] || '00'] + splits[1] = '00' if splits.length == 1 + + splits end end end