diff --git a/CHANGELOG.md b/CHANGELOG.md index ba73ad2..6f42369 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,11 +10,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Replace OpenStruct with regular class in `Zxcvbn::Match` for 2x performance improvement ([#61]) - Implement Trie data structure for dictionary matching with 1.4x additional performance improvement ([#62]) - Replace range operators with `String#slice` for string slicing operations ([#63]) + - Optimise L33t matcher with early bailout and improved deduplication ([#64]) [Unreleased]: https://github.com/envato/zxcvbn-ruby/compare/v1.2.4...HEAD [#61]: https://github.com/envato/zxcvbn-ruby/pull/61 [#62]: https://github.com/envato/zxcvbn-ruby/pull/62 [#63]: https://github.com/envato/zxcvbn-ruby/pull/63 +[#64]: https://github.com/envato/zxcvbn-ruby/pull/64 ## [1.2.4] - 2025-12-07 diff --git a/Gemfile b/Gemfile index 5f8c060..2abfca1 100644 --- a/Gemfile +++ b/Gemfile @@ -5,6 +5,7 @@ source 'https://rubygems.org' gemspec group :development do + gem 'benchmark' gem 'guard' gem 'guard-bundler', require: false gem 'guard-rspec', require: false diff --git a/lib/zxcvbn/matchers/l33t.rb b/lib/zxcvbn/matchers/l33t.rb index fbe104b..41db43e 100644 --- a/lib/zxcvbn/matchers/l33t.rb +++ b/lib/zxcvbn/matchers/l33t.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'set' + module Zxcvbn module Matchers class L33t @@ -25,26 +27,17 @@ def initialize(dictionary_matchers) def matches(password) matches = [] lowercased_password = password.downcase - combinations_to_try = l33t_subs(relevent_l33t_subtable(lowercased_password)) + relevent_subtable = relevent_l33t_subtable(lowercased_password) + + # Early bailout: if no l33t characters present, return empty matches + return matches if relevent_subtable.empty? + + combinations_to_try = l33t_subs(relevent_subtable) combinations_to_try.each do |substitution| @dictionary_matchers.each do |matcher| subbed_password = translate(lowercased_password, substitution) matcher.matches(subbed_password).each do |match| - length = match.j - match.i + 1 - token = password.slice(match.i, length) - next if token.downcase == match.matched_word.downcase - - match_substitutions = {} - substitution.each do |s, letter| - match_substitutions[s] = letter if token.include?(s) - end - match.l33t = true - match.token = token - match.sub = match_substitutions - match.sub_display = match_substitutions.map do |k, v| - "#{k} -> #{v}" - end.join(', ') - matches << match + process_match(match, password, substitution, matches) end end end @@ -52,9 +45,11 @@ def matches(password) end def translate(password, sub) - password.split('').map do |chr| - sub[chr] || chr - end.join + result = String.new + password.each_char do |chr| + result << (sub[chr] || chr) + end + result end def relevent_l33t_subtable(password) @@ -81,6 +76,26 @@ def l33t_subs(table) new_subs end + private + + def process_match(match, password, substitution, matches) + length = match.j - match.i + 1 + token = password.slice(match.i, length) + return if token.downcase == match.matched_word.downcase + + match_substitutions = {} + substitution.each do |s, letter| + match_substitutions[s] = letter if token.include?(s) + end + match.l33t = true + match.token = token + match.sub = match_substitutions + match.sub_display = match_substitutions.map do |k, v| + "#{k} -> #{v}" + end.join(', ') + matches << match + end + def find_substitutions(subs, table, keys) return subs if keys.empty? @@ -114,14 +129,12 @@ def find_substitutions(subs, table, keys) def dedup(subs) deduped = [] - members = [] + seen = Set.new subs.each do |sub| - assoc = sub.dup - - assoc.sort! - label = assoc.map { |k, v| "#{k},#{v}" }.join('-') - unless members.include?(label) - members << label + # Sort and convert to hash for consistent comparison + sorted_sub = sub.sort.to_h + unless seen.include?(sorted_sub) + seen.add(sorted_sub) deduped << sub end end diff --git a/spec/matchers/l33t_spec.rb b/spec/matchers/l33t_spec.rb index 21b06db..dbc1fb9 100644 --- a/spec/matchers/l33t_spec.rb +++ b/spec/matchers/l33t_spec.rb @@ -73,5 +73,107 @@ ] ) end + + it 'marks all matches as l33t' do + expect(matches.map(&:l33t).uniq).to eq([true]) + end + + it 'sets the sub_display field' do + expect(matches.first.sub_display).to eq('@ -> a') + end + + context 'with no l33t characters' do + it 'returns empty array for password without l33t chars' do + expect(matcher.matches('password')).to be_empty + end + + it 'returns empty array for simple words' do + expect(matcher.matches('hello')).to be_empty + end + end + + context 'with multiple l33t substitutions' do + it 'handles multiple substitution types' do + matches = matcher.matches('p@ssw0rd') + expect(matches).not_to be_empty + expect(matches.any? { |m| m.sub.keys.include?('@') }).to be true + expect(matches.any? { |m| m.sub.keys.include?('0') }).to be true + end + + it 'creates correct sub_display for multiple substitutions' do + matches = matcher.matches('h3ll0') + multi_sub_match = matches.find { |m| m.sub.length > 1 } + expect(multi_sub_match.sub_display).to include('->') + end + end + + context 'with same character representing different letters' do + it 'handles ambiguous l33t characters' do + # '1' can represent both 'i' and 'l' + matches = matcher.matches('test1ng') + expect(matches).not_to be_empty + end + end + + context 'with uppercase l33t speak' do + it 'finds matches in mixed case passwords' do + matches = matcher.matches('P@ssW0RD') + expect(matches).not_to be_empty + end + + it 'preserves original case in token' do + matches = matcher.matches('P@SS') + uppercase_match = matches.find { |m| m.token == 'P@S' } + expect(uppercase_match).not_to be_nil + expect(uppercase_match.token).to eq('P@S') + expect(uppercase_match.matched_word).to eq('pas') + end + end + + context 'with edge cases' do + it 'handles empty password' do + expect(matcher.matches('')).to be_empty + end + + it 'handles password with only l33t characters' do + matches = matcher.matches('@$') + expect(matches).to be_an(Array) + end + + it 'handles repeated l33t characters' do + matches = matcher.matches('@@@@') + expect(matches).to be_an(Array) + end + end + end + + describe '#translate' do + it 'substitutes l33t characters with their letter equivalents' do + substitution = { '@' => 'a', '0' => 'o' } + expect(matcher.translate('p@ssw0rd', substitution)).to eq('password') + end + + it 'leaves non-substituted characters unchanged' do + substitution = { '@' => 'a' } + expect(matcher.translate('p@ssword', substitution)).to eq('password') + end + + it 'handles empty password' do + expect(matcher.translate('', { '@' => 'a' })).to eq('') + end + + it 'handles empty substitution table' do + expect(matcher.translate('password', {})).to eq('password') + end + + it 'handles multiple occurrences of same character' do + substitution = { '@' => 'a' } + expect(matcher.translate('@@@@', substitution)).to eq('aaaa') + end + + it 'only substitutes specified characters' do + substitution = { '3' => 'e' } + expect(matcher.translate('l33t', substitution)).to eq('leet') + end end end