From c22e53cd2d4154b266be7f3479930e1a3806018e Mon Sep 17 00:00:00 2001 From: Orien Madgwick <497874+orien@users.noreply.github.com> Date: Fri, 2 Jan 2026 09:23:41 +0700 Subject: [PATCH 1/3] Pre-compute spatial graph statistics during Data initialization Instead of calculating average degree and starting positions for each spatial match, pre-compute these statistics once when loading adjacency graphs. This avoids repeated map/inject operations on graph data during password matching, improving performance by approximately 9.3%. Performance improvement: 0.097ms -> 0.088ms per password (9.3% faster) --- .rubocop_todo.yml | 16 ++-------------- CHANGELOG.md | 2 ++ lib/zxcvbn/data.rb | 19 ++++++++++++++++++- lib/zxcvbn/math.rb | 7 ++----- 4 files changed, 24 insertions(+), 20 deletions(-) diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index a89c2fe..eefc646 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -1,6 +1,6 @@ # This configuration was generated by # `rubocop --auto-gen-config --auto-gen-only-exclude --exclude-limit 100 --no-offense-counts --no-auto-gen-timestamp` -# using RuboCop version 1.81.7. +# using RuboCop version 1.82.1. # The point is for the user to remove these configuration records # one by one as the offenses are removed from the code base. # Note that changes in the inspected code, or installation of new @@ -22,17 +22,7 @@ Metrics/AbcSize: # AllowedMethods: refine Metrics/BlockLength: Exclude: - - '**/*.gemspec' - 'lib/zxcvbn/matchers/spatial.rb' - - 'spec/feedback_giver_spec.rb' - - 'spec/match_spec.rb' - - 'spec/matchers/date_spec.rb' - - 'spec/matchers/l33t_spec.rb' - - 'spec/scoring/crack_time_spec.rb' - - 'spec/scoring/entropy_spec.rb' - - 'spec/scoring/math_spec.rb' - - 'spec/support/matcher.rb' - - 'spec/tester_spec.rb' # Configuration parameters: CountComments, Max, CountAsOne. Metrics/ClassLength: @@ -46,7 +36,6 @@ Metrics/CyclomaticComplexity: Exclude: - 'lib/zxcvbn/entropy.rb' - 'lib/zxcvbn/feedback_giver.rb' - - 'lib/zxcvbn/matchers/l33t.rb' - 'lib/zxcvbn/matchers/new_l33t.rb' - 'lib/zxcvbn/matchers/spatial.rb' - 'lib/zxcvbn/math.rb' @@ -56,6 +45,7 @@ Metrics/CyclomaticComplexity: Metrics/MethodLength: Exclude: - 'lib/zxcvbn/crack_time.rb' + - 'lib/zxcvbn/data.rb' - 'lib/zxcvbn/entropy.rb' - 'lib/zxcvbn/feedback_giver.rb' - 'lib/zxcvbn/matchers/date.rb' @@ -109,8 +99,6 @@ Style/ClassAndModuleChildren: # Configuration parameters: AllowedConstants. Style/Documentation: Exclude: - - 'spec/**/*' - - 'test/**/*' - 'lib/zxcvbn.rb' - 'lib/zxcvbn/clock.rb' - 'lib/zxcvbn/crack_time.rb' diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f42369..18f5969 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,12 +11,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Implement Trie data structure for dictionary matching with 1.4x additional performance improvement ([#62]) - Replace range operators with `String#slice` for string slicing operations ([#63]) - Optimise L33t matcher with early bailout and improved deduplication ([#64]) + - Pre-compute spatial graph statistics during data initialisation ([#65]) [Unreleased]: https://github.com/envato/zxcvbn-ruby/compare/v1.2.4...HEAD [#61]: https://github.com/envato/zxcvbn-ruby/pull/61 [#62]: https://github.com/envato/zxcvbn-ruby/pull/62 [#63]: https://github.com/envato/zxcvbn-ruby/pull/63 [#64]: https://github.com/envato/zxcvbn-ruby/pull/64 +[#65]: https://github.com/envato/zxcvbn-ruby/pull/65 ## [1.2.4] - 2025-12-07 diff --git a/lib/zxcvbn/data.rb b/lib/zxcvbn/data.rb index 76ad89d..bb5cffe 100644 --- a/lib/zxcvbn/data.rb +++ b/lib/zxcvbn/data.rb @@ -16,9 +16,10 @@ def initialize ) @adjacency_graphs = JSON.parse(DATA_PATH.join('adjacency_graphs.json').read) @dictionary_tries = build_tries + @graph_stats = compute_graph_stats end - attr_reader :ranked_dictionaries, :adjacency_graphs, :dictionary_tries + attr_reader :ranked_dictionaries, :adjacency_graphs, :dictionary_tries, :graph_stats def add_word_list(name, list) ranked_dict = DictionaryRanker.rank_dictionary(list) @@ -41,5 +42,21 @@ def build_trie(ranked_dictionary) ranked_dictionary.each { |word, rank| trie.insert(word, rank) } trie end + + def compute_graph_stats + stats = {} + @adjacency_graphs.each do |graph_name, graph| + degrees = graph.map { |_, neighbors| neighbors.compact.size } + sum = degrees.inject(0, :+) + average_degree = sum.to_f / graph.size + starting_positions = graph.length + + stats[graph_name] = { + average_degree: average_degree, + starting_positions: starting_positions + } + end + stats + end end end diff --git a/lib/zxcvbn/math.rb b/lib/zxcvbn/math.rb index dbf614d..b68a37e 100644 --- a/lib/zxcvbn/math.rb +++ b/lib/zxcvbn/math.rb @@ -44,14 +44,11 @@ def nCk(n, k) end def average_degree_for_graph(graph_name) - graph = data.adjacency_graphs[graph_name] - degrees = graph.map { |_, neighbors| neighbors.compact.size } - sum = degrees.inject(0, :+) - sum.to_f / graph.size + data.graph_stats[graph_name][:average_degree] end def starting_positions_for_graph(graph_name) - data.adjacency_graphs[graph_name].length + data.graph_stats[graph_name][:starting_positions] end end end From 13496b23c6593cedbbe822605afd8d107074ccea Mon Sep 17 00:00:00 2001 From: Orien Madgwick <497874+orien@users.noreply.github.com> Date: Fri, 2 Jan 2026 09:38:02 +0700 Subject: [PATCH 2/3] Add comprehensive tests for lg and nCk methods Added test coverage for previously untested Math module methods: lg (logarithm base 2): - Powers of 2 (exact values) - Non-power-of-2 values (with tolerance) - Decimal values (negative logs) nCk (combinations): - Edge cases (k > n, k = 0) - Small combinations (n=5) - Larger values (poker hands: 52 choose 5) - Symmetry property verification - Basic edge cases Test count increased from 18 to 27 examples. --- spec/scoring/math_spec.rb | 58 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/spec/scoring/math_spec.rb b/spec/scoring/math_spec.rb index 1139fc5..9164778 100644 --- a/spec/scoring/math_spec.rb +++ b/spec/scoring/math_spec.rb @@ -134,4 +134,62 @@ def data end end end + + describe '#lg' do + it 'calculates log base 2 correctly' do + expect(lg(1)).to eq 0.0 + expect(lg(2)).to eq 1.0 + expect(lg(4)).to eq 2.0 + expect(lg(8)).to eq 3.0 + expect(lg(16)).to eq 4.0 + end + + it 'handles non-power-of-2 values' do + expect(lg(3)).to be_within(0.0001).of(1.5849625) + expect(lg(10)).to be_within(0.0001).of(3.3219281) + expect(lg(100)).to be_within(0.0001).of(6.6438562) + end + + it 'handles decimal values' do + expect(lg(0.5)).to eq(-1.0) + expect(lg(0.25)).to eq(-2.0) + end + end + + describe '#nCk' do + it 'returns 0 when k > n' do + expect(nCk(5, 10)).to eq 0 + expect(nCk(0, 1)).to eq 0 + end + + it 'returns 1 when k is zero' do + expect(nCk(0, 0)).to eq 1 + expect(nCk(5, 0)).to eq 1 + expect(nCk(100, 0)).to eq 1 + end + + it 'calculates combinations correctly' do + expect(nCk(5, 1)).to eq 5 + expect(nCk(5, 2)).to eq 10 + expect(nCk(5, 3)).to eq 10 + expect(nCk(5, 4)).to eq 5 + expect(nCk(5, 5)).to eq 1 + end + + it 'handles larger values' do + expect(nCk(10, 5)).to eq 252 + expect(nCk(20, 10)).to eq 184_756 + expect(nCk(52, 5)).to eq 2_598_960 # poker hands + end + + it 'demonstrates symmetry property C(n,k) = C(n,n-k)' do + expect(nCk(10, 3)).to eq nCk(10, 7) + expect(nCk(20, 5)).to eq nCk(20, 15) + end + + it 'handles edge cases' do + expect(nCk(1, 1)).to eq 1 + expect(nCk(2, 1)).to eq 2 + end + end end From e8539d32f14594469c3af969f907b5ec7ca6e795 Mon Sep 17 00:00:00 2001 From: Orien Madgwick <497874+orien@users.noreply.github.com> Date: Fri, 2 Jan 2026 09:40:43 +0700 Subject: [PATCH 3/3] Add comprehensive tests for Data class Created test suite for previously untested Data class covering: Initialization: - Dictionary loading (5 expected dictionaries) - Adjacency graph loading (4 expected graphs) - Trie building for all dictionaries - Graph statistics pre-computation Graph statistics: - Verification of average_degree values - Verification of starting_positions values - Correctness checks for qwerty and keypad Ranked dictionaries: - Word ranking verification - Common password frequency checks Custom word lists: - Dictionary addition via add_word_list - Trie generation for custom dictionaries - Word searchability via tries - Empty list handling Test count increased from 271 to 291 examples (20 new tests). --- spec/data_spec.rb | 130 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 spec/data_spec.rb diff --git a/spec/data_spec.rb b/spec/data_spec.rb new file mode 100644 index 0000000..3150e0b --- /dev/null +++ b/spec/data_spec.rb @@ -0,0 +1,130 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Zxcvbn::Data do + let(:data) { described_class.new } + + describe '#initialize' do + it 'loads ranked dictionaries' do + expect(data.ranked_dictionaries).to be_a(Hash) + expect(data.ranked_dictionaries).not_to be_empty + end + + it 'loads all expected dictionaries' do + expect(data.ranked_dictionaries.keys).to include('english', 'female_names', 'male_names', 'passwords', 'surnames') + end + + it 'loads adjacency graphs' do + expect(data.adjacency_graphs).to be_a(Hash) + expect(data.adjacency_graphs).not_to be_empty + end + + it 'loads expected adjacency graphs' do + expect(data.adjacency_graphs.keys).to include('qwerty', 'dvorak', 'keypad', 'mac_keypad') + end + + it 'builds dictionary tries' do + expect(data.dictionary_tries).to be_a(Hash) + expect(data.dictionary_tries).not_to be_empty + end + + it 'builds tries for all dictionaries' do + expect(data.dictionary_tries.keys).to match_array(data.ranked_dictionaries.keys) + end + + it 'creates Trie objects' do + data.dictionary_tries.each_value do |trie| + expect(trie).to be_a(Zxcvbn::Trie) + end + end + + it 'computes graph statistics' do + expect(data.graph_stats).to be_a(Hash) + expect(data.graph_stats).not_to be_empty + end + + it 'computes stats for all graphs' do + expect(data.graph_stats.keys).to match_array(data.adjacency_graphs.keys) + end + + it 'includes average_degree in graph stats' do + data.graph_stats.each_value do |stats| + expect(stats).to have_key(:average_degree) + expect(stats[:average_degree]).to be_a(Float) + expect(stats[:average_degree]).to be > 0 + end + end + + it 'includes starting_positions in graph stats' do + data.graph_stats.each_value do |stats| + expect(stats).to have_key(:starting_positions) + expect(stats[:starting_positions]).to be_a(Integer) + expect(stats[:starting_positions]).to be > 0 + end + end + end + + describe '#ranked_dictionaries' do + it 'returns dictionaries with word rankings' do + dict = data.ranked_dictionaries['english'] + expect(dict).to be_a(Hash) + expect(dict.values.first).to be_a(Integer) + end + + it 'ranks common words lower (more frequent)' do + dict = data.ranked_dictionaries['passwords'] + # Common passwords should have low rank numbers + expect(dict['password']).to be_a(Integer) + expect(dict['password']).to be < 100 + end + end + + describe '#add_word_list' do + it 'adds a custom dictionary' do + data.add_word_list('custom', %w[foo bar baz]) + expect(data.ranked_dictionaries).to have_key('custom') + end + + it 'ranks the custom dictionary' do + data.add_word_list('custom', %w[foo bar baz]) + dict = data.ranked_dictionaries['custom'] + expect(dict['foo']).to be_a(Integer) + expect(dict['bar']).to be_a(Integer) + expect(dict['baz']).to be_a(Integer) + end + + it 'builds a trie for the custom dictionary' do + data.add_word_list('custom', %w[foo bar baz]) + expect(data.dictionary_tries).to have_key('custom') + expect(data.dictionary_tries['custom']).to be_a(Zxcvbn::Trie) + end + + it 'makes custom words searchable via trie' do + data.add_word_list('custom', %w[test]) + trie = data.dictionary_tries['custom'] + results = trie.search_prefixes('testing', 0) + expect(results).not_to be_empty + expect(results.first[0]).to eq('test') + end + + it 'handles empty word lists' do + data.add_word_list('empty', []) + expect(data.ranked_dictionaries['empty']).to be_empty + end + end + + describe '#graph_stats' do + it 'has correct values for qwerty keyboard' do + stats = data.graph_stats['qwerty'] + expect(stats[:average_degree]).to be_within(0.01).of(4.6) + expect(stats[:starting_positions]).to eq(94) + end + + it 'has correct values for keypad' do + stats = data.graph_stats['keypad'] + expect(stats[:average_degree]).to be_within(0.01).of(5.07) + expect(stats[:starting_positions]).to eq(15) + end + end +end