From 9017a9299f634f11ac09b01ef451825df1e7a2a3 Mon Sep 17 00:00:00 2001 From: Dmytro Horoshko Date: Fri, 25 Apr 2025 13:13:39 +0300 Subject: [PATCH 1/3] Use the same scan capture group behaviour as in ruby --- ext/rust_regexp/src/lib.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/ext/rust_regexp/src/lib.rs b/ext/rust_regexp/src/lib.rs index a3e23f6..f95d5b4 100644 --- a/ext/rust_regexp/src/lib.rs +++ b/ext/rust_regexp/src/lib.rs @@ -33,6 +33,7 @@ impl RustRegexp { let regex = &self.0; let haystack = unsafe { haystack.as_slice() }; + // no capture groups defined except the default one if regex.captures_len() == 1 { // speed optimization, `.find` is faster than `.captures` if let Some(capture) = regex.find(haystack) { @@ -65,17 +66,12 @@ impl RustRegexp { let regex = &self.0; let haystack = unsafe { haystack.as_slice() }; + // no capture groups defined except the default one if regex.captures_len() == 1 { // speed optimization, `.find_iter` is faster than `.captures_iter` for capture in regex.find_iter(haystack) { - let group = RArray::with_capacity(1); - - group - .push(Self::capture_to_ruby_string(&capture)) - .expect("Non-frozen array"); - result - .push(group) + .push(Self::capture_to_ruby_string(&capture)) .expect("Non-frozen array"); } } else { From b627b6818290b91f13d9ffcce75b88c6009c4960 Mon Sep 17 00:00:00 2001 From: Dmytro Horoshko Date: Fri, 25 Apr 2025 13:13:51 +0300 Subject: [PATCH 2/3] Update specs --- spec/rust_regexp_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/rust_regexp_spec.rb b/spec/rust_regexp_spec.rb index 71ee2c1..d1d5d5f 100644 --- a/spec/rust_regexp_spec.rb +++ b/spec/rust_regexp_spec.rb @@ -33,7 +33,7 @@ describe "#scan" do examples = [ - ['\w+:\d+', "ruby:123, rust:456", [["ruby:123"], ["rust:456"]]], + ['\w+:\d+', "ruby:123, rust:456", ["ruby:123", "rust:456"]], ['(\w+):(\d+)', 'ruby:123, rust:456', [["ruby", "123"], ["rust", "456"]]], ['(\w+):(\d+)', '123', []], ] From 
41196df5ce94d24783032c8f9d580a4eead45021 Mon Sep 17 00:00:00 2001 From: Dmytro Horoshko Date: Fri, 25 Apr 2025 13:16:22 +0300 Subject: [PATCH 3/3] Update README --- README.md | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index f8342e8..40fa903 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ require "rust_regexp" Regular expressions should be pre-compiled before use: ```ruby -re = RustRegexp.new('(\w+):(\d+)') +re = RustRegexp.new('p.t{2}ern*') # => # ``` @@ -41,31 +41,37 @@ re = RustRegexp.new('(\w+):(\d+)') To find a single match in the haystack: ```ruby -re.match("ruby:123, rust:456") +RustRegexp.new('\w+:\d+').match("ruby:123, rust:456") +# => ["ruby:123"] + +RustRegexp.new('(\w+):(\d+)').match("ruby:123, rust:456") # => ["ruby", "123"] ``` To find all matches in the haystack: ```ruby -re.scan("ruby:123, rust:456") +RustRegexp.new('\w+:\d+').scan("ruby:123, rust:456") +# => ["ruby:123", "rust:456"] + +RustRegexp.new('(\w+):(\d+)').scan("ruby:123, rust:456") # => [["ruby", "123"], ["rust", "456"]] ``` To check whether there is at least one match in the haystack: ```ruby -re.match?("ruby:123") +RustRegexp.new('\w+:\d+').match?("ruby:123") # => true -re.match?("ruby") +RustRegexp.new('\w+:\d+').match?("ruby") # => false ``` Inspect original pattern: ```ruby -re.pattern +RustRegexp.new('(\w+):(\d+)').pattern # => "(\\w+):(\\d+)" ``` @@ -94,18 +100,18 @@ set.match("ghidefabc") # => [0, 1, 2] To check whether at least one pattern from the set matches the haystack: ```ruby -set.match?("abc") +RustRegexp::Set.new(["abc", "def"]).match?("abc") # => true -set.match?("123") +RustRegexp::Set.new(["abc", "def"]).match?("123") # => false ``` Inspect original patterns: ```ruby -set.patterns -# => ["abc", "def", "ghi", "xyz"] +RustRegexp::Set.new(["abc", "def"]).patterns +# => ["abc", "def"] ``` ## Development