-
Notifications
You must be signed in to change notification settings - Fork 488
Closed
Description
"\b" represents word boundaries in regex. But it seems to behave differently in different implementations:
In the regex crate, "can't" is split into separate pieces:
fn main() {
let s = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
let re = Regex::new(r"\b").unwrap();
let res = re.split(s).collect::<Vec<&str>>();
// ["", "The", " ", "quick", " (\"", "brown", "\") ", "fox", " ", "can", "'", "t", " ", "jump", " ", "32", ".", "3", " ", "feet", ", ", "right", "?"]
println!("{res:?}");
}
In Swift 5.7:
let s = "The quick (\"brown\") fox can't jump 32.3 feet, right?"
let words = s.split(separator: /\b/)
// ["The", " ", "quick", " ", "(", "\"", "brown", "\"", ")", " ", "fox", " ", "can\'t", " ", "jump", " ", "32.3", " ", "feet", ",", " ", "right", "?"]
print(words)
// In the unicode-segmentation crate, the result is the same as the regex in Swift, following the [Unicode Standard Annex #29](http://www.unicode.org/reports/tr29/) rules.
fn main() {
let s = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
let res = s.split_word_bounds().collect::<Vec<&str>>();
// ["The", " ", "quick", " ", "(", "\"", "brown", "\"", ")", " ", "fox", " ", "can't", " ", "jump", " ", "32.3", " ", "feet", ",", " ", "right", "?"]
println!("{res:?}");
}
Metadata
Metadata
Assignees
Labels
No labels