Skip to content

Bugfix: Update for regex 20 lookbehind test. #1407

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions include/cpp2regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,10 +261,10 @@ template<typename CharT, typename Iter, int max_groups> [[nodiscard]] auto make_
#line 184 "cpp2regex.h2"
// Helpers for creating wrappers of the iterators.
//
template <typename Iter> [[nodiscard]] auto make_forward_iterator(Iter const& pos) -> auto;
template <typename Iter> [[nodiscard]] auto make_forward_iterator(std::reverse_iterator<Iter> const& pos) -> auto;
template <typename Iter> [[nodiscard]] auto make_reverse_iterator(Iter const& pos) -> auto;
template<typename Iter> [[nodiscard]] auto make_reverse_iterator(std::reverse_iterator<Iter> const& pos) -> auto;
template <typename Iter> [[nodiscard]] auto cpp2_make_forward_iterator(Iter const& pos) -> auto;
template<typename Iter> [[nodiscard]] auto cpp2_make_forward_iterator(std::reverse_iterator<Iter> const& pos) -> auto;
template <typename Iter> [[nodiscard]] auto cpp2_make_reverse_iterator(Iter const& pos) -> auto;
template<typename Iter> [[nodiscard]] auto cpp2_make_reverse_iterator(std::reverse_iterator<Iter> const& pos) -> auto;

#line 192 "cpp2regex.h2"
// End function that returns a valid match.
Expand Down Expand Up @@ -927,13 +927,13 @@ template<typename CharT, typename Iter, int max_groups> [[nodiscard]] auto make_
}

#line 186 "cpp2regex.h2"
template <typename Iter> [[nodiscard]] auto make_forward_iterator(Iter const& pos) -> auto { return pos; }
template <typename Iter> [[nodiscard]] auto cpp2_make_forward_iterator(Iter const& pos) -> auto { return pos; }
#line 187 "cpp2regex.h2"
template <typename Iter> [[nodiscard]] auto make_forward_iterator(std::reverse_iterator<Iter> const& pos) -> auto { return CPP2_UFCS(base)(pos); }
template<typename Iter> [[nodiscard]] auto cpp2_make_forward_iterator(std::reverse_iterator<Iter> const& pos) -> auto { return CPP2_UFCS(base)(pos); }
#line 188 "cpp2regex.h2"
template <typename Iter> [[nodiscard]] auto make_reverse_iterator(Iter const& pos) -> auto { return std::make_reverse_iterator(pos); }
template <typename Iter> [[nodiscard]] auto cpp2_make_reverse_iterator(Iter const& pos) -> auto { return std::make_reverse_iterator(pos); }
#line 189 "cpp2regex.h2"
template<typename Iter> [[nodiscard]] auto make_reverse_iterator(std::reverse_iterator<Iter> const& pos) -> auto { return pos; }
template<typename Iter> [[nodiscard]] auto cpp2_make_reverse_iterator(std::reverse_iterator<Iter> const& pos) -> auto { return pos; }

#line 196 "cpp2regex.h2"
[[nodiscard]] auto true_end_func::operator()(auto const& cur, auto& ctx) const& -> decltype(auto) { return ctx.pass(cur); }
Expand Down Expand Up @@ -1153,7 +1153,7 @@ template<typename CharT, bool match_new_line> [[nodiscard]] auto line_start_toke
#line 575 "cpp2regex.h2"
template<typename CharT, bool positive> [[nodiscard]] auto lookahead_token_matcher(auto const& cur, auto& ctx, auto const& func) -> bool
{
auto r {func(make_forward_iterator(cur), make_forward_match_context(ctx), true_end_func())};
auto r {func(cpp2_make_forward_iterator(cur), make_forward_match_context(ctx), true_end_func())};
if (!(positive)) {
r.matched = !(r.matched);
}
Expand All @@ -1164,7 +1164,7 @@ template<typename CharT, bool positive> [[nodiscard]] auto lookahead_token_match
#line 589 "cpp2regex.h2"
template<typename CharT, bool positive> [[nodiscard]] auto lookbehind_token_matcher(auto const& cur, auto& ctx, auto const& func) -> bool
{
auto r {func(make_reverse_iterator(cur), make_reverse_match_context(ctx), true_end_func())};
auto r {func(cpp2_make_reverse_iterator(cur), make_reverse_match_context(ctx), true_end_func())};
if (!(positive)) {
r.matched = !(r.matched);
}
Expand Down
12 changes: 6 additions & 6 deletions include/cpp2regex.h2
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,10 @@ make_reverse_match_context: <CharT, Iter, max_groups: int> (inout ctx: reverse_m

// Helpers for creating wrappers of the iterators.
//
make_forward_iterator: <Iter> (pos: Iter) -> _ = pos;
make_forward_iterator: <Iter> (pos: std::reverse_iterator<Iter>) -> _ = pos.base();
make_reverse_iterator: <Iter> (pos: Iter) -> _ = std::make_reverse_iterator(pos);
make_reverse_iterator: <Iter> (pos: std::reverse_iterator<Iter>) -> _ = pos;
// Iterator direction adapters. Each name has two overloads: one for a plain
// iterator and one for an iterator already wrapped in std::reverse_iterator.
// NOTE(review): the cpp2_ prefix presumably keeps unqualified calls from
// colliding with std::make_reverse_iterator found through ADL - confirm.
//
// Forward view of a plain iterator: returned unchanged.
cpp2_make_forward_iterator: <Iter> (pos: Iter) -> _ = pos;
// Forward view of a reversed iterator: unwrap it with base().
cpp2_make_forward_iterator: <Iter> (pos: std::reverse_iterator<Iter>) -> _ = pos.base();
// Reverse view of a plain iterator: wrap it with std::make_reverse_iterator.
cpp2_make_reverse_iterator: <Iter> (pos: Iter) -> _ = std::make_reverse_iterator(pos);
// Reverse view of an already reversed iterator: returned unchanged.
cpp2_make_reverse_iterator: <Iter> (pos: std::reverse_iterator<Iter>) -> _ = pos;


// End function that returns a valid match.
Expand Down Expand Up @@ -574,7 +574,7 @@ line_start_token_matcher: <CharT, match_new_line: bool> (cur, inout ctx) -> bool
//
lookahead_token_matcher: <CharT, positive: bool> (cur, inout ctx, func) -> bool =
{
r := func(make_forward_iterator(cur), make_forward_match_context(ctx), true_end_func());
r := func(cpp2_make_forward_iterator(cur), make_forward_match_context(ctx), true_end_func());
if !positive {
r.matched = !r.matched;
}
Expand All @@ -588,7 +588,7 @@ lookahead_token_matcher: <CharT, positive: bool> (cur, inout ctx, func) -> bool
//
lookbehind_token_matcher: <CharT, positive: bool> (cur, inout ctx, func) -> bool =
{
r := func(make_reverse_iterator(cur), make_reverse_match_context(ctx), true_end_func());
r := func(cpp2_make_reverse_iterator(cur), make_reverse_match_context(ctx), true_end_func());
if !positive {
r.matched = !r.matched;
}
Expand Down
286 changes: 286 additions & 0 deletions regression-tests/pure2-regex_20_lookbehind.cpp2
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
// Expands the result template `resultExpr` against the match object `r` and
// returns the expanded text. Literal text is copied through unchanged; the
// scanner stops at every '$' or '@' and handles the following forms:
//   $&                  -> r.group(0)
//   $<digits>           -> r.group(<digits>)
//   $-{name} / $+{name} -> r.group(name), where name is the text between the braces
//   $-[n]               -> r.group_start(n) as decimal text
//   $+[n]               -> r.group_end(n) as decimal text
//   $- / $+ otherwise   -> r.group(r.group_number() - 1)
//   @- / @+             -> group_start / group_end of every group index in
//                          [0, r.group_number()), concatenated without a separator
// Any other character after '$' or '@' writes "Not implemented" to std::cerr.
// NOTE(review): the helpers dereference the iterator without comparing it to
// the end of the string, so a template that ends right after '$' / '@' or has
// an unterminated '{' / '[' is presumably not supported - confirm.
create_result: (resultExpr: std::string, r) -> std::string = {
result: std::string = "";

// Returns an iterator to the first '$' or '@' at or after `iter`, or cend()
// when neither occurs. `resultExpr&$*` captures the address of resultExpr and
// dereferences it, so the lambda reads the caller's string.
get_next := :(iter) -> _ = {
start := std::distance(resultExpr&$*.cbegin(), iter);
firstDollar := resultExpr&$*.find("$", start);
firstAt := resultExpr&$*.find("@", start);

// npos is the largest value, so std::min picks whichever marker was found
// first and stays npos only when both searches failed.
end := std::min(firstDollar, firstAt);
if end != std::string::npos {
return resultExpr&$*.cbegin() + end;
}
else {
return resultExpr&$*.cend();
}
};
// Advances `iter` past a run of decimal digits and returns that run as an int.
extract_group_and_advance := :(inout iter) -> _ = {
start := iter;

while std::isdigit(iter*) next iter++ {}

return std::stoi(std::string(start, iter));
};
// Advances `iter` up to (not past) the first character equal to `to` and
// returns the characters that were skipped.
extract_until := :(inout iter, to: char) -> _ = {
start := iter;

while (to != iter*) next iter++ {} // TODO: Without bracket: error: postfix unary * (dereference) cannot be immediately followed by a (, identifier, or literal - add whitespace before * here if you meant binary * (multiplication)

return std::string(start, iter);
};

iter := resultExpr.begin();

// Each pass copies the literal text up to the next marker, expands the
// marker (if any), and resumes scanning right after it.
while iter != resultExpr.end() {
next := get_next(iter);

if next != iter {
result += std::string(iter, next);
}
if next != resultExpr.end() {
if next* == '$' {
next++;

if next* == '&' {
next++;
result += r.group(0);
}
else if next* == '-' || next* == '+' {
// Remember the sign before advancing: '-' selects the group start,
// '+' the group end (only used by the '[n]' form below).
is_start := next* == '-';
next++;
if next* == '{' {
next++; // Skip {
group := extract_until(next, '}');
next++; // Skip }
result += r.group(group);
}
else if next* == '[' {
next++; // Skip [
group := extract_group_and_advance(next);
next++; // Skip ]

if is_start {
result += std::to_string(r.group_start(group));
}
else {
result += std::to_string(r.group_end(group));
}
}
else {
// Return max group
result += r.group(r.group_number() - 1);
}
}
else if std::isdigit(next*) {
group := extract_group_and_advance(next);
result += r.group(group);
}
else {
std::cerr << "Not implemented";
}
}
else if next* == '@' {
next++;

if next* == '-' || next* == '+' {
// Emit the start ('-') or end ('+') offset of every group in index order.
i := 0;
while i < cpp2::unchecked_narrow<int>(r.group_number()) next i++ {
pos := 0;
if next* == '-' {
pos = r.group_start(i);
}
else {
pos = r.group_end(i);
}
result += std::to_string(pos);
}
next++;
}
else {
std::cerr << "Not implemented";
}
}
else {
// get_next only stops on '$' or '@', so this branch is a safety net.
std::cerr << "Not implemented.";
}
}
iter = next;
}

return result;
}

// Returns a copy of `str` in which the control characters BEL, form feed,
// ESC, newline, carriage return and tab are spelled as the two-character
// sequences \a, \f, \e, \n, \r and \t, so they stay visible in a report line.
sanitize: (copy str: std::string) -> std::string =
{
    // Rewrites every occurrence of `raw` inside `text` with `shown`.
    escape := :(inout text: std::string, raw: std::string, shown: std::string) = {
        text = cpp2::string_util::replace_all(text, raw, shown);
    };

    escape(str, "\a", "\\a");
    escape(str, "\f", "\\f");
    escape(str, "\x1b", "\\e");
    escape(str, "\n", "\\n");
    escape(str, "\r", "\\r");
    escape(str, "\t", "\\t");

    return str;
}

// Runs a single regression case and prints a one-line report to std::cout.
//   regex          - matcher object; must provide to_string() and search()
//   id             - case number used as the report prefix
//   regex_str      - pattern text the matcher was written from
//   str            - subject string that is searched
//   kind           - "y", "yM", "yS" or "yB" when a match is expected, "n" when not
//   resultExpr     - result template handed to create_result on a match
//   resultExpected - text the expanded template must equal
test: <M> (regex: M, id: std::string, regex_str: std::string, str: std::string, kind: std::string, resultExpr: std::string,
           resultExpected: std::string) = {

    // Note when the matcher's own rendering differs from the pattern text.
    // The extra space separates the warning from the rest of the report line.
    warning: std::string = "";
    if regex_str != regex.to_string() {
        warning = "Warning: Parsed regex does not match.";
        warning += " ";
    }

    r := regex.search(str);
    status: std::string = "OK";

    expects_match := kind == "y" || kind == "yM" || kind == "yS" || kind == "yB";

    if expects_match {
        if r.matched {
            // A match was found: expand the template and compare it with the expectation.
            result := create_result(resultExpr, r);

            if result != resultExpected {
                status = "Failure: Result is wrong. (is: (sanitize(result))$)";
            }
        }
        else {
            status = "Failure: Regex should apply.";
        }
    }
    else if kind == "n" {
        if r.matched {
            status = "Failure: Regex should not apply. Result is '(r.group(0))$'";
        }
    }
    else {
        status = "Unknown kind '(kind)$'";
    }

    std::cout << "(id)$_(kind)$: (status)$ (warning)$regex: (regex_str)$ parsed_regex: (regex.to_string())$ str: (sanitize(str))$ result_expr: (resultExpr)$ expected_results (sanitize(resultExpected))$" << std::endl;
}


// Regression suite "tests_20_lookbehind". Most patterns use the lookbehind
// assertions (?<=...) and (?<!...); a few plain patterns are included as well.
// Every regex_NN member is declared from a raw-string pattern, and run()
// passes it to test() together with the same pattern text.
// NOTE(review): the @regex metafunction presumably turns each regex_NN member
// into a compiled matcher (test() calls to_string() and search() on it) -
// confirm against the cppfront regex documentation.
test_tests_20_lookbehind: @regex type = {
regex_01 := R"((?<=a)b)";
regex_02 := R"((?<=af?)b)";
regex_03 := R"((?<=a)b)";
regex_04 := R"((?<=a(?:fo)?)b)";
regex_05 := R"((?<=a)b)";
regex_06 := R"((?<=a(?:foo)?)b)";
regex_07 := R"((?<!c)b)";
regex_08 := R"((?<!c(?:foob)?)b)";
regex_09 := R"((?<!c)b)";
regex_10 := R"((?<!c(?:fooba)?)b)";
regex_11 := R"((?<!c)b)";
regex_12 := R"((?<!c(?:foobar)?)b)";
regex_13 := R"((?<!c)b)";
regex_14 := R"((?<!c(?:foobarb)?)b)";
regex_15 := R"((?<![cd])b)";
regex_16 := R"((?<![cd]e{0,254})b)";
regex_17 := R"((?<![cd])[ab])";
regex_18 := R"((?<![cd]{1,2})[ab])";
regex_19 := R"((?<![cd]{1,3})[ab])";
regex_20 := R"((?<!(c|d))b)";
regex_21 := R"((?<!(c|d))[ab])";
regex_22 := R"((?<!cd)[ab])";
regex_23 := R"($(?<=^(a)))";
regex_24 := R"((.*)c)";
regex_25 := R"((.*)(?<=b))";
regex_26 := R"((.*)(?<=b)c)";
regex_27 := R"((.*)(?<=b|c))";
regex_28 := R"((.*)(?<=b|c)c)";
regex_29 := R"((.*)(?<=c|b))";
regex_30 := R"((.*)(?<=c|b)c)";
regex_31 := R"((.*)(?<=[bc]))";
regex_32 := R"((.*)(?<=[bc])c)";
regex_33 := R"((.*?)c)";
regex_34 := R"((.*?)(?<=b))";
regex_35 := R"((.*?)(?<=b)c)";
regex_36 := R"((.*?)(?<=b|c))";
regex_37 := R"((.*?)(?<=b|c)c)";
regex_38 := R"((.*?)(?<=c|b))";
regex_39 := R"((.*?)(?<=c|b)c)";
regex_40 := R"((.*?)(?<=[bc]))";
regex_41 := R"((.*?)(?<=[bc])c)";
regex_42 := R"((?<=foo))";
regex_43 := R"((?<=foo))";
regex_44 := R"(.*(?<=foo))";
regex_45 := R"(.*(?<=foo))";
regex_46 := R"((?<=foo)Y)";
regex_47 := R"(o(?<=foo)Y)";
regex_48 := R"(X(?<=foo.)[YZ])";
regex_49 := R"((?<=bar>)foo)";
regex_50 := R"((?<!bar>)foo)";
regex_51 := R"((?<=bar>ABC)foo)";
regex_52 := R"((?<!bar>ABC)foo)";
regex_53 := R"((?<=abcd(?<=(aaaabcd))))";
regex_54 := R"((?=xy(?<=(aaxy))))";
regex_55 := R"((?=xy(?<=(aaxyz?))))";
regex_56 := R"((?<=(?=(aaxy))aa))";
// Runs every case in order. The argument columns of test() are:
// matcher, case id, pattern text, subject string, kind ("y" = must match,
// "n" = must not match), result template, expected expansion.
run: (this) = {
std::cout << "Running tests_20_lookbehind:"<< std::endl;
test(regex_01, "01", R"((?<=a)b)", "ab", "y", R"($&)", "b");
test(regex_02, "02", R"((?<=af?)b)", "ab", "y", R"($&)", "b");
test(regex_03, "03", R"((?<=a)b)", "cb", "n", R"(-)", "-");
test(regex_04, "04", R"((?<=a(?:fo)?)b)", "cb", "n", R"(-)", "-");
test(regex_05, "05", R"((?<=a)b)", "b", "n", R"(-)", "-");
test(regex_06, "06", R"((?<=a(?:foo)?)b)", "b", "n", R"(-)", "-");
test(regex_07, "07", R"((?<!c)b)", "ab", "y", R"($&)", "b");
test(regex_08, "08", R"((?<!c(?:foob)?)b)", "ab", "y", R"($&)", "b");
test(regex_09, "09", R"((?<!c)b)", "cb", "n", R"(-)", "-");
test(regex_10, "10", R"((?<!c(?:fooba)?)b)", "cb", "n", R"(-)", "-");
test(regex_11, "11", R"((?<!c)b)", "b", "y", R"(-)", "-");
test(regex_12, "12", R"((?<!c(?:foobar)?)b)", "b", "y", R"(-)", "-");
test(regex_13, "13", R"((?<!c)b)", "b", "y", R"($&)", "b");
test(regex_14, "14", R"((?<!c(?:foobarb)?)b)", "b", "y", R"($&)", "b");
test(regex_15, "15", R"((?<![cd])b)", "dbcb", "n", R"(-)", "-");
test(regex_16, "16", R"((?<![cd]e{0,254})b)", "dbcb", "n", R"(-)", "-");
test(regex_17, "17", R"((?<![cd])[ab])", "dbaacb", "y", R"($&)", "a");
test(regex_18, "18", R"((?<![cd]{1,2})[ab])", "dbaacb", "y", R"($&)", "a");
test(regex_19, "19", R"((?<![cd]{1,3})[ab])", "dbaacb", "y", R"($&)", "a");
test(regex_20, "20", R"((?<!(c|d))b)", "dbcb", "n", R"(-)", "-");
test(regex_21, "21", R"((?<!(c|d))[ab])", "dbaacb", "y", R"($&)", "a");
test(regex_22, "22", R"((?<!cd)[ab])", "cdaccb", "y", R"($&)", "b");
test(regex_23, "23", R"($(?<=^(a)))", "a", "y", R"($1)", "a");
test(regex_24, "24", R"((.*)c)", "abcd", "y", R"($1)", "ab");
test(regex_25, "25", R"((.*)(?<=b))", "abcd", "y", R"($1)", "ab");
test(regex_26, "26", R"((.*)(?<=b)c)", "abcd", "y", R"($1)", "ab");
test(regex_27, "27", R"((.*)(?<=b|c))", "abcd", "y", R"($1)", "abc");
test(regex_28, "28", R"((.*)(?<=b|c)c)", "abcd", "y", R"($1)", "ab");
test(regex_29, "29", R"((.*)(?<=c|b))", "abcd", "y", R"($1)", "abc");
test(regex_30, "30", R"((.*)(?<=c|b)c)", "abcd", "y", R"($1)", "ab");
test(regex_31, "31", R"((.*)(?<=[bc]))", "abcd", "y", R"($1)", "abc");
test(regex_32, "32", R"((.*)(?<=[bc])c)", "abcd", "y", R"($1)", "ab");
test(regex_33, "33", R"((.*?)c)", "abcd", "y", R"($1)", "ab");
test(regex_34, "34", R"((.*?)(?<=b))", "abcd", "y", R"($1)", "ab");
test(regex_35, "35", R"((.*?)(?<=b)c)", "abcd", "y", R"($1)", "ab");
test(regex_36, "36", R"((.*?)(?<=b|c))", "abcd", "y", R"($1)", "ab");
test(regex_37, "37", R"((.*?)(?<=b|c)c)", "abcd", "y", R"($1)", "ab");
test(regex_38, "38", R"((.*?)(?<=c|b))", "abcd", "y", R"($1)", "ab");
test(regex_39, "39", R"((.*?)(?<=c|b)c)", "abcd", "y", R"($1)", "ab");
test(regex_40, "40", R"((.*?)(?<=[bc]))", "abcd", "y", R"($1)", "ab");
test(regex_41, "41", R"((.*?)(?<=[bc])c)", "abcd", "y", R"($1)", "ab");
test(regex_42, "42", R"((?<=foo))", "foo", "y", R"(@+)", "3");
test(regex_43, "43", R"((?<=foo))", "XfooY", "y", R"(@+)", "4");
test(regex_44, "44", R"(.*(?<=foo))", "foo", "y", R"(@+)", "3");
test(regex_45, "45", R"(.*(?<=foo))", "XfooY", "y", R"(@+)", "4");
test(regex_46, "46", R"((?<=foo)Y)", "XfooY", "y", R"(@+)", "5");
test(regex_47, "47", R"(o(?<=foo)Y)", "..XfooY..", "y", R"(@+)", "7");
test(regex_48, "48", R"(X(?<=foo.)[YZ])", "..XfooXY..", "y", R"(@+)", "8");
test(regex_49, "49", R"((?<=bar>)foo)", "bar>foo", "y", R"($&)", "foo");
test(regex_50, "50", R"((?<!bar>)foo)", "bar>foo", "n", R"(-)", "-");
test(regex_51, "51", R"((?<=bar>ABC)foo)", "bar>ABCfoo", "y", R"($&)", "foo");
test(regex_52, "52", R"((?<!bar>ABC)foo)", "bar>ABCfoo", "n", R"(-)", "-");
test(regex_53, "53", R"((?<=abcd(?<=(aaaabcd))))", "..aaaabcd..", "y", R"($1)", "aaaabcd");
test(regex_54, "54", R"((?=xy(?<=(aaxy))))", "..aaxy..", "y", R"($1)", "aaxy");
test(regex_55, "55", R"((?=xy(?<=(aaxyz?))))", "..aaxy..", "y", R"($1)", "aaxy");
test(regex_56, "56", R"((?<=(?=(aaxy))aa))", "..aaxy..", "y", R"($1)", "aaxy");
std::cout << std::endl;
}
}
// Program entry point: builds the lookbehind suite and runs all of its cases.
main: () = {
    suite: test_tests_20_lookbehind = ();
    suite.run();
}
Loading
Loading