Skip to content
This repository was archived by the owner on Jun 1, 2023. It is now read-only.

Commit 7fd83e1

Browse files
committed
Even less m/{}/ Unescaped warnings and errors
Allow ({...}) :{4,a} xa{3\,4}y \b<GCB} Only ({...}) will be magic, the rest is literal.
1 parent 17e2b8d commit 7fd83e1

File tree

2 files changed

+37
-33
lines changed

2 files changed

+37
-33
lines changed

regcomp.c

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10680,7 +10680,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
1068010680
upgraded to UTF-8. Otherwise would only return NULL if regbranch() returns
1068110681
NULL, which cannot happen. */
1068210682
STATIC regnode *
10683-
S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
10683+
S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
1068410684
/* paren: Parenthesized? 0=top; 1,2=inside '(': changed to letter.
1068510685
* 2 is like 1, but indicates that nextchar() has been called to advance
1068610686
* RExC_parse beyond the '('. Things like '(?' are indivisible tokens, and
@@ -11635,7 +11635,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
1163511635
}
1163611636
else {
1163711637
/* [cperl #362] */
11638-
if (*RExC_parse == '{' && PASS2 && strpbrk(RExC_parse-1, "NpPbBxog") == RExC_parse-1) {
11638+
if (*RExC_parse == '{' && PASS2
11639+
&& RExC_parse - parse_start >= 2
11640+
&& UCHARAT(RExC_parse-2) == '\\'
11641+
&& strpbrk(RExC_parse-1, "NpPbBxog") == RExC_parse-1)
11642+
{
1163911643
ckWARNregdep(RExC_parse + 1,
1164011644
"Unescaped left brace in regex is "
1164111645
"deprecated here (and will be fatal "
@@ -13280,7 +13284,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1328013284
finish_meta_pat:
1328113285
if ( UCHARAT(RExC_parse + 1) == '{'
1328213286
&& UNLIKELY(! new_regcurly(RExC_parse + 1, RExC_end))
13283-
&& strpbrk(RExC_parse-1, "NpPbBxog") == RExC_parse-1 /* [cperl #362] */
13287+
&& RExC_parse - parse_start >= 1
13288+
&& UCHARAT(RExC_parse-1) == '\\'
13289+
&& strpbrk(RExC_parse, "NpPbBxog") == RExC_parse /* [cperl #362] */
1328413290
)
1328513291
{
1328613292
RExC_parse += 2;
@@ -13856,13 +13862,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1385613862
} /* End of switch on '\' */
1385713863
break;
1385813864
case '{':
13859-
/* Currently we allow an lbrace at the start of a construct
13860-
* without raising a warning.
13861-
* cperl allows all chars besides \NpPbBxog{} [cperl #362]
13862-
* This is because we think we will never want such a brace to be
13863-
* meant to be other than taken literally. */
13864-
if (len || (p > RExC_start && strpbrk(p-1, "NpPbBxog") == p-1)) {
13865-
13865+
/* cperl allows an lbrace and rbrace without raising a warning.
13866+
* cperl special-cases only \NpPbBxog{} [cperl #362], later ({...}),
13867+
* all other cases are legal. */
13868+
#ifndef USE_CPERL
13869+
if (!(p - RExC_parse > 1
13870+
&& UCHARAT(p-2) == '\\'
13871+
&& strpbrk(p-1, "NpPbBxog") == p-1))
13872+
{
1386613873
/* But, we raise a fatal warning otherwise, as the
1386713874
* deprecation cycle has come and gone. Except that it
1386813875
* turns out that some heavily-relied on upstream
@@ -13871,25 +13878,20 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1387113878
* we anticipate using the '{' for something else.
1387213879
* This happens after any alpha, and for a looser {m,n}
1387313880
* quantifier specification */
13874-
if ( RExC_strict
13875-
|| ( p > parse_start + 1
13876-
&& strpbrk(p-1, "NpPbBxog") == p-1
13877-
&& *(p - 2) == '\\')
13878-
|| new_regcurly(p, RExC_end))
13879-
{
13881+
if (len && (RExC_strict || new_regcurly(p, RExC_end))) {
1388013882
RExC_parse = p + 1;
13881-
vFAIL("Unescaped left brace in regex is "
13882-
"illegal here");
13883+
vFAIL("Unescaped left brace in regex is illegal here");
1388313884
}
1388413885
if (PASS2) {
1388513886
ckWARNregdep(p + 1,
1388613887
"Unescaped left brace in regex is "
1388713888
"deprecated here (and will be fatal "
1388813889
"in Perl 5.30), passed through");
13889-
}
13890+
}
1389013891
}
13892+
#endif
1389113893
goto normal_default;
13892-
case '}':
13894+
/* case '}': */
1389313895
case ']':
1389413896
if (PASS2 && p > RExC_parse && RExC_strict) {
1389513897
ckWARN2reg(p + 1, "Unescaped literal '%c'", *p);
@@ -14393,11 +14395,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1439314395

1439414396
/* Position parse to next real character */
1439514397
skip_to_be_ignored_text(pRExC_state, &RExC_parse,
14396-
FALSE /* Don't force to /x */ );
14398+
FALSE /* Don't force to /x */ );
1439714399
if (PASS2
1439814400
&& *RExC_parse == '{'
1439914401
&& OP(ret) != SBOL
1440014402
&& ! regcurly(RExC_parse)
14403+
&& RExC_parse - parse_start >= 2
14404+
&& UCHARAT(RExC_parse-2) == '\\'
1440114405
&& strpbrk(RExC_parse-1, "NpPbBxog") == RExC_parse-1) /* [cperl #362] */
1440214406
{
1440314407
ckWARNregdep(RExC_parse + 1, "Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through");

t/re/reg_mesg.t

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -397,12 +397,6 @@ my @death_only_under_strict = (
397397
=> 'False [] range "[:digit:]-" {#} m/[[:digit:]-{#}[:alpha:]]\x{100}/',
398398
'/[a\zb]\x{100}/' => 'Unrecognized escape \z in character class passed through {#} m/[a\z{#}b]\x{100}/',
399399
=> 'Unrecognized escape \z in character class {#} m/[a\z{#}b]\x{100}/',
400-
'default_on/:{4,a}/' => 'Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through {#} m/:{{#}4,a}/',
401-
=> 'Unescaped left brace in regex is illegal here {#} m/:{{#}4,a}/',
402-
'default_on/xa{3\,4}y/' => 'Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through {#} m/xa{{#}3\,4}y/',
403-
=> 'Unescaped left brace in regex is illegal here {#} m/xa{{#}3\,4}y/',
404-
'default_on/\\${[^\\}]*}/' => 'Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through {#} m/\\${{#}[^\\}]*}/',
405-
=> 'Unescaped left brace in regex is illegal here {#} m/\\${{#}[^\\}]*}/',
406400
'/[a b]/' => "",
407401
=> 'Literal vertical space in [] is illegal except under /x {#} m/[a {#}b]/',
408402
);
@@ -610,15 +604,21 @@ my @warning = (
610604
'Assuming NOT a POSIX class since a semi-colon was found instead of a colon {#} m/[foo;{#}punct;]]\x{100}/',
611605
'Assuming NOT a POSIX class since a semi-colon was found instead of a colon {#} m/[foo;punct;]{#}]\x{100}/',
612606
],
613-
'/[][[:alpha:]]/' => "", # [perl #127581]
614-
'/[][[:alpha:]\\@\\\\^_?]/' => "", # [perl #131522]
607+
'/[][[:alpha:]]/' => "", # [perl #127581]
608+
'/[][[:alpha:]\\@\\\\^_?]/' => "", # [perl #131522]
615609
'/(?[[:w:]])/' => "",
616610
'/([.].*)[.]/' => "", # [perl #127582]
617611
'/[.].*[.]/' => "", # [perl #127604]
618-
'/abc/xix' => "",
612+
'/abc/xix' => "",
619613
'/(?xmsixp:abc)/' => "",
620-
'/(?xmsixp)abc/' => "",
621-
'/(?xxxx:abc)/' => "",
614+
'/(?xmsixp)abc/' => "",
615+
'/(?xxxx:abc)/' => "",
616+
#cperl-only #362
617+
'/:{4,a}/' => "",
618+
'/xa{3\,4}y/' => "",
619+
#'/\\${[^\\}]*}/' => "",
620+
'/({...})/' => "",
621+
'/\b<GCB}/' => '', # was: 'Unescaped literal \'}\' {#} m/\b<GCB}{#}/',
622622

623623
); # See comments before this for why '\x{100}' is generally needed
624624

@@ -662,7 +662,7 @@ my @warning_only_under_strict = (
662662
"/[A-$B_hex]/" => "Ranges of ASCII printables should be some subset of \"0-9\", \"A-Z\", or \"a-z\" {#} m/[A-$B_hex\{#}]/",
663663
"/[$low_mixed_alpha-$high_mixed_alpha]/" => "Ranges of ASCII printables should be some subset of \"0-9\", \"A-Z\", or \"a-z\" {#} m/[$low_mixed_alpha-$high_mixed_alpha\{#}]/",
664664
"/[$low_mixed_digit-$high_mixed_digit]/" => "Ranges of ASCII printables should be some subset of \"0-9\", \"A-Z\", or \"a-z\" {#} m/[$low_mixed_digit-$high_mixed_digit\{#}]/",
665-
'/\b<GCB}/' => 'Unescaped literal \'}\' {#} m/\b<GCB}{#}/',
665+
#'/\b<GCB}/' => 'Unescaped literal \'}\' {#} m/\b<GCB}{#}/',
666666
'/[ ]def]/' => 'Unescaped literal \']\' {#} m/[ ]def]{#}/',
667667
);
668668

0 commit comments

Comments
 (0)