From 48ccf3e36bed0102b9561d93b5dd6b43394fdcf9 Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Fri, 11 Jul 2025 09:07:00 +0200 Subject: [PATCH 1/3] newFOROP: fix crash when optimizing 2-var for over builtin::indexed OP_ENTERSUB isn't necessarily a LISTOP, apparently, so we can't just grab its op_last. Instead, copy/paste logic from elsewhere in op.c to find the cvop. Also, avoid crashing on "fake" pad entries that represent lexical subs from outer scopes by climbing up the scope chain until we reach a real pad entry. Fixes #23405. --- op.c | 13 ++++++++++--- t/op/for-many.t | 13 +++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/op.c b/op.c index d7aaca7fe73c..96f0055cd7c2 100644 --- a/op.c +++ b/op.c @@ -9671,7 +9671,7 @@ S_op_is_cv_xsub(pTHX_ OP *o, XSUBADDR_t xsub) } case OP_PADCV: - cv = (CV *)PAD_SVl(o->op_targ); + cv = find_lexical_cv(o->op_targ); assert(cv && SvTYPE(cv) == SVt_PVCV); break; @@ -9689,10 +9689,17 @@ S_op_is_cv_xsub(pTHX_ OP *o, XSUBADDR_t xsub) static bool S_op_is_call_to_cv_xsub(pTHX_ OP *o, XSUBADDR_t xsub) { - if(o->op_type != OP_ENTERSUB) + if (o->op_type != OP_ENTERSUB) return false; - OP *cvop = cLISTOPx(cUNOPo->op_first)->op_last; + OP *aop = cUNOPo->op_first; + if (!OpHAS_SIBLING(aop)) { + aop = cUNOPx(aop)->op_first; + } + aop = OpSIBLING(aop); + OP *cvop; + for (cvop = aop; OpHAS_SIBLING(cvop); cvop = OpSIBLING(cvop)) ; + return op_is_cv_xsub(cvop, xsub); } diff --git a/t/op/for-many.t b/t/op/for-many.t index 2f6790aee775..035d1da07e91 100644 --- a/t/op/for-many.t +++ b/t/op/for-many.t @@ -498,4 +498,17 @@ is($continue, 'xx', 'continue reached twice'); is("@have", "Pointy end Up Flamey end Down", 'for my ($one, $two)'); } +# GH #23405 - segfaults when compiling 2-var for loops +{ + my $dummy = sub {}; + for my ($x, $y) (main->$dummy) {} + pass '2-var for does not crash on method calls'; + + my sub dummy {} + sub { + for my ($x, $y) (dummy) {} + }->(); + pass '2-var for does not crash on lexical sub calls'; +} + done_testing(); From f5932f871348c73f919ddfd1721ae15c35668547 Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Thu, 17 Jul 2025 06:56:53 +0200 Subject: [PATCH 2/3] perf/opcount.t: ensure imported builtins are optimized When I saw (in #23429) that use builtin qw(indexed); sub { for my ($x, $y) (indexed) {} } crashes, but sub { for my ($x, $y) (builtin::indexed) {} } works fine, I realized that optrees for calls to lexically imported subs look different from calls to package subs. This commit make sure both call variants are optimized to direct ops. --- t/perf/opcount.t | 129 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 124 insertions(+), 5 deletions(-) diff --git a/t/perf/opcount.t b/t/perf/opcount.t index d3863690323e..a48db773141c 100644 --- a/t/perf/opcount.t +++ b/t/perf/opcount.t @@ -698,6 +698,21 @@ test_opcount(0, "multiconcat: local assign", # builtin:: function calls should be replaced with efficient op implementations no warnings 'experimental::builtin'; +use builtin qw( + blessed + ceil + false + floor + indexed + is_bool + is_tainted + is_weak + refaddr + reftype + true + unweaken + weaken +); test_opcount(0, "builtin::true/false are replaced with constants", sub { my $x = builtin::true(); my $y = builtin::false() }, @@ -706,6 +721,13 @@ test_opcount(0, "builtin::true/false are replaced with constants", const => 2, }); +test_opcount(0, "imported true/false are replaced with constants", + sub { my $x = true(); my $y = false() }, + { + entersub => 0, + const => 2, + }); + test_opcount(0, "builtin::is_bool is replaced with direct opcode", sub { my $x; my $y; $y = builtin::is_bool($x); 1; }, { @@ -715,6 +737,15 @@ test_opcount(0, "builtin::is_bool is replaced with direct opcode", padsv_store => 1, }); +test_opcount(0, "imported is_bool is replaced with direct opcode", + sub { my $x; my $y; $y = is_bool($x); 1; }, + { + entersub => 0, + is_bool => 1, + padsv => 3, + padsv_store => 1, + }); + test_opcount(0, "builtin::is_bool gets constant-folded", sub { builtin::is_bool(123); }, { @@ -723,6 +754,14 @@ test_opcount(0, "builtin::is_bool gets constant-folded", const => 1, }); +test_opcount(0, "imported is_bool gets constant-folded", + sub { is_bool(123); }, + { + entersub => 0, + is_bool => 0, + const => 1, + }); + test_opcount(0, "builtin::weaken is replaced with direct opcode", sub { my $x = []; builtin::weaken($x); }, { @@ -730,6 +769,13 @@ test_opcount(0, "builtin::weaken is replaced with direct opcode", weaken => 1, }); +test_opcount(0, "imported weaken is replaced with direct opcode", + sub { my $x = []; weaken($x); }, + { + entersub => 0, + weaken => 1, + }); + test_opcount(0, "builtin::unweaken is replaced with direct opcode", sub { my $x = []; builtin::unweaken($x); }, { @@ -737,6 +783,13 @@ test_opcount(0, "builtin::unweaken is replaced with direct opcode", unweaken => 1, }); +test_opcount(0, "imported unweaken is replaced with direct opcode", + sub { my $x = []; unweaken($x); }, + { + entersub => 0, + unweaken => 1, + }); + test_opcount(0, "builtin::is_weak is replaced with direct opcode", sub { builtin::is_weak([]); }, { @@ -744,6 +797,13 @@ test_opcount(0, "builtin::is_weak is replaced with direct opcode", is_weak => 1, }); +test_opcount(0, "imported is_weak is replaced with direct opcode", + sub { is_weak([]); }, + { + entersub => 0, + is_weak => 1, + }); + test_opcount(0, "builtin::blessed is replaced with direct opcode", sub { builtin::blessed([]); }, { @@ -751,6 +811,13 @@ test_opcount(0, "builtin::blessed is replaced with direct opcode", blessed => 1, }); +test_opcount(0, "imported blessed is replaced with direct opcode", + sub { blessed([]); }, + { + entersub => 0, + blessed => 1, + }); + test_opcount(0, "builtin::refaddr is replaced with direct opcode", sub { builtin::refaddr([]); }, { @@ -758,6 +825,13 @@ test_opcount(0, "builtin::refaddr is replaced with direct opcode", refaddr => 1, }); +test_opcount(0, "imported refaddr is replaced with direct opcode", + sub { refaddr([]); }, + { + entersub => 0, + refaddr => 1, + }); + test_opcount(0, "builtin::reftype is replaced with direct opcode", sub { builtin::reftype([]); }, { @@ -765,6 +839,13 @@ test_opcount(0, "builtin::reftype is replaced with direct opcode", reftype => 1, }); +test_opcount(0, "imported reftype is replaced with direct opcode", + sub { reftype([]); }, + { + entersub => 0, + reftype => 1, + }); + my $one_point_five = 1.5; # Prevent const-folding. test_opcount(0, "builtin::ceil is replaced with direct opcode", sub { builtin::ceil($one_point_five); }, @@ -773,6 +854,13 @@ test_opcount(0, "builtin::ceil is replaced with direct opcode", ceil => 1, }); +test_opcount(0, "imported ceil is replaced with direct opcode", + sub { ceil($one_point_five); }, + { + entersub => 0, + ceil => 1, + }); + test_opcount(0, "builtin::floor is replaced with direct opcode", sub { builtin::floor($one_point_five); }, { @@ -780,6 +868,13 @@ test_opcount(0, "builtin::floor is replaced with direct opcode", floor => 1, }); +test_opcount(0, "imported floor is replaced with direct opcode", + sub { floor($one_point_five); }, + { + entersub => 0, + floor => 1, + }); + test_opcount(0, "builtin::is_tainted is replaced with direct opcode", sub { builtin::is_tainted($0); }, { @@ -787,6 +882,13 @@ test_opcount(0, "builtin::is_tainted is replaced with direct opcode", is_tainted => 1, }); +test_opcount(0, "imported is_tainted is replaced with direct opcode", + sub { is_tainted($0); }, + { + entersub => 0, + is_tainted => 1, + }); + # void sassign + padsv combinations are replaced by padsv_store test_opcount(0, "sassign + padsv replaced by padsv_store", sub { my $y; my $z = $y = 3; 1; }, @@ -1014,18 +1116,35 @@ test_opcount(0, "Empty anonhash ref and direct lexical assignment", test_opcount(0, "foreach 2 lexicals on builtin::indexed ARRAY", sub { my @input = (); foreach my ($i, $x) (builtin::indexed @input) { } }, { - entersub => 0, # no call to builtin::indexed + entersub => 0, # no call to builtin::indexed enteriter => 1, - iter => 1, - padav => 2, + iter => 1, + padav => 2, + }); + +test_opcount(0, "foreach 2 lexicals on imported indexed ARRAY", + sub { my @input = (); foreach my ($i, $x) (indexed @input) { } }, + { + entersub => 0, # no call to builtin::indexed + enteriter => 1, + iter => 1, + padav => 2, }); test_opcount(0, "foreach 2 lexicals on builtin::indexed LIST", sub { foreach my ($i, $x) (builtin::indexed qw( x y z )) { } }, { - entersub => 0, # no call to builtin::indexed + entersub => 0, # no call to builtin::indexed + enteriter => 1, + iter => 1, + }); + +test_opcount(0, "foreach 2 lexicals on imported indexed LIST", + sub { foreach my ($i, $x) (indexed qw( x y z )) { } }, + { + entersub => 0, # no call to builtin::indexed enteriter => 1, - iter => 1, + iter => 1, }); # substr with const zero offset and "" replacements From 8eda65ecb175ff279ccc52d5fd0ca7c43f83c6be Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Sun, 13 Jul 2025 00:04:12 +0200 Subject: [PATCH 3/3] perldelta for the 2-var 'for' crash (#23405) --- pod/perldelta.pod | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 100d563284e5..787a624b2ea8 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -374,7 +374,29 @@ manager will later use a regex to expand these into links. =item * -XXX +Certain constructs involving a two-variable C loop would crash the perl +compiler in v5.42.0: + + # Two-variable for loop over a list returned from a method call: + for my ($x, $y) (Some::Class->foo()) { ... } + for my ($x, $y) ($object->foo()) { ... } + +and + + # Two-variable for loop over a list returned from a call to a + # lexical(ly imported) subroutine, all inside a lexically scoped + # or anonymous subroutine: + my sub foo { ... } + my $fn = sub { + for my ($x, $y) (foo()) { ... } + }; + + use builtin qw(indexed); # lexical import! + my sub bar { + for my ($x, $y) (indexed(...)) { ... } + } + +These have been fixed. [GH #23405] =item *