Skip to content

Commit b2d48c4

Browse files
committed
Add option to pv_uni_display for better tr/// output
tr/// uses a special malformed UTF-8 character as a sentinel; teach pv_uni_display about that.
1 parent bdc8044 commit b2d48c4

File tree

2 files changed

+17
-0
lines changed

2 files changed

+17
-0
lines changed

utf8.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4748,7 +4748,13 @@ See also L</sv_uni_display>.
47484748
=for apidoc Amnh||UNI_DISPLAY_QQ
47494749
=for apidoc Amnh||UNI_DISPLAY_REGEX
47504750
=cut
4751+
4752+
UNI_DISPLAY_TR_ is undocumented; it is used internally to display an operand
4753+
of the tr/// operation. These operands contain a peculiar, deliberate UTF-8
4754+
malformation, which this flag causes to be handled properly. It also turns on
4755+
UNI_DISPLAY_ISPRINT and UNI_DISPLAY_BACKSLASH.
47514756
*/
4757+
47524758
char *
47534759
Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim,
47544760
UV flags)
@@ -4770,6 +4776,14 @@ Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim,
47704776
break;
47714777
}
47724778

4779+
/* The minus is unambiguously the range indicator within a UTF-8 tr///
4780+
* operand */
4781+
/* UNI_DISPLAY_TR_ is a multi-bit mask (it also contains the ISPRINT and
 * BACKSLASH bits), so require that every bit of it is set.  A bare '&'
 * would also be true for flag sets that merely share those bits, such as
 * UNI_DISPLAY_QQ. */
if (UNLIKELY((flags & UNI_DISPLAY_TR_) == UNI_DISPLAY_TR_ && *s == ILLEGAL_UTF8_BYTE)) {
4782+
sv_catpvs(dsv, "-");
4783+
next_len = 1;
4784+
continue;
4785+
}
4786+
47734787
u = utf8_to_uvchr_buf(s, e, &next_len);
47744788
assert(next_len > 0);
47754789

utf8.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1318,6 +1318,9 @@ point's representation.
13181318
#define UNI_DISPLAY_BACKSLASH 0x0002
13191319
#define UNI_DISPLAY_BACKSPACE 0x0004 /* Allow \b when also
13201320
UNI_DISPLAY_BACKSLASH */
1321+
/* Flag set for displaying an operand of tr///: the 0x0008 bit combined with
 * UNI_DISPLAY_ISPRINT and UNI_DISPLAY_BACKSLASH.  Because this is a
 * multi-bit mask, 'flags & UNI_DISPLAY_TR_' is also non-zero when only
 * ISPRINT or BACKSLASH is set; compare against the full mask to test for
 * it specifically. */
#define UNI_DISPLAY_TR_ ( 0x0008 \
1322+
|UNI_DISPLAY_ISPRINT \
1323+
|UNI_DISPLAY_BACKSLASH)
13211324
/* Combination of the ISPRINT, BACKSLASH and BACKSPACE flags */
#define UNI_DISPLAY_QQ (UNI_DISPLAY_ISPRINT \
13221325
|UNI_DISPLAY_BACKSLASH \
13231326
|UNI_DISPLAY_BACKSPACE)

0 commit comments

Comments
 (0)