@@ -2564,22 +2564,13 @@ fn num_decimal_digits(num: usize) -> usize {
25642564
25652565// We replace some characters so the CLI output is always consistent and underlines aligned.
25662566// Keep the following list in sync with `rustc_span::char_width`.
2567+ // ATTENTION: keep lexicographically sorted so that the binary search will work
25672568const OUTPUT_REPLACEMENTS : & [ ( char , & str ) ] = & [
2568- ( '\t' , " " ) , // We do our own tab replacement
2569- ( '\u{200D}' , "" ) , // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
2570- ( '\u{202A}' , "�" ) , // The following unicode text flow control characters are inconsistently
2571- ( '\u{202B}' , "�" ) , // supported across CLIs and can cause confusion due to the bytes on disk
2572- ( '\u{202D}' , "�" ) , // not corresponding to the visible source code, so we replace them always.
2573- ( '\u{202E}' , "�" ) ,
2574- ( '\u{2066}' , "�" ) ,
2575- ( '\u{2067}' , "�" ) ,
2576- ( '\u{2068}' , "�" ) ,
2577- ( '\u{202C}' , "�" ) ,
2578- ( '\u{2069}' , "�" ) ,
2569+ // tidy-alphabetical-start
25792570 // In terminals without Unicode support the following will be garbled, but in *all* terminals
25802571 // the underlying codepoint will be as well. We could gate this replacement behind a "unicode
25812572 // support" gate.
2582- ( '\u{0000} ' , "␀" ) ,
2573+ ( '\0 ' , "␀" ) ,
25832574 ( '\u{0001}' , "␁" ) ,
25842575 ( '\u{0002}' , "␂" ) ,
25852576 ( '\u{0003}' , "␃" ) ,
@@ -2588,11 +2579,12 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
25882579 ( '\u{0006}' , "␆" ) ,
25892580 ( '\u{0007}' , "␇" ) ,
25902581 ( '\u{0008}' , "␈" ) ,
2591- ( '\u{000B}' , "␋" ) ,
2592- ( '\u{000C}' , "␌" ) ,
2593- ( '\u{000D}' , "␍" ) ,
2594- ( '\u{000E}' , "␎" ) ,
2595- ( '\u{000F}' , "␏" ) ,
2582+ ( '\u{0009}' , " " ) , // We do our own tab replacement
2583+ ( '\u{000b}' , "␋" ) ,
2584+ ( '\u{000c}' , "␌" ) ,
2585+ ( '\u{000d}' , "␍" ) ,
2586+ ( '\u{000e}' , "␎" ) ,
2587+ ( '\u{000f}' , "␏" ) ,
25962588 ( '\u{0010}' , "␐" ) ,
25972589 ( '\u{0011}' , "␑" ) ,
25982590 ( '\u{0012}' , "␒" ) ,
@@ -2603,21 +2595,37 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
26032595 ( '\u{0017}' , "␗" ) ,
26042596 ( '\u{0018}' , "␘" ) ,
26052597 ( '\u{0019}' , "␙" ) ,
2606- ( '\u{001A}' , "␚" ) ,
2607- ( '\u{001B}' , "␛" ) ,
2608- ( '\u{001C}' , "␜" ) ,
2609- ( '\u{001D}' , "␝" ) ,
2610- ( '\u{001E}' , "␞" ) ,
2611- ( '\u{001F}' , "␟" ) ,
2612- ( '\u{007F}' , "␡" ) ,
2598+ ( '\u{001a}' , "␚" ) ,
2599+ ( '\u{001b}' , "␛" ) ,
2600+ ( '\u{001c}' , "␜" ) ,
2601+ ( '\u{001d}' , "␝" ) ,
2602+ ( '\u{001e}' , "␞" ) ,
2603+ ( '\u{001f}' , "␟" ) ,
2604+ ( '\u{007f}' , "␡" ) ,
2605+ ( '\u{200d}' , "" ) , // Replace ZWJ for consistent terminal output of grapheme clusters.
2606+ ( '\u{202a}' , "�" ) , // The following unicode text flow control characters are inconsistently
2607+ ( '\u{202b}' , "�" ) , // supported across CLIs and can cause confusion due to the bytes on disk
2608+ ( '\u{202c}' , "�" ) , // not corresponding to the visible source code, so we replace them always.
2609+ ( '\u{202d}' , "�" ) ,
2610+ ( '\u{202e}' , "�" ) ,
2611+ ( '\u{2066}' , "�" ) ,
2612+ ( '\u{2067}' , "�" ) ,
2613+ ( '\u{2068}' , "�" ) ,
2614+ ( '\u{2069}' , "�" ) ,
2615+ // tidy-alphabetical-end
26132616] ;
26142617
2615- fn normalize_whitespace ( str : & str ) -> String {
2616- let mut s = str. to_string ( ) ;
2617- for ( c, replacement) in OUTPUT_REPLACEMENTS {
2618- s = s. replace ( * c, replacement) ;
2619- }
2620- s
2618+ fn normalize_whitespace ( s : & str ) -> String {
2619+ // Scan the input string for a character in the ordered table above. If it's present, replace
2620+ // it with its alternative string (it can be more than 1 char!). Otherwise, retain the input
2621+ // char. The output string is pre-sized to the input's byte length and appended to as we go.
2622+ s. chars ( ) . fold ( String :: with_capacity ( s. len ( ) ) , |mut s, c| {
2623+ match OUTPUT_REPLACEMENTS . binary_search_by_key ( & c, |( k, _) | * k) {
2624+ Ok ( i) => s. push_str ( OUTPUT_REPLACEMENTS [ i] . 1 ) ,
2625+ _ => s. push ( c) ,
2626+ }
2627+ s
2628+ } )
26212629}
26222630
26232631fn draw_col_separator ( buffer : & mut StyledBuffer , line : usize , col : usize ) {
0 commit comments