@@ -21,6 +21,7 @@ use crate::{
2121 SuggestionStyle , TerminalUrl ,
2222} ;
2323use derive_setters:: Setters ;
24+ use either:: Either ;
2425use rustc_data_structures:: fx:: { FxHashMap , FxIndexMap , FxIndexSet } ;
2526use rustc_data_structures:: sync:: { DynSend , IntoDynSyncSend , Lrc } ;
2627use rustc_error_messages:: { FluentArgs , SpanLabel } ;
@@ -2559,60 +2560,65 @@ fn num_decimal_digits(num: usize) -> usize {
25592560
25602561// We replace some characters so the CLI output is always consistent and underlines aligned.
25612562// Keep the following list in sync with `rustc_span::char_width`.
2563+ // ATTENTION: keep lexicographically sorted so that the binary search will work
25622564const OUTPUT_REPLACEMENTS : & [ ( char , & str ) ] = & [
2563- ( '\t' , " " ) , // We do our own tab replacement
2564- ( '\u{200D}' , "" ) , // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
2565- ( '\u{202A}' , "�" ) , // The following unicode text flow control characters are inconsistently
2566- ( '\u{202B}' , "�" ) , // supported across CLIs and can cause confusion due to the bytes on disk
2567- ( '\u{202D}' , "�" ) , // not corresponding to the visible source code, so we replace them always.
2568- ( '\u{202E}' , "�" ) ,
2565+ // In terminals without Unicode support the following will be garbled, but in *all* terminals
2566+ // the underlying codepoint will be as well. We could gate this replacement behind a "unicode
2567+ // support" gate.
2568+ ( '\0' , "␀" ) ,
2569+ ( '\u{1}' , "␁" ) ,
2570+ ( '\u{2}' , "␂" ) ,
2571+ ( '\u{3}' , "␃" ) ,
2572+ ( '\u{4}' , "␄" ) ,
2573+ ( '\u{5}' , "␅" ) ,
2574+ ( '\u{6}' , "␆" ) ,
2575+ ( '\u{7}' , "␇" ) ,
2576+ ( '\u{8}' , "␈" ) ,
2577+ ( '\t' , " " ) , // We do our own tab replacement
2578+ ( '\u{b}' , "␋" ) ,
2579+ ( '\u{c}' , "␌" ) ,
2580+ ( '\r' , "␍" ) ,
2581+ ( '\u{e}' , "␎" ) ,
2582+ ( '\u{f}' , "␏" ) ,
2583+ ( '\u{10}' , "␐" ) ,
2584+ ( '\u{11}' , "␑" ) ,
2585+ ( '\u{12}' , "␒" ) ,
2586+ ( '\u{13}' , "␓" ) ,
2587+ ( '\u{14}' , "␔" ) ,
2588+ ( '\u{15}' , "␕" ) ,
2589+ ( '\u{16}' , "␖" ) ,
2590+ ( '\u{17}' , "␗" ) ,
2591+ ( '\u{18}' , "␘" ) ,
2592+ ( '\u{19}' , "␙" ) ,
2593+ ( '\u{1a}' , "␚" ) ,
2594+ ( '\u{1b}' , "␛" ) ,
2595+ ( '\u{1c}' , "␜" ) ,
2596+ ( '\u{1d}' , "␝" ) ,
2597+ ( '\u{1e}' , "␞" ) ,
2598+ ( '\u{1f}' , "␟" ) ,
2599+ ( '\u{7f}' , "␡" ) ,
2600+ ( '\u{200d}' , "" ) , // Replace ZWJ for consistent terminal output of grapheme clusters.
2601+ ( '\u{202a}' , "�" ) , // The following unicode text flow control characters are inconsistently
2602+ ( '\u{202b}' , "�" ) , // supported across CLIs and can cause confusion due to the bytes on disk
2603+ ( '\u{202c}' , "�" ) , // not corresponding to the visible source code, so we replace them always.
2604+ ( '\u{202d}' , "�" ) ,
2605+ ( '\u{202e}' , "�" ) ,
25692606 ( '\u{2066}' , "�" ) ,
25702607 ( '\u{2067}' , "�" ) ,
25712608 ( '\u{2068}' , "�" ) ,
2572- ( '\u{202C}' , "�" ) ,
25732609 ( '\u{2069}' , "�" ) ,
2574- // In terminals without Unicode support the following will be garbled, but in *all* terminals
2575- // the underlying codepoint will be as well. We could gate this replacement behind a "unicode
2576- // support" gate.
2577- ( '\u{0000}' , "␀" ) ,
2578- ( '\u{0001}' , "␁" ) ,
2579- ( '\u{0002}' , "␂" ) ,
2580- ( '\u{0003}' , "␃" ) ,
2581- ( '\u{0004}' , "␄" ) ,
2582- ( '\u{0005}' , "␅" ) ,
2583- ( '\u{0006}' , "␆" ) ,
2584- ( '\u{0007}' , "␇" ) ,
2585- ( '\u{0008}' , "␈" ) ,
2586- ( '\u{000B}' , "␋" ) ,
2587- ( '\u{000C}' , "␌" ) ,
2588- ( '\u{000D}' , "␍" ) ,
2589- ( '\u{000E}' , "␎" ) ,
2590- ( '\u{000F}' , "␏" ) ,
2591- ( '\u{0010}' , "␐" ) ,
2592- ( '\u{0011}' , "␑" ) ,
2593- ( '\u{0012}' , "␒" ) ,
2594- ( '\u{0013}' , "␓" ) ,
2595- ( '\u{0014}' , "␔" ) ,
2596- ( '\u{0015}' , "␕" ) ,
2597- ( '\u{0016}' , "␖" ) ,
2598- ( '\u{0017}' , "␗" ) ,
2599- ( '\u{0018}' , "␘" ) ,
2600- ( '\u{0019}' , "␙" ) ,
2601- ( '\u{001A}' , "␚" ) ,
2602- ( '\u{001B}' , "␛" ) ,
2603- ( '\u{001C}' , "␜" ) ,
2604- ( '\u{001D}' , "␝" ) ,
2605- ( '\u{001E}' , "␞" ) ,
2606- ( '\u{001F}' , "␟" ) ,
2607- ( '\u{007F}' , "␡" ) ,
26082610] ;
26092611
26102612fn normalize_whitespace ( str : & str ) -> String {
2611- let mut s = str. to_string ( ) ;
2612- for ( c, replacement) in OUTPUT_REPLACEMENTS {
2613- s = s. replace ( * c, replacement) ;
2614- }
2615- s
2613+ // Scan the input string for a character in the ordered table above. If it's present, replace
2614+ // it with its alternative string (it can be more than 1 char!). Otherwise, retain the input
2615+ // char. At the end, allocate all chars into a string in one operation.
2616+ str. chars ( )
2617+ . flat_map ( |c| match OUTPUT_REPLACEMENTS . binary_search_by_key ( & c, |( k, _) | * k) {
2618+ Ok ( i) => Either :: Left ( OUTPUT_REPLACEMENTS [ i] . 1 . chars ( ) ) ,
2619+ _ => Either :: Right ( [ c] . into_iter ( ) ) ,
2620+ } )
2621+ . collect ( )
26162622}
26172623
26182624fn draw_col_separator ( buffer : & mut StyledBuffer , line : usize , col : usize ) {
0 commit comments