@@ -2,6 +2,7 @@ use std::borrow::Cow;
22use std:: env;
33use std:: fmt;
44use std:: fmt:: { Debug , Formatter } ;
5+ use std:: ops:: Range ;
56use std:: sync:: atomic:: { AtomicBool , Ordering } ;
67
78use once_cell:: sync:: Lazy ;
@@ -807,77 +808,124 @@ pub(crate) fn char_width(_c: char) -> usize {
807808 1
808809}
809810
810- /// Truncates a string to a certain number of characters.
811+ /// Slice a `&str` in terms of text width. This means that only the text
812+ /// columns strictly between `start` and `stop` will be kept.
811813///
812- /// This ensures that escape codes are not screwed up in the process.
813- /// If the maximum length is hit the string will be truncated but
814- /// escapes code will still be honored. If truncation takes place
815- /// the tail string will be appended.
816- pub fn truncate_str < ' a > ( s : & ' a str , width : usize , tail : & str ) -> Cow < ' a , str > {
817- if measure_text_width ( s) <= width {
818- return Cow :: Borrowed ( s) ;
819- }
820-
814+ /// If a multi-columns character overlaps with the end of the interval it will
815+ /// not be included. In such a case, the result will be less than `end - start`
816+ /// columns wide.
817+ ///
818+ /// This ensures that escape codes are not screwed up in the process. And if
819+ /// non-empty head and tail are specified, they are inserted between the ANSI
820+ /// codes from truncated bounds and the slice.
821+ pub fn slice_str < ' a > ( s : & ' a str , head : & str , bounds : Range < usize > , tail : & str ) -> Cow < ' a , str > {
821822 #[ cfg( feature = "ansi-parsing" ) ]
822823 {
823- use std:: cmp:: Ordering ;
824- let mut iter = AnsiCodeIterator :: new ( s) ;
825- let mut length = 0 ;
826- let mut rv = None ;
827-
828- while let Some ( item) = iter. next ( ) {
829- match item {
830- ( s, false ) => {
831- if rv. is_none ( ) {
832- if str_width ( s) + length > width. saturating_sub ( str_width ( tail) ) {
833- let ts = iter. current_slice ( ) ;
834-
835- let mut s_byte = 0 ;
836- let mut s_width = 0 ;
837- let rest_width = width. saturating_sub ( str_width ( tail) ) . saturating_sub ( length) ;
838- for c in s. chars ( ) {
839- s_byte += c. len_utf8 ( ) ;
840- s_width += char_width ( c) ;
841- match s_width. cmp ( & rest_width) {
842- Ordering :: Equal => break ,
843- Ordering :: Greater => {
844- s_byte -= c. len_utf8 ( ) ;
845- break ;
846- }
847- Ordering :: Less => continue ,
848- }
849- }
850-
851- let idx = ts. len ( ) - s. len ( ) + s_byte;
852- let mut buf = ts[ ..idx] . to_string ( ) ;
853- buf. push_str ( tail) ;
854- rv = Some ( buf) ;
855- }
856- length += str_width ( s) ;
824+ let mut pos = 0 ; // Current search index by width
825+ let mut code_iter = AnsiCodeIterator :: new ( s) . peekable ( ) ;
826+
827+ // Search for the begining of the slice while collecting heading ANSI
828+ // codes
829+ let mut front_ansi = String :: new ( ) ; // ANSI codes found before bound start
830+ let mut slice_start = 0 ; // Current search index by bytes
831+
832+ // Extract the leading slice, which *may be mutated* to remove just its first character.
833+ ' search_slice_start: while pos < bounds. start {
834+ let Some ( ( sub, is_ansi) ) = code_iter. peek_mut ( ) else {
835+ break ;
836+ } ;
837+
838+ if * is_ansi {
839+ // Keep track of leading ANSI for later output.
840+ front_ansi. push_str ( sub) ;
841+ slice_start += sub. len ( ) ;
842+ } else {
843+ for ( c_idx, c) in sub. char_indices ( ) {
844+ if pos >= bounds. start {
845+ // Ensure we don't drop the remaining of the slice before searching for the
846+ // end bound.
847+ * sub = & sub[ c_idx..] ;
848+ break ' search_slice_start;
857849 }
850+
851+ pos += char_width ( c) ;
852+ slice_start += c. len_utf8 ( ) ;
858853 }
859- ( s, true ) => {
860- if let Some ( ref mut rv) = rv {
861- rv. push_str ( s) ;
862- }
854+ }
855+
856+ code_iter. next ( ) ;
857+ }
858+
859+ // Search for the end of the slice. This loop is a bit simpler because we don't need to
860+ // keep track of remaining characters if we cut in the middle of a non-ANSI slice.
861+ let mut slice_end = slice_start;
862+
863+ ' search_slice_end: for ( sub, is_ansi) in & mut code_iter {
864+ if is_ansi {
865+ // Keep ANSI in the output slice but don't account for them in the total width.
866+ slice_end += sub. len ( ) ;
867+ continue ;
868+ }
869+
870+ for c in sub. chars ( ) {
871+ let c_width = char_width ( c) ;
872+
873+ if pos + c_width > bounds. end {
874+ // We will only search for ANSI codes after breaking this
875+ // loop, so we can safely drop the remaining of `sub`
876+ break ' search_slice_end;
863877 }
878+
879+ pos += c_width;
880+ slice_end += c. len_utf8 ( ) ;
864881 }
865882 }
866883
867- if let Some ( buf) = rv {
868- Cow :: Owned ( buf)
869- } else {
870- Cow :: Borrowed ( s)
884+ // Initialise the result (before appending remaining ANSI slices)
885+ let slice = & s[ slice_start..slice_end] ;
886+
887+ let mut result = {
888+ if front_ansi. is_empty ( ) && head. is_empty ( ) && tail. is_empty ( ) {
889+ // No allocation may have to be performed if there are no bounds.
890+ Cow :: Borrowed ( slice)
891+ } else {
892+ Cow :: Owned ( front_ansi + head + slice + tail)
893+ }
894+ } ;
895+
896+ // Push back remaining ANSI codes to result
897+ for ( sub, is_ansi) in code_iter {
898+ if is_ansi {
899+ result. to_mut ( ) . push_str ( sub) ;
900+ }
871901 }
872- }
873902
903+ result
904+ }
874905 #[ cfg( not( feature = "ansi-parsing" ) ) ]
875906 {
876- Cow :: Owned ( format ! (
877- "{}{}" ,
878- & s[ ..width. saturating_sub( tail. len( ) ) ] ,
879- tail
880- ) )
907+ let slice = s. get ( bounds) . unwrap_or ( "" ) ;
908+
909+ if head. is_empty ( ) && tail. is_empty ( ) {
910+ Cow :: Borrowed ( slice)
911+ } else {
912+ Cow :: Owned ( format ! ( "{head}{slice}{tail}" ) )
913+ }
914+ }
915+ }
916+
917+ /// Truncates a string to a certain number of characters.
918+ ///
919+ /// This ensures that escape codes are not screwed up in the process.
920+ /// If the maximum length is hit the string will be truncated but
921+ /// escapes code will still be honored. If truncation takes place
922+ /// the tail string will be appended.
923+ pub fn truncate_str < ' a > ( s : & ' a str , width : usize , tail : & str ) -> Cow < ' a , str > {
924+ if measure_text_width ( s) <= width {
925+ Cow :: Borrowed ( s)
926+ } else {
927+ let tail_width = measure_text_width ( tail) ;
928+ slice_str ( s, "" , 0 ..width. saturating_sub ( tail_width) , tail)
881929 }
882930}
883931
@@ -1005,6 +1053,57 @@ fn test_truncate_str() {
10051053 ) ;
10061054}
10071055
#[test]
fn test_slice_ansi_str() {
    // Note that 🐶 is two columns wide
    let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";

    // Bounds covering the whole string must give the input back, with or
    // without a head and tail wrapped around it.
    let whole = 0..test_str.len();
    assert_eq!(slice_str(test_str, "", whole.clone(), ""), test_str);
    assert_eq!(
        slice_str(test_str, ">>>", whole, "<<<"),
        format!(">>>{test_str}<<<"),
    );

    if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") {
        assert_eq!(measure_text_width(test_str), 16);

        // (bounds, expected output) with no head and no tail.
        let cases = [
            (5..5, "\x1b[31m\x1b[1m\x1b[0m"),
            (0..5, "Hello\x1b[31m\x1b[1m\x1b[0m"),
            // A wide character overlapping the end bound is dropped.
            (0..6, "Hello\x1b[31m\x1b[1m\x1b[0m"),
            (0..7, "Hello\x1b[31m🐶\x1b[1m\x1b[0m"),
            (4..9, "o\x1b[31m🐶\x1b[1m🐶\x1b[0m"),
            (7..21, "\x1b[31m\x1b[1m🐶\x1b[0m world!"),
        ];

        for (bounds, expected) in cases {
            assert_eq!(slice_str(test_str, "", bounds, ""), expected);
        }

        // Head and tail sit between the surrounding ANSI codes and the slice.
        assert_eq!(
            slice_str(test_str, ">>>", 7..21, "<<<"),
            "\x1b[31m>>>\x1b[1m🐶\x1b[0m world!<<<"
        );
    }
}
1106+
10081107#[ test]
10091108fn test_truncate_str_no_ansi ( ) {
10101109 assert_eq ! ( & truncate_str( "foo bar" , 7 , "!" ) , "foo bar" ) ;
0 commit comments