1+ use crate :: { EarlyContext , EarlyLintPass , LintContext } ;
2+ use rustc_ast as ast;
3+ use rustc_errors:: { Applicability , SuggestionStyle } ;
4+ use rustc_span:: { BytePos , Span , Symbol } ;
5+
6+ declare_lint ! {
7+ /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the
8+ /// visual representation of text on screen in a way that does not correspond to their on
9+ /// memory representation.
10+ ///
11+ /// ### Explanation
12+ ///
13+ /// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`,
14+ /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change
15+ /// its direction on software that supports these codepoints. This makes the text "abc" display
16+ /// as "cba" on screen. By leveraging software that supports these, people can write specially
17+ /// crafted literals that make the surrounding code seem like it's performing one action, when
18+ /// in reality it is performing another. Because of this, we proactively lint against their
19+ /// presence to avoid surprises.
20+ ///
21+ /// ### Example
22+ ///
23+ /// ```rust,compile_fail
24+ /// #![deny(text_direction_codepoint_in_literal)]
25+ /// fn main() {
26+ /// println!("{:?}", '');
27+ /// }
28+ /// ```
29+ ///
30+ /// {{produces}}
31+ ///
32+ pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL ,
33+ Deny ,
34+ "detect special Unicode codepoints that affect the visual representation of text on screen, \
35+ changing the direction in which text flows",
36+ }
37+
38+ declare_lint_pass ! ( HiddenUnicodeCodepoints => [ TEXT_DIRECTION_CODEPOINT_IN_LITERAL ] ) ;
39+
/// Unicode codepoints that alter the direction in which text flows on screen.
///
/// The slice can be passed directly to `str::contains` as a pattern, or searched with
/// `slice::contains` for a single `char`.
///
/// Visibility is spelled `pub(crate)`, the stable form of the bare `crate` shorthand.
pub(crate) const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
    '\u{202A}', // LEFT-TO-RIGHT EMBEDDING
    '\u{202B}', // RIGHT-TO-LEFT EMBEDDING
    '\u{202D}', // LEFT-TO-RIGHT OVERRIDE
    '\u{202E}', // RIGHT-TO-LEFT OVERRIDE
    '\u{2066}', // LEFT-TO-RIGHT ISOLATE
    '\u{2067}', // RIGHT-TO-LEFT ISOLATE
    '\u{2068}', // FIRST STRONG ISOLATE
    '\u{202C}', // POP DIRECTIONAL FORMATTING
    '\u{2069}', // POP DIRECTIONAL ISOLATE
];
44+
45+ impl HiddenUnicodeCodepoints {
46+ fn lint_text_direction_codepoint (
47+ & self ,
48+ cx : & EarlyContext < ' _ > ,
49+ text : Symbol ,
50+ span : Span ,
51+ padding : u32 ,
52+ point_at_inner_spans : bool ,
53+ label : & str ,
54+ ) {
55+ // Obtain the `Span`s for each of the forbidden chars.
56+ let spans: Vec < _ > = text
57+ . as_str ( )
58+ . char_indices ( )
59+ . filter_map ( |( i, c) | {
60+ UNICODE_TEXT_FLOW_CHARS . contains ( & c) . then ( || {
61+ let lo = span. lo ( ) + BytePos ( i as u32 + padding) ;
62+ ( c, span. with_lo ( lo) . with_hi ( lo + BytePos ( c. len_utf8 ( ) as u32 ) ) )
63+ } )
64+ } )
65+ . collect ( ) ;
66+
67+ cx. struct_span_lint ( TEXT_DIRECTION_CODEPOINT_IN_LITERAL , span, |lint| {
68+ let mut err = lint. build ( & format ! (
69+ "unicode codepoint changing visible direction of text present in {}" ,
70+ label
71+ ) ) ;
72+ let ( an, s) = match spans. len ( ) {
73+ 1 => ( "an " , "" ) ,
74+ _ => ( "" , "s" ) ,
75+ } ;
76+ err. span_label (
77+ span,
78+ & format ! (
79+ "this {} contains {}invisible unicode text flow control codepoint{}" ,
80+ label, an, s,
81+ ) ,
82+ ) ;
83+ if point_at_inner_spans {
84+ for ( c, span) in & spans {
85+ err. span_label ( * span, format ! ( "{:?}" , c) ) ;
86+ }
87+ }
88+ err. note (
89+ "these kind of unicode codepoints change the way text flows on applications that \
90+ support them, but can cause confusion because they change the order of \
91+ characters on the screen",
92+ ) ;
93+ if point_at_inner_spans && !spans. is_empty ( ) {
94+ err. multipart_suggestion_with_style (
95+ "if their presence wasn't intentional, you can remove them" ,
96+ spans. iter ( ) . map ( |( _, span) | ( * span, "" . to_string ( ) ) ) . collect ( ) ,
97+ Applicability :: MachineApplicable ,
98+ SuggestionStyle :: HideCodeAlways ,
99+ ) ;
100+ err. multipart_suggestion (
101+ "if you want to keep them but make them visible in your source code, you can \
102+ escape them",
103+ spans
104+ . into_iter ( )
105+ . map ( |( c, span) | {
106+ let c = format ! ( "{:?}" , c) ;
107+ ( span, c[ 1 ..c. len ( ) - 1 ] . to_string ( ) )
108+ } )
109+ . collect ( ) ,
110+ Applicability :: MachineApplicable ,
111+ ) ;
112+ } else {
113+ // FIXME: in other suggestions we've reversed the inner spans of doc comments. We
114+ // should do the same here to provide the same good suggestions as we do for
115+ // literals above.
116+ err. note ( "if their presence wasn't intentional, you can remove them" ) ;
117+ err. note ( & format ! (
118+ "if you want to keep them but make them visible in your source code, you can \
119+ escape them: {}",
120+ spans
121+ . into_iter( )
122+ . map( |( c, _) | { format!( "{:?}" , c) } )
123+ . collect:: <Vec <String >>( )
124+ . join( ", " ) ,
125+ ) ) ;
126+ }
127+ err. emit ( ) ;
128+ } ) ;
129+ }
130+ }
131+ impl EarlyLintPass for HiddenUnicodeCodepoints {
132+ fn check_attribute ( & mut self , cx : & EarlyContext < ' _ > , attr : & ast:: Attribute ) {
133+ if let ast:: AttrKind :: DocComment ( _, comment) = attr. kind {
134+ if comment. as_str ( ) . contains ( UNICODE_TEXT_FLOW_CHARS ) {
135+ self . lint_text_direction_codepoint ( cx, comment, attr. span , 0 , false , "doc comment" ) ;
136+ }
137+ }
138+ }
139+
140+ fn check_expr ( & mut self , cx : & EarlyContext < ' _ > , expr : & ast:: Expr ) {
141+ // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
142+ let ( text, span, padding) = match & expr. kind {
143+ ast:: ExprKind :: Lit ( ast:: Lit { token, kind, span } ) => {
144+ let text = token. symbol ;
145+ if !text. as_str ( ) . contains ( UNICODE_TEXT_FLOW_CHARS ) {
146+ return ;
147+ }
148+ let padding = match kind {
149+ // account for `"` or `'`
150+ ast:: LitKind :: Str ( _, ast:: StrStyle :: Cooked ) | ast:: LitKind :: Char ( _) => 1 ,
151+ // account for `r###"`
152+ ast:: LitKind :: Str ( _, ast:: StrStyle :: Raw ( val) ) => * val as u32 + 2 ,
153+ _ => return ,
154+ } ;
155+ ( text, span, padding)
156+ }
157+ _ => return ,
158+ } ;
159+ self . lint_text_direction_codepoint ( cx, text, * span, padding, true , "literal" ) ;
160+ }
161+ }
0 commit comments