Skip to content

Commit 3cb12f4

Browse files
committed
refactor tidy alphabetical lint
This slightly changes alphabetical lint semantics... specifically if an "item" is multiline (when the next line does not have the same indentation) we now consider all lines (ignoring starting whitespace) for ordering, not just the first one.
1 parent bfbe4d4 commit 3cb12f4

File tree

1 file changed

+207
-82
lines changed

1 file changed

+207
-82
lines changed

src/tools/tidy/src/alphabetical.rs

Lines changed: 207 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,33 @@
99
//! // tidy-alphabetical-end
1010
//! ```
1111
//!
12-
//! The following lines are ignored:
13-
//! - Empty lines
14-
//! - Lines that are indented with more or less spaces than the first line
15-
//! - Lines starting with `//`, `#` (except those starting with `#!`), `)`, `]`, `}` if the comment
16-
//! has the same indentation as the first line
17-
//! - Lines starting with a closing delimiter (`)`, `[`, `}`) are ignored.
12+
//! Empty lines and lines starting (ignoring spaces) with `//` or `#` (except those starting with
13+
//! `#!`) are considered comments are are sorted together with the next line (but do not affect
14+
//! sorting).
1815
//!
19-
//! If a line ends with an opening delimiter, we effectively join the following line to it before
20-
//! checking it. E.g. `foo(\nbar)` is treated like `foo(bar)`.
16+
//! If the following lines have higher indentation we effectively join them with the current line
17+
//! before comparing it. If the next line with the same indentation starts (ignoring spaces) with
18+
//! a closing delimiter (`)`, `[`, `}`) it is joined as well.
19+
//!
20+
//! E.g.
21+
//!
22+
//! ```rust,ignore ilustrative example for sorting mentioning non-existent functions
23+
//! foo(a,
24+
//! b);
25+
//! bar(
26+
//! a,
27+
//! b
28+
//! );
29+
//! // are treated for sorting purposes as
30+
//! foo(a, b);
31+
//! bar(a, b);
32+
//! ```
2133
2234
use std::cmp::Ordering;
23-
use std::fmt::Display;
35+
use std::fs;
36+
use std::io::{Seek, Write};
2437
use std::iter::Peekable;
38+
use std::ops::{Range, RangeBounds};
2539
use std::path::Path;
2640

2741
use crate::diagnostics::{CheckId, RunningCheck, TidyCtx};
@@ -38,108 +52,205 @@ fn is_close_bracket(c: char) -> bool {
3852
matches!(c, ')' | ']' | '}')
3953
}
4054

55+
fn is_empty_or_comment(line: &&str) -> bool {
56+
let trimmed_line = line.trim_start_matches(' ').trim_end_matches('\n');
57+
58+
trimmed_line.is_empty()
59+
|| trimmed_line.starts_with("//")
60+
|| (trimmed_line.starts_with('#') && !trimmed_line.starts_with("#!"))
61+
}
62+
4163
const START_MARKER: &str = "tidy-alphabetical-start";
4264
const END_MARKER: &str = "tidy-alphabetical-end";
4365

44-
fn check_section<'a>(
45-
file: impl Display,
46-
lines: impl Iterator<Item = (usize, &'a str)>,
47-
check: &mut RunningCheck,
48-
) {
49-
let mut prev_line = String::new();
50-
let mut first_indent = None;
51-
let mut in_split_line = None;
52-
53-
for (idx, line) in lines {
54-
if line.is_empty() {
55-
continue;
56-
}
66+
/// Given contents of a section that is enclosed between [`START_MARKER`] and [`END_MARKER`], sorts
67+
/// them according to the rules described at the top of the module.
68+
fn sort_section(section: &str) -> String {
69+
/// A sortable item
70+
struct Item {
71+
/// Full contents including comments and whitespace
72+
full: String,
73+
/// Trimmed contents for sorting
74+
trimmed: String,
75+
}
5776

58-
if line.contains(START_MARKER) {
59-
check.error(format!(
60-
"{file}:{} found `{START_MARKER}` expecting `{END_MARKER}`",
61-
idx + 1
62-
));
63-
return;
64-
}
77+
let mut items = Vec::new();
78+
let mut lines = section.split_inclusive('\n').peekable();
79+
80+
let end_comments = loop {
81+
let mut full = String::new();
82+
let mut trimmed = String::new();
6583

66-
if line.contains(END_MARKER) {
67-
return;
84+
while let Some(comment) = lines.next_if(is_empty_or_comment) {
85+
full.push_str(comment);
6886
}
6987

70-
let indent = first_indent.unwrap_or_else(|| {
71-
let indent = indentation(line);
72-
first_indent = Some(indent);
73-
indent
74-
});
75-
76-
let line = if let Some(prev_split_line) = in_split_line {
77-
// Join the split lines.
78-
in_split_line = None;
79-
format!("{prev_split_line}{}", line.trim_start())
80-
} else {
81-
line.to_string()
88+
let Some(line) = lines.next() else {
89+
// remember comments at the end of a block
90+
break full;
8291
};
8392

84-
if indentation(&line) != indent {
85-
continue;
86-
}
93+
let mut push = |line| {
94+
full.push_str(line);
95+
trimmed.push_str(line.trim_start_matches(' ').trim_end_matches('\n'))
96+
};
8797

88-
let trimmed_line = line.trim_start_matches(' ');
98+
push(line);
8999

90-
if trimmed_line.starts_with("//")
91-
|| (trimmed_line.starts_with('#') && !trimmed_line.starts_with("#!"))
92-
|| trimmed_line.starts_with(is_close_bracket)
100+
let indent = indentation(line);
101+
let mut multiline = false;
102+
103+
// If the item is split between multiple lines...
104+
while let Some(more_indented) =
105+
lines.next_if(|&line: &&_| indent < indentation(line) || line == "\n")
93106
{
94-
continue;
107+
multiline = true;
108+
push(more_indented);
95109
}
96110

97-
if line.trim_end().ends_with('(') {
98-
in_split_line = Some(line);
99-
continue;
111+
if multiline
112+
&& let Some(indented) =
113+
// Only append next indented line if it looks like a closing bracket.
114+
// Otherwise we incorrectly merge code like this (can be seen in
115+
// compiler/rustc_session/src/options.rs):
116+
//
117+
// force_unwind_tables: Option<bool> = (None, parse_opt_bool, [TRACKED],
118+
// "force use of unwind tables"),
119+
// incremental: Option<String> = (None, parse_opt_string, [UNTRACKED],
120+
// "enable incremental compilation"),
121+
lines.next_if(|l| {
122+
indentation(l) == indent
123+
&& l.trim_start_matches(' ').starts_with(is_close_bracket)
124+
})
125+
{
126+
push(indented);
100127
}
101128

102-
let prev_line_trimmed_lowercase = prev_line.trim_start_matches(' ');
129+
items.push(Item { full, trimmed });
130+
};
103131

104-
if version_sort(trimmed_line, prev_line_trimmed_lowercase).is_lt() {
105-
check.error(format!("{file}:{}: line not in alphabetical order", idx + 1));
106-
}
132+
items.sort_by(|a, b| version_sort(&a.trimmed, &b.trimmed));
133+
items.into_iter().map(|l| l.full).chain([end_comments]).collect()
134+
}
107135

108-
prev_line = line;
109-
}
136+
fn check_lines<'a>(path: &Path, content: &'a str, tidy_ctx: &TidyCtx, check: &mut RunningCheck) {
137+
let mut offset = 0;
138+
139+
loop {
140+
let rest = &content[offset..];
141+
let start = rest.find(START_MARKER);
142+
let end = rest.find(END_MARKER);
143+
144+
match (start, end) {
145+
// error handling
146+
147+
// end before start
148+
(Some(start), Some(end)) if end < start => {
149+
check.error(format!(
150+
"{path}:{line_number} found `{END_MARKER}` expecting `{START_MARKER}`",
151+
path = path.display(),
152+
line_number = content[..offset + end].lines().count(),
153+
));
154+
break;
155+
}
110156

111-
check.error(format!("{file}: reached end of file expecting `{END_MARKER}`"));
112-
}
157+
// end without a start
158+
(None, Some(end)) => {
159+
check.error(format!(
160+
"{path}:{line_number} found `{END_MARKER}` expecting `{START_MARKER}`",
161+
path = path.display(),
162+
line_number = content[..offset + end].lines().count(),
163+
));
164+
break;
165+
}
113166

114-
fn check_lines<'a>(
115-
file: &impl Display,
116-
mut lines: impl Iterator<Item = (usize, &'a str)>,
117-
check: &mut RunningCheck,
118-
) {
119-
while let Some((idx, line)) = lines.next() {
120-
if line.contains(END_MARKER) {
121-
check.error(format!(
122-
"{file}:{} found `{END_MARKER}` expecting `{START_MARKER}`",
123-
idx + 1
124-
));
125-
}
167+
// start without an end
168+
(Some(start), None) => {
169+
check.error(format!(
170+
"{path}:{line_number} `{START_MARKER}` without a matching `{END_MARKER}`",
171+
path = path.display(),
172+
line_number = content[..offset + start].lines().count(),
173+
));
174+
break;
175+
}
176+
177+
// a second start in between start/end pair
178+
(Some(start), Some(end))
179+
if rest[start + START_MARKER.len()..end].contains(START_MARKER) =>
180+
{
181+
check.error(format!(
182+
"{path}:{line_number} found `{START_MARKER}` expecting `{END_MARKER}`",
183+
path = path.display(),
184+
line_number = content[..offset
185+
+ sub_find(rest, start + START_MARKER.len()..end, START_MARKER)
186+
.unwrap()
187+
.start]
188+
.lines()
189+
.count()
190+
));
191+
break;
192+
}
126193

127-
if line.contains(START_MARKER) {
128-
check_section(file, &mut lines, check);
194+
// happy happy path :3
195+
(Some(start), Some(end)) => {
196+
assert!(start <= end);
197+
198+
// "...␤// tidy-alphabetical-start␤...␤// tidy-alphabetical-end␤..."
199+
// start_nl_end --^ ^-- end_nl_start ^-- end_nl_end
200+
201+
// Position after the newline after start marker
202+
let start_nl_end = sub_find(rest, start + START_MARKER.len().., "\n").unwrap().end;
203+
204+
// Position before the new line before the end marker
205+
let end_nl_start = rest[..end].rfind('\n').unwrap();
206+
207+
// Position after the newline after end marker
208+
let end_nl_end = sub_find(rest, end + END_MARKER.len().., "\n")
209+
.map(|r| r.end)
210+
.unwrap_or(content.len() - offset);
211+
212+
let section = &rest[start_nl_end..=end_nl_start];
213+
let sorted = sort_section(section);
214+
215+
// oh nyooo :(
216+
if sorted != section {
217+
let base_line_number = content[..offset + start_nl_end].lines().count();
218+
let line_offset = sorted
219+
.lines()
220+
.zip(section.lines())
221+
.enumerate()
222+
.find(|(_, (a, b))| a != b)
223+
.unwrap()
224+
.0;
225+
let line_number = base_line_number + line_offset;
226+
227+
check.error(format!(
228+
"{path}:{line_number}: line not in alphabetical order (tip: use --bless to sort this list)",
229+
path = path.display(),
230+
));
231+
}
232+
233+
// Start the next search after the end section
234+
offset += end_nl_end;
235+
}
236+
237+
// No more alphabetical lists, yay :3
238+
(None, None) => break,
129239
}
130240
}
131241
}
132242

133243
pub fn check(path: &Path, tidy_ctx: TidyCtx) {
134244
let mut check = tidy_ctx.start_check(CheckId::new("alphabetical").path(path));
135245

136-
let skip =
137-
|path: &_, _is_dir| filter_dirs(path) || path.ends_with("tidy/src/alphabetical/tests.rs");
246+
let skip = |path: &_, _is_dir| {
247+
filter_dirs(path)
248+
|| path.ends_with("tidy/src/alphabetical.rs")
249+
|| path.ends_with("tidy/src/alphabetical/tests.rs")
250+
};
138251

139-
walk(path, skip, &mut |entry, contents| {
140-
let file = &entry.path().display();
141-
let lines = contents.lines().enumerate();
142-
check_lines(file, lines, &mut check)
252+
walk(path, skip, &mut |entry, content| {
253+
check_lines(entry.path(), content, &tidy_ctx, &mut check)
143254
});
144255
}
145256

@@ -195,3 +306,17 @@ fn version_sort(a: &str, b: &str) -> Ordering {
195306

196307
it1.next().cmp(&it2.next())
197308
}
309+
310+
/// Finds `pat` in `s[range]` and returns a range such that `s[ret] == pat`.
311+
fn sub_find(s: &str, range: impl RangeBounds<usize>, pat: &str) -> Option<Range<usize>> {
312+
s[(range.start_bound().cloned(), range.end_bound().cloned())]
313+
.find(pat)
314+
.map(|pos| {
315+
pos + match range.start_bound().cloned() {
316+
std::ops::Bound::Included(x) => x,
317+
std::ops::Bound::Excluded(x) => x + 1,
318+
std::ops::Bound::Unbounded => 0,
319+
}
320+
})
321+
.map(|pos| pos..pos + pat.len())
322+
}

0 commit comments

Comments
 (0)