refactor tidy alphabetical lint

WaffleLapkin · WaffleLapkin · commit 3cb12f402dbe · 2025-12-01T18:43:30.000+01:00
This slightly changes alphabetical lint semantics...
specifically if an "item" is multiline (when the next line
does not have the same indentation) we now consider all lines (ignoring
starting whitespace) for ordering, not just the first one.
diff --git a/src/tools/tidy/src/alphabetical.rs b/src/tools/tidy/src/alphabetical.rs
@@ -9,19 +9,33 @@
 //! // tidy-alphabetical-end
 //! ```
 //!
-//! The following lines are ignored:
-//! - Empty lines
-//! - Lines that are indented with more or less spaces than the first line
-//! - Lines starting with `//`, `#` (except those starting with `#!`), `)`, `]`, `}` if the comment
-//!   has the same indentation as the first line
-//! - Lines starting with a closing delimiter (`)`, `[`, `}`) are ignored.
+//! Empty lines and lines starting (ignoring spaces) with `//` or `#` (except those starting with
+//! `#!`) are considered comments are are sorted together with the next line (but do not affect
+//! sorting).
 //!
-//! If a line ends with an opening delimiter, we effectively join the following line to it before
-//! checking it. E.g. `foo(\nbar)` is treated like `foo(bar)`.
+//! If the following lines have higher indentation we effectively join them with the current line
+//! before comparing it. If the next line with the same indentation starts (ignoring spaces) with
+//! a closing delimiter (`)`, `[`, `}`) it is joined as well.
+//!
+//! E.g.
+//!
+//! ```rust,ignore ilustrative example for sorting mentioning non-existent functions
+//! foo(a,
+//!     b);
+//! bar(
+//!   a,
+//!   b
+//! );
+//! // are treated for sorting purposes as
+//! foo(a, b);
+//! bar(a, b);
+//! ```
 
 use std::cmp::Ordering;
-use std::fmt::Display;
+use std::fs;
+use std::io::{Seek, Write};
 use std::iter::Peekable;
+use std::ops::{Range, RangeBounds};
 use std::path::Path;
 
 use crate::diagnostics::{CheckId, RunningCheck, TidyCtx};
@@ -38,108 +52,205 @@ fn is_close_bracket(c: char) -> bool {
     matches!(c, ')' | ']' | '}')
 }
 
+fn is_empty_or_comment(line: &&str) -> bool {
+    let trimmed_line = line.trim_start_matches(' ').trim_end_matches('\n');
+
+    trimmed_line.is_empty()
+        || trimmed_line.starts_with("//")
+        || (trimmed_line.starts_with('#') && !trimmed_line.starts_with("#!"))
+}
+
 const START_MARKER: &str = "tidy-alphabetical-start";
 const END_MARKER: &str = "tidy-alphabetical-end";
 
-fn check_section<'a>(
-    file: impl Display,
-    lines: impl Iterator<Item = (usize, &'a str)>,
-    check: &mut RunningCheck,
-) {
-    let mut prev_line = String::new();
-    let mut first_indent = None;
-    let mut in_split_line = None;
-
-    for (idx, line) in lines {
-        if line.is_empty() {
-            continue;
-        }
+/// Given contents of a section that is enclosed between [`START_MARKER`] and [`END_MARKER`], sorts
+/// them according to the rules described at the top of the module.
+fn sort_section(section: &str) -> String {
+    /// A sortable item
+    struct Item {
+        /// Full contents including comments and whitespace
+        full: String,
+        /// Trimmed contents for sorting
+        trimmed: String,
+    }
 
-        if line.contains(START_MARKER) {
-            check.error(format!(
-                "{file}:{} found `{START_MARKER}` expecting `{END_MARKER}`",
-                idx + 1
-            ));
-            return;
-        }
+    let mut items = Vec::new();
+    let mut lines = section.split_inclusive('\n').peekable();
+
+    let end_comments = loop {
+        let mut full = String::new();
+        let mut trimmed = String::new();
 
-        if line.contains(END_MARKER) {
-            return;
+        while let Some(comment) = lines.next_if(is_empty_or_comment) {
+            full.push_str(comment);
         }
 
-        let indent = first_indent.unwrap_or_else(|| {
-            let indent = indentation(line);
-            first_indent = Some(indent);
-            indent
-        });
-
-        let line = if let Some(prev_split_line) = in_split_line {
-            // Join the split lines.
-            in_split_line = None;
-            format!("{prev_split_line}{}", line.trim_start())
-        } else {
-            line.to_string()
+        let Some(line) = lines.next() else {
+            // remember comments at the end of a block
+            break full;
         };
 
-        if indentation(&line) != indent {
-            continue;
-        }
+        let mut push = |line| {
+            full.push_str(line);
+            trimmed.push_str(line.trim_start_matches(' ').trim_end_matches('\n'))
+        };
 
-        let trimmed_line = line.trim_start_matches(' ');
+        push(line);
 
-        if trimmed_line.starts_with("//")
-            || (trimmed_line.starts_with('#') && !trimmed_line.starts_with("#!"))
-            || trimmed_line.starts_with(is_close_bracket)
+        let indent = indentation(line);
+        let mut multiline = false;
+
+        // If the item is split between multiple lines...
+        while let Some(more_indented) =
+            lines.next_if(|&line: &&_| indent < indentation(line) || line == "\n")
         {
-            continue;
+            multiline = true;
+            push(more_indented);
         }
 
-        if line.trim_end().ends_with('(') {
-            in_split_line = Some(line);
-            continue;
+        if multiline
+            && let Some(indented) =
+                // Only append next indented line if it looks like a closing bracket.
+                // Otherwise we incorrectly merge code like this (can be seen in
+                // compiler/rustc_session/src/options.rs):
+                //
+                // force_unwind_tables: Option<bool> = (None, parse_opt_bool, [TRACKED],
+                //     "force use of unwind tables"),
+                // incremental: Option<String> = (None, parse_opt_string, [UNTRACKED],
+                //     "enable incremental compilation"),
+                lines.next_if(|l| {
+                    indentation(l) == indent
+                        && l.trim_start_matches(' ').starts_with(is_close_bracket)
+                })
+        {
+            push(indented);
         }
 
-        let prev_line_trimmed_lowercase = prev_line.trim_start_matches(' ');
+        items.push(Item { full, trimmed });
+    };
 
-        if version_sort(trimmed_line, prev_line_trimmed_lowercase).is_lt() {
-            check.error(format!("{file}:{}: line not in alphabetical order", idx + 1));
-        }
+    items.sort_by(|a, b| version_sort(&a.trimmed, &b.trimmed));
+    items.into_iter().map(|l| l.full).chain([end_comments]).collect()
+}
 
-        prev_line = line;
-    }
+fn check_lines<'a>(path: &Path, content: &'a str, tidy_ctx: &TidyCtx, check: &mut RunningCheck) {
+    let mut offset = 0;
+
+    loop {
+        let rest = &content[offset..];
+        let start = rest.find(START_MARKER);
+        let end = rest.find(END_MARKER);
+
+        match (start, end) {
+            // error handling
+
+            // end before start
+            (Some(start), Some(end)) if end < start => {
+                check.error(format!(
+                    "{path}:{line_number} found `{END_MARKER}` expecting `{START_MARKER}`",
+                    path = path.display(),
+                    line_number = content[..offset + end].lines().count(),
+                ));
+                break;
+            }
 
-    check.error(format!("{file}: reached end of file expecting `{END_MARKER}`"));
-}
+            // end without a start
+            (None, Some(end)) => {
+                check.error(format!(
+                    "{path}:{line_number} found `{END_MARKER}` expecting `{START_MARKER}`",
+                    path = path.display(),
+                    line_number = content[..offset + end].lines().count(),
+                ));
+                break;
+            }
 
-fn check_lines<'a>(
-    file: &impl Display,
-    mut lines: impl Iterator<Item = (usize, &'a str)>,
-    check: &mut RunningCheck,
-) {
-    while let Some((idx, line)) = lines.next() {
-        if line.contains(END_MARKER) {
-            check.error(format!(
-                "{file}:{} found `{END_MARKER}` expecting `{START_MARKER}`",
-                idx + 1
-            ));
-        }
+            // start without an end
+            (Some(start), None) => {
+                check.error(format!(
+                    "{path}:{line_number} `{START_MARKER}` without a matching `{END_MARKER}`",
+                    path = path.display(),
+                    line_number = content[..offset + start].lines().count(),
+                ));
+                break;
+            }
+
+            // a second start in between start/end pair
+            (Some(start), Some(end))
+                if rest[start + START_MARKER.len()..end].contains(START_MARKER) =>
+            {
+                check.error(format!(
+                    "{path}:{line_number} found `{START_MARKER}` expecting `{END_MARKER}`",
+                    path = path.display(),
+                    line_number = content[..offset
+                        + sub_find(rest, start + START_MARKER.len()..end, START_MARKER)
+                            .unwrap()
+                            .start]
+                        .lines()
+                        .count()
+                ));
+                break;
+            }
 
-        if line.contains(START_MARKER) {
-            check_section(file, &mut lines, check);
+            // happy happy path :3
+            (Some(start), Some(end)) => {
+                assert!(start <= end);
+
+                // "...␤// tidy-alphabetical-start␤...␤// tidy-alphabetical-end␤..."
+                //                  start_nl_end --^  ^-- end_nl_start          ^-- end_nl_end
+
+                // Position after the newline after start marker
+                let start_nl_end = sub_find(rest, start + START_MARKER.len().., "\n").unwrap().end;
+
+                // Position before the new line before the end marker
+                let end_nl_start = rest[..end].rfind('\n').unwrap();
+
+                // Position after the newline after end marker
+                let end_nl_end = sub_find(rest, end + END_MARKER.len().., "\n")
+                    .map(|r| r.end)
+                    .unwrap_or(content.len() - offset);
+
+                let section = &rest[start_nl_end..=end_nl_start];
+                let sorted = sort_section(section);
+
+                // oh nyooo :(
+                if sorted != section {
+                    let base_line_number = content[..offset + start_nl_end].lines().count();
+                    let line_offset = sorted
+                        .lines()
+                        .zip(section.lines())
+                        .enumerate()
+                        .find(|(_, (a, b))| a != b)
+                        .unwrap()
+                        .0;
+                    let line_number = base_line_number + line_offset;
+
+                    check.error(format!(
+                        "{path}:{line_number}: line not in alphabetical order (tip: use --bless to sort this list)",
+                        path = path.display(),
+                    ));
+                }
+
+                // Start the next search after the end section
+                offset += end_nl_end;
+            }
+
+            // No more alphabetical lists, yay :3
+            (None, None) => break,
         }
     }
 }
 
 pub fn check(path: &Path, tidy_ctx: TidyCtx) {
     let mut check = tidy_ctx.start_check(CheckId::new("alphabetical").path(path));
 
-    let skip =
-        |path: &_, _is_dir| filter_dirs(path) || path.ends_with("tidy/src/alphabetical/tests.rs");
+    let skip = |path: &_, _is_dir| {
+        filter_dirs(path)
+            || path.ends_with("tidy/src/alphabetical.rs")
+            || path.ends_with("tidy/src/alphabetical/tests.rs")
+    };
 
-    walk(path, skip, &mut |entry, contents| {
-        let file = &entry.path().display();
-        let lines = contents.lines().enumerate();
-        check_lines(file, lines, &mut check)
+    walk(path, skip, &mut |entry, content| {
+        check_lines(entry.path(), content, &tidy_ctx, &mut check)
     });
 }
 
@@ -195,3 +306,17 @@ fn version_sort(a: &str, b: &str) -> Ordering {
 
     it1.next().cmp(&it2.next())
 }
+
+/// Finds `pat` in `s[range]` and returns a range such that `s[ret] == pat`.
+fn sub_find(s: &str, range: impl RangeBounds<usize>, pat: &str) -> Option<Range<usize>> {
+    s[(range.start_bound().cloned(), range.end_bound().cloned())]
+        .find(pat)
+        .map(|pos| {
+            pos + match range.start_bound().cloned() {
+                std::ops::Bound::Included(x) => x,
+                std::ops::Bound::Excluded(x) => x + 1,
+                std::ops::Bound::Unbounded => 0,
+            }
+        })
+        .map(|pos| pos..pos + pat.len())
+}