diff --git a/clippy_dev/src/deprecate_lint.rs b/clippy_dev/src/deprecate_lint.rs index 3bdc5b277232..4d99eb91e6f9 100644 --- a/clippy_dev/src/deprecate_lint.rs +++ b/clippy_dev/src/deprecate_lint.rs @@ -1,4 +1,5 @@ -use crate::update_lints::{DeprecatedLint, Lint, find_lint_decls, generate_lint_files, read_deprecated_lints}; +use crate::parse::{DeprecatedLint, Lint, find_lint_decls, read_deprecated_lints}; +use crate::update_lints::generate_lint_files; use crate::utils::{UpdateMode, Version}; use std::ffi::OsStr; use std::path::{Path, PathBuf}; @@ -14,10 +15,6 @@ use std::{fs, io}; /// /// If a file path could not read from or written to pub fn deprecate(clippy_version: Version, name: &str, reason: &str) { - if let Some((prefix, _)) = name.split_once("::") { - panic!("`{name}` should not contain the `{prefix}` prefix"); - } - let mut lints = find_lint_decls(); let (mut deprecated_lints, renamed_lints) = read_deprecated_lints(); @@ -135,14 +132,14 @@ fn remove_lint_declaration(name: &str, path: &Path, lints: &mut Vec) -> io ); assert!( - content[lint.declaration_range.clone()].contains(&name.to_uppercase()), + content[lint.declaration_range].contains(&name.to_uppercase()), "error: `{}` does not contain lint `{}`'s declaration", path.display(), lint.name ); // Remove lint declaration (declare_clippy_lint!) - content.replace_range(lint.declaration_range.clone(), ""); + content.replace_range(lint.declaration_range, ""); // Remove the module declaration (mod xyz;) let mod_decl = format!("\nmod {name};"); diff --git a/clippy_dev/src/lib.rs b/clippy_dev/src/lib.rs index 16f413e0c862..fb8b2e1c91c1 100644 --- a/clippy_dev/src/lib.rs +++ b/clippy_dev/src/lib.rs @@ -1,9 +1,11 @@ #![feature( - rustc_private, exit_status_error, if_let_guard, + new_range, + new_range_api, os_str_slice, os_string_truncate, + rustc_private, slice_split_once )] #![warn( @@ -34,3 +36,5 @@ pub mod update_lints; mod utils; pub use utils::{ClippyInfo, UpdateMode}; + +mod parse; diff --git a/clippy_dev/src/main.rs b/clippy_dev/src/main.rs index 1b6a590b896f..78fb44d7ad1b 100644 --- a/clippy_dev/src/main.rs +++ b/clippy_dev/src/main.rs @@ -7,7 +7,6 @@ use clippy_dev::{ ClippyInfo, UpdateMode, deprecate_lint, dogfood, fmt, lint, new_lint, release, rename_lint, serve, setup, sync, update_lints, }; -use std::convert::Infallible; use std::env; fn main() { @@ -95,6 +94,20 @@ fn main() { } } +fn lint_name(name: &str) -> Result { + let name = name.replace('-', "_"); + if let Some((pre, _)) = name.split_once("::") { + Err(format!("lint name should not contain the `{pre}` prefix")) + } else if name + .bytes() + .any(|x| !matches!(x, b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z')) + { + Err("lint name contains invalid characters".to_owned()) + } else { + Ok(name) + } +} + #[derive(Parser)] #[command(name = "dev", about)] struct Dev { @@ -150,7 +163,7 @@ enum DevCommand { #[arg( short, long, - value_parser = |name: &str| Ok::<_, Infallible>(name.replace('-', "_")), + value_parser = lint_name, )] /// Name of the new lint in snake case, ex: `fn_too_long` name: String, @@ -223,8 +236,12 @@ enum DevCommand { /// Rename a lint RenameLint { /// The name of the lint to rename + #[arg(value_parser = lint_name)] old_name: String, - #[arg(required_unless_present = "uplift")] + #[arg( + required_unless_present = "uplift", + value_parser = lint_name, + )] /// The new name of the lint new_name: Option, #[arg(long)] @@ -234,6 +251,7 @@ enum DevCommand { /// Deprecate the given lint Deprecate { /// The name of the lint to deprecate + #[arg(value_parser = lint_name)] name: String, #[arg(long, short)] /// The reason for deprecation diff --git a/clippy_dev/src/new_lint.rs b/clippy_dev/src/new_lint.rs index a14afd8c5f41..a180db6ad062 100644 --- a/clippy_dev/src/new_lint.rs +++ b/clippy_dev/src/new_lint.rs @@ -1,4 +1,5 @@ -use crate::utils::{RustSearcher, Token, Version}; +use crate::parse::cursor::{self, Capture, Cursor}; +use crate::utils::Version; use clap::ValueEnum; use indoc::{formatdoc, writedoc}; use std::fmt::{self, Write as _}; @@ -516,22 +517,22 @@ fn setup_mod_file(path: &Path, lint: &LintData<'_>) -> io::Result<&'static str> // Find both the last lint declaration (declare_clippy_lint!) and the lint pass impl fn parse_mod_file(path: &Path, contents: &str) -> (&'static str, usize) { #[allow(clippy::enum_glob_use)] - use Token::*; + use cursor::Pat::*; let mut context = None; let mut decl_end = None; - let mut searcher = RustSearcher::new(contents); - while let Some(name) = searcher.find_capture_token(CaptureIdent) { - match name { + let mut cursor = Cursor::new(contents); + let mut captures = [Capture::EMPTY]; + while let Some(name) = cursor.find_any_ident() { + match cursor.get_text(name) { "declare_clippy_lint" => { - if searcher.match_tokens(&[Bang, OpenBrace], &mut []) && searcher.find_token(CloseBrace) { - decl_end = Some(searcher.pos()); + if cursor.match_all(&[Bang, OpenBrace], &mut []) && cursor.find_pat(CloseBrace) { + decl_end = Some(cursor.pos()); } }, "impl" => { - let mut capture = ""; - if searcher.match_tokens(&[Lt, Lifetime, Gt, CaptureIdent], &mut [&mut capture]) { - match capture { + if cursor.match_all(&[Lt, Lifetime, Gt, CaptureIdent], &mut captures) { + match cursor.get_text(captures[0]) { "LateLintPass" => context = Some("LateContext"), "EarlyLintPass" => context = Some("EarlyContext"), _ => {}, diff --git a/clippy_dev/src/parse.rs b/clippy_dev/src/parse.rs new file mode 100644 index 000000000000..5cea73d34af5 --- /dev/null +++ b/clippy_dev/src/parse.rs @@ -0,0 +1,218 @@ +pub mod cursor; + +use self::cursor::{Capture, Cursor}; +use crate::utils::{ErrAction, File, expect_action}; +use core::range::Range; +use std::fs; +use std::path::{Path, PathBuf}; +use walkdir::{DirEntry, WalkDir}; + +pub struct Lint { + pub name: String, + pub group: String, + pub module: String, + pub path: PathBuf, + pub declaration_range: Range, +} + +pub struct DeprecatedLint { + pub name: String, + pub reason: String, + pub version: String, +} + +pub struct RenamedLint { + pub old_name: String, + pub new_name: String, + pub version: String, +} + +/// Finds all lint declarations (`declare_clippy_lint!`) +#[must_use] +pub fn find_lint_decls() -> Vec { + let mut lints = Vec::with_capacity(1000); + let mut contents = String::new(); + for e in expect_action(fs::read_dir("."), ErrAction::Read, ".") { + let e = expect_action(e, ErrAction::Read, "."); + if !expect_action(e.file_type(), ErrAction::Read, ".").is_dir() { + continue; + } + let Ok(mut name) = e.file_name().into_string() else { + continue; + }; + if name.starts_with("clippy_lints") && name != "clippy_lints_internal" { + name.push_str("/src"); + for (file, module) in read_src_with_module(name.as_ref()) { + parse_clippy_lint_decls( + file.path(), + File::open_read_to_cleared_string(file.path(), &mut contents), + &module, + &mut lints, + ); + } + } + } + lints.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name)); + lints +} + +/// Reads the source files from the given root directory +fn read_src_with_module(src_root: &Path) -> impl use<'_> + Iterator { + WalkDir::new(src_root).into_iter().filter_map(move |e| { + let e = expect_action(e, ErrAction::Read, src_root); + let path = e.path().as_os_str().as_encoded_bytes(); + if let Some(path) = path.strip_suffix(b".rs") + && let Some(path) = path.get(src_root.as_os_str().len() + 1..) + { + if path == b"lib" { + Some((e, String::new())) + } else { + let path = if let Some(path) = path.strip_suffix(b"mod") + && let Some(path) = path.strip_suffix(b"/").or_else(|| path.strip_suffix(b"\\")) + { + path + } else { + path + }; + if let Ok(path) = str::from_utf8(path) { + let path = path.replace(['/', '\\'], "::"); + Some((e, path)) + } else { + None + } + } + } else { + None + } + }) +} + +/// Parse a source file looking for `declare_clippy_lint` macro invocations. +fn parse_clippy_lint_decls(path: &Path, contents: &str, module: &str, lints: &mut Vec) { + #[allow(clippy::enum_glob_use)] + use cursor::Pat::*; + #[rustfmt::skip] + static DECL_TOKENS: &[cursor::Pat<'_>] = &[ + // !{ /// docs + Bang, OpenBrace, AnyComment, + // #[clippy::version = "version"] + Pound, OpenBracket, Ident("clippy"), DoubleColon, Ident("version"), Eq, LitStr, CloseBracket, + // pub NAME, GROUP, + Ident("pub"), CaptureIdent, Comma, AnyComment, CaptureIdent, Comma, + ]; + + let mut cursor = Cursor::new(contents); + let mut captures = [Capture::EMPTY; 2]; + while let Some(start) = cursor.find_ident("declare_clippy_lint") { + if cursor.match_all(DECL_TOKENS, &mut captures) && cursor.find_pat(CloseBrace) { + lints.push(Lint { + name: cursor.get_text(captures[0]).to_lowercase(), + group: cursor.get_text(captures[1]).into(), + module: module.into(), + path: path.into(), + declaration_range: start as usize..cursor.pos() as usize, + }); + } + } +} + +#[must_use] +pub fn read_deprecated_lints() -> (Vec, Vec) { + #[allow(clippy::enum_glob_use)] + use cursor::Pat::*; + #[rustfmt::skip] + static DECL_TOKENS: &[cursor::Pat<'_>] = &[ + // #[clippy::version = "version"] + Pound, OpenBracket, Ident("clippy"), DoubleColon, Ident("version"), Eq, CaptureLitStr, CloseBracket, + // ("first", "second"), + OpenParen, CaptureLitStr, Comma, CaptureLitStr, CloseParen, Comma, + ]; + #[rustfmt::skip] + static DEPRECATED_TOKENS: &[cursor::Pat<'_>] = &[ + // !{ DEPRECATED(DEPRECATED_VERSION) = [ + Bang, OpenBrace, Ident("DEPRECATED"), OpenParen, Ident("DEPRECATED_VERSION"), CloseParen, Eq, OpenBracket, + ]; + #[rustfmt::skip] + static RENAMED_TOKENS: &[cursor::Pat<'_>] = &[ + // !{ RENAMED(RENAMED_VERSION) = [ + Bang, OpenBrace, Ident("RENAMED"), OpenParen, Ident("RENAMED_VERSION"), CloseParen, Eq, OpenBracket, + ]; + + let path = "clippy_lints/src/deprecated_lints.rs"; + let mut deprecated = Vec::with_capacity(30); + let mut renamed = Vec::with_capacity(80); + let mut contents = String::new(); + File::open_read_to_cleared_string(path, &mut contents); + + let mut cursor = Cursor::new(&contents); + let mut captures = [Capture::EMPTY; 3]; + + // First instance is the macro definition. + assert!( + cursor.find_ident("declare_with_version").is_some(), + "error reading deprecated lints" + ); + + if cursor.find_ident("declare_with_version").is_some() && cursor.match_all(DEPRECATED_TOKENS, &mut []) { + while cursor.match_all(DECL_TOKENS, &mut captures) { + deprecated.push(DeprecatedLint { + name: parse_str_single_line(path.as_ref(), cursor.get_text(captures[1])), + reason: parse_str_single_line(path.as_ref(), cursor.get_text(captures[2])), + version: parse_str_single_line(path.as_ref(), cursor.get_text(captures[0])), + }); + } + } else { + panic!("error reading deprecated lints"); + } + + if cursor.find_ident("declare_with_version").is_some() && cursor.match_all(RENAMED_TOKENS, &mut []) { + while cursor.match_all(DECL_TOKENS, &mut captures) { + renamed.push(RenamedLint { + old_name: parse_str_single_line(path.as_ref(), cursor.get_text(captures[1])), + new_name: parse_str_single_line(path.as_ref(), cursor.get_text(captures[2])), + version: parse_str_single_line(path.as_ref(), cursor.get_text(captures[0])), + }); + } + } else { + panic!("error reading renamed lints"); + } + + deprecated.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name)); + renamed.sort_by(|lhs, rhs| lhs.old_name.cmp(&rhs.old_name)); + (deprecated, renamed) +} + +/// Removes the line splices and surrounding quotes from a string literal +fn parse_str_lit(s: &str) -> String { + let (s, is_raw) = if let Some(s) = s.strip_prefix("r") { + (s.trim_matches('#'), true) + } else { + (s, false) + }; + let s = s + .strip_prefix('"') + .and_then(|s| s.strip_suffix('"')) + .unwrap_or_else(|| panic!("expected quoted string, found `{s}`")); + + if is_raw { + s.into() + } else { + let mut res = String::with_capacity(s.len()); + rustc_literal_escaper::unescape_str(s, &mut |_, ch| { + if let Ok(ch) = ch { + res.push(ch); + } + }); + res + } +} + +fn parse_str_single_line(path: &Path, s: &str) -> String { + let value = parse_str_lit(s); + assert!( + !value.contains('\n'), + "error parsing `{}`: `{s}` should be a single line string", + path.display(), + ); + value +} diff --git a/clippy_dev/src/parse/cursor.rs b/clippy_dev/src/parse/cursor.rs new file mode 100644 index 000000000000..6dc003f326de --- /dev/null +++ b/clippy_dev/src/parse/cursor.rs @@ -0,0 +1,263 @@ +use core::slice; +use rustc_lexer::{self as lex, LiteralKind, Token, TokenKind}; + +/// A token pattern used for searching and matching by the [`Cursor`]. +/// +/// In the event that a pattern is a multi-token sequence, earlier tokens will be consumed +/// even if the pattern ultimately isn't matched. e.g. With the sequence `:*` matching +/// `DoubleColon` will consume the first `:` and then fail to match, leaving the cursor at +/// the `*`. +#[derive(Clone, Copy)] +pub enum Pat<'a> { + /// Matches any number of comments and doc comments. + AnyComment, + Ident(&'a str), + CaptureIdent, + LitStr, + CaptureLitStr, + Bang, + CloseBrace, + CloseBracket, + CloseParen, + Comma, + DoubleColon, + Eq, + Lifetime, + Lt, + Gt, + OpenBrace, + OpenBracket, + OpenParen, + Pound, + Semi, +} + +#[derive(Clone, Copy)] +pub struct Capture { + pub pos: u32, + pub len: u32, +} +impl Capture { + pub const EMPTY: Self = Self { pos: 0, len: 0 }; +} + +/// A unidirectional cursor over a token stream that is lexed on demand. +pub struct Cursor<'txt> { + next_token: Token, + pos: u32, + inner: lex::Cursor<'txt>, + text: &'txt str, +} +impl<'txt> Cursor<'txt> { + #[must_use] + pub fn new(text: &'txt str) -> Self { + let mut inner = lex::Cursor::new(text, lex::FrontmatterAllowed::Yes); + Self { + next_token: inner.advance_token(), + pos: 0, + inner, + text, + } + } + + /// Gets the text of the captured token assuming it came from this cursor. + #[must_use] + pub fn get_text(&self, capture: Capture) -> &'txt str { + &self.text[capture.pos as usize..(capture.pos + capture.len) as usize] + } + + /// Gets the text that makes up the next token in the stream, or the empty string if + /// stream is exhausted. + #[must_use] + pub fn peek_text(&self) -> &'txt str { + &self.text[self.pos as usize..(self.pos + self.next_token.len) as usize] + } + + /// Gets the length of the next token in bytes, or zero if the stream is exhausted. + #[must_use] + pub fn peek_len(&self) -> u32 { + self.next_token.len + } + + /// Gets the next token in the stream, or [`TokenKind::Eof`] if the stream is + /// exhausted. + #[must_use] + pub fn peek(&self) -> TokenKind { + self.next_token.kind + } + + /// Gets the offset of the next token in the source string, or the string's length if + /// the stream is exhausted. + #[must_use] + pub fn pos(&self) -> u32 { + self.pos + } + + /// Gets whether the cursor has exhausted its input. + #[must_use] + pub fn at_end(&self) -> bool { + self.next_token.kind == TokenKind::Eof + } + + /// Advances the cursor to the next token. If the stream is exhausted this will set + /// the next token to [`TokenKind::Eof`]. + pub fn step(&mut self) { + // `next_token.len` is zero for the eof marker. + self.pos += self.next_token.len; + self.next_token = self.inner.advance_token(); + } + + /// Consumes tokens until the given pattern is either fully matched of fails to match. + /// Returns whether the pattern was fully matched. + /// + /// For each capture made by the pattern one item will be taken from the capture + /// sequence with the result placed inside. + fn match_impl(&mut self, pat: Pat<'_>, captures: &mut slice::IterMut<'_, Capture>) -> bool { + loop { + match (pat, self.next_token.kind) { + #[rustfmt::skip] // rustfmt bug: https://github.com/rust-lang/rustfmt/issues/6697 + (_, TokenKind::Whitespace) + | ( + Pat::AnyComment, + TokenKind::BlockComment { terminated: true, .. } | TokenKind::LineComment { .. }, + ) => self.step(), + (Pat::AnyComment, _) => return true, + (Pat::Bang, TokenKind::Bang) + | (Pat::CloseBrace, TokenKind::CloseBrace) + | (Pat::CloseBracket, TokenKind::CloseBracket) + | (Pat::CloseParen, TokenKind::CloseParen) + | (Pat::Comma, TokenKind::Comma) + | (Pat::Eq, TokenKind::Eq) + | (Pat::Lifetime, TokenKind::Lifetime { .. }) + | (Pat::Lt, TokenKind::Lt) + | (Pat::Gt, TokenKind::Gt) + | (Pat::OpenBrace, TokenKind::OpenBrace) + | (Pat::OpenBracket, TokenKind::OpenBracket) + | (Pat::OpenParen, TokenKind::OpenParen) + | (Pat::Pound, TokenKind::Pound) + | (Pat::Semi, TokenKind::Semi) + | ( + Pat::LitStr, + TokenKind::Literal { + kind: LiteralKind::Str { terminated: true } | LiteralKind::RawStr { .. }, + .. + }, + ) => { + self.step(); + return true; + }, + (Pat::Ident(x), TokenKind::Ident) if x == self.peek_text() => { + self.step(); + return true; + }, + (Pat::DoubleColon, TokenKind::Colon) => { + self.step(); + if !self.at_end() && matches!(self.next_token.kind, TokenKind::Colon) { + self.step(); + return true; + } + return false; + }, + #[rustfmt::skip] + ( + Pat::CaptureLitStr, + TokenKind::Literal { + kind: + LiteralKind::Str { terminated: true } + | LiteralKind::RawStr { n_hashes: Some(_) }, + .. + }, + ) + | (Pat::CaptureIdent, TokenKind::Ident) => { + *captures.next().unwrap() = Capture { pos: self.pos, len: self.next_token.len }; + self.step(); + return true; + }, + _ => return false, + } + } + } + + /// Consumes all tokens until the specified identifier is found and returns its + /// position. Returns `None` if the identifier could not be found. + /// + /// The cursor will be positioned immediately after the identifier, or at the end if + /// it is not. + pub fn find_ident(&mut self, ident: &str) -> Option { + loop { + match self.next_token.kind { + TokenKind::Ident if self.peek_text() == ident => { + let pos = self.pos; + self.step(); + return Some(pos); + }, + TokenKind::Eof => return None, + _ => self.step(), + } + } + } + + /// Consumes all tokens until the next identifier is found and captures it. Returns + /// `None` if no identifier could be found. + /// + /// The cursor will be positioned immediately after the identifier, or at the end if + /// it is not. + pub fn find_any_ident(&mut self) -> Option { + loop { + match self.next_token.kind { + TokenKind::Ident => { + let res = Capture { + pos: self.pos, + len: self.next_token.len, + }; + self.step(); + return Some(res); + }, + TokenKind::Eof => return None, + _ => self.step(), + } + } + } + + /// Continually attempt to match the pattern on subsequent tokens until a match is + /// found. Returns whether the pattern was successfully matched. + /// + /// Not generally suitable for multi-token patterns or patterns that can match + /// nothing. + #[must_use] + pub fn find_pat(&mut self, pat: Pat<'_>) -> bool { + let mut capture = [].iter_mut(); + while !self.match_impl(pat, &mut capture) { + self.step(); + if self.at_end() { + return false; + } + } + true + } + + /// Attempts to match a sequence of patterns at the current position. Returns whether + /// all patterns were successfully matched. + /// + /// Captures will be written to the given slice in the order they're matched. If a + /// capture is matched, but there are no more capture slots this will panic. If the + /// match is completed without filling all the capture slots they will be left + /// unmodified. + /// + /// If the match fails the cursor will be positioned at the first failing token. + #[must_use] + pub fn match_all(&mut self, pats: &[Pat<'_>], captures: &mut [Capture]) -> bool { + let mut captures = captures.iter_mut(); + pats.iter().all(|&p| self.match_impl(p, &mut captures)) + } + + /// Attempts to match a single pattern at the current position. Returns whether the + /// pattern was successfully matched. + /// + /// If the pattern attempts to capture anything this will panic. If the match fails + /// the cursor will be positioned at the first failing token. + #[must_use] + pub fn match_pat(&mut self, pat: Pat<'_>) -> bool { + self.match_impl(pat, &mut [].iter_mut()) + } +} diff --git a/clippy_dev/src/release.rs b/clippy_dev/src/release.rs index 15392dd1d292..d11070bab85b 100644 --- a/clippy_dev/src/release.rs +++ b/clippy_dev/src/release.rs @@ -23,7 +23,7 @@ pub fn bump_version(mut version: Version) { dst.push_str(&src[..package.version_range.start]); write!(dst, "\"{}\"", version.toml_display()).unwrap(); dst.push_str(&src[package.version_range.end..]); - UpdateStatus::from_changed(src.get(package.version_range.clone()) != dst.get(package.version_range)) + UpdateStatus::from_changed(src.get(package.version_range) != dst.get(package.version_range)) } }); } diff --git a/clippy_dev/src/rename_lint.rs b/clippy_dev/src/rename_lint.rs index d62597428e21..207c9b2ff596 100644 --- a/clippy_dev/src/rename_lint.rs +++ b/clippy_dev/src/rename_lint.rs @@ -1,7 +1,9 @@ -use crate::update_lints::{RenamedLint, find_lint_decls, generate_lint_files, read_deprecated_lints}; +use crate::parse::cursor::{self, Capture, Cursor}; +use crate::parse::{RenamedLint, find_lint_decls, read_deprecated_lints}; +use crate::update_lints::generate_lint_files; use crate::utils::{ - ErrAction, FileUpdater, RustSearcher, Token, UpdateMode, UpdateStatus, Version, delete_dir_if_exists, - delete_file_if_exists, expect_action, try_rename_dir, try_rename_file, walk_dir_no_dot_or_target, + ErrAction, FileUpdater, UpdateMode, UpdateStatus, Version, delete_dir_if_exists, delete_file_if_exists, + expect_action, try_rename_dir, try_rename_file, walk_dir_no_dot_or_target, }; use rustc_lexer::TokenKind; use std::ffi::OsString; @@ -25,13 +27,6 @@ use std::path::Path; /// * If `old_name` names a deprecated or renamed lint. #[expect(clippy::too_many_lines)] pub fn rename(clippy_version: Version, old_name: &str, new_name: &str, uplift: bool) { - if let Some((prefix, _)) = old_name.split_once("::") { - panic!("`{old_name}` should not contain the `{prefix}` prefix"); - } - if let Some((prefix, _)) = new_name.split_once("::") { - panic!("`{new_name}` should not contain the `{prefix}` prefix"); - } - let mut updater = FileUpdater::default(); let mut lints = find_lint_decls(); let (deprecated_lints, mut renamed_lints) = read_deprecated_lints(); @@ -285,47 +280,46 @@ fn file_update_fn<'a, 'b>( move |_, src, dst| { let mut copy_pos = 0u32; let mut changed = false; - let mut searcher = RustSearcher::new(src); - let mut capture = ""; + let mut cursor = Cursor::new(src); + let mut captures = [Capture::EMPTY]; loop { - match searcher.peek() { + match cursor.peek() { TokenKind::Eof => break, TokenKind::Ident => { - let match_start = searcher.pos(); - let text = searcher.peek_text(); - searcher.step(); + let match_start = cursor.pos(); + let text = cursor.peek_text(); + cursor.step(); match text { // clippy::line_name or clippy::lint-name "clippy" => { - if searcher.match_tokens(&[Token::DoubleColon, Token::CaptureIdent], &mut [&mut capture]) - && capture == old_name + if cursor.match_all(&[cursor::Pat::DoubleColon, cursor::Pat::CaptureIdent], &mut captures) + && cursor.get_text(captures[0]) == old_name { - dst.push_str(&src[copy_pos as usize..searcher.pos() as usize - capture.len()]); + dst.push_str(&src[copy_pos as usize..captures[0].pos as usize]); dst.push_str(new_name); - copy_pos = searcher.pos(); + copy_pos = cursor.pos(); changed = true; } }, // mod lint_name "mod" => { if !matches!(mod_edit, ModEdit::None) - && searcher.match_tokens(&[Token::CaptureIdent], &mut [&mut capture]) - && capture == old_name + && let Some(pos) = cursor.find_ident(old_name) { match mod_edit { ModEdit::Rename => { - dst.push_str(&src[copy_pos as usize..searcher.pos() as usize - capture.len()]); + dst.push_str(&src[copy_pos as usize..pos as usize]); dst.push_str(new_name); - copy_pos = searcher.pos(); + copy_pos = cursor.pos(); changed = true; }, - ModEdit::Delete if searcher.match_tokens(&[Token::Semi], &mut []) => { + ModEdit::Delete if cursor.match_pat(cursor::Pat::Semi) => { let mut start = &src[copy_pos as usize..match_start as usize]; if start.ends_with("\n\n") { start = &start[..start.len() - 1]; } dst.push_str(start); - copy_pos = searcher.pos(); + copy_pos = cursor.pos(); if src[copy_pos as usize..].starts_with("\n\n") { copy_pos += 1; } @@ -337,8 +331,8 @@ fn file_update_fn<'a, 'b>( }, // lint_name:: name if matches!(mod_edit, ModEdit::Rename) && name == old_name => { - let name_end = searcher.pos(); - if searcher.match_tokens(&[Token::DoubleColon], &mut []) { + let name_end = cursor.pos(); + if cursor.match_pat(cursor::Pat::DoubleColon) { dst.push_str(&src[copy_pos as usize..match_start as usize]); dst.push_str(new_name); copy_pos = name_end; @@ -356,36 +350,36 @@ fn file_update_fn<'a, 'b>( }; dst.push_str(&src[copy_pos as usize..match_start as usize]); dst.push_str(replacement); - copy_pos = searcher.pos(); + copy_pos = cursor.pos(); changed = true; }, } }, // //~ lint_name TokenKind::LineComment { doc_style: None } => { - let text = searcher.peek_text(); + let text = cursor.peek_text(); if text.starts_with("//~") && let Some(text) = text.strip_suffix(old_name) && !text.ends_with(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')) { - dst.push_str(&src[copy_pos as usize..searcher.pos() as usize + text.len()]); + dst.push_str(&src[copy_pos as usize..cursor.pos() as usize + text.len()]); dst.push_str(new_name); - copy_pos = searcher.pos() + searcher.peek_len(); + copy_pos = cursor.pos() + cursor.peek_len(); changed = true; } - searcher.step(); + cursor.step(); }, // ::lint_name TokenKind::Colon - if searcher.match_tokens(&[Token::DoubleColon, Token::CaptureIdent], &mut [&mut capture]) - && capture == old_name => + if cursor.match_all(&[cursor::Pat::DoubleColon, cursor::Pat::CaptureIdent], &mut captures) + && cursor.get_text(captures[0]) == old_name => { - dst.push_str(&src[copy_pos as usize..searcher.pos() as usize - capture.len()]); + dst.push_str(&src[copy_pos as usize..captures[0].pos as usize]); dst.push_str(new_name); - copy_pos = searcher.pos(); + copy_pos = cursor.pos(); changed = true; }, - _ => searcher.step(), + _ => cursor.step(), } } diff --git a/clippy_dev/src/update_lints.rs b/clippy_dev/src/update_lints.rs index 5f6e874ffe25..ef841891721d 100644 --- a/clippy_dev/src/update_lints.rs +++ b/clippy_dev/src/update_lints.rs @@ -1,13 +1,10 @@ -use crate::utils::{ - ErrAction, File, FileUpdater, RustSearcher, Token, UpdateMode, UpdateStatus, expect_action, update_text_region_fn, -}; +use crate::parse::cursor::Cursor; +use crate::parse::{DeprecatedLint, Lint, RenamedLint, find_lint_decls, read_deprecated_lints}; +use crate::utils::{FileUpdater, UpdateMode, UpdateStatus, update_text_region_fn}; use itertools::Itertools; use std::collections::HashSet; use std::fmt::Write; -use std::fs; -use std::ops::Range; -use std::path::{self, Path, PathBuf}; -use walkdir::{DirEntry, WalkDir}; +use std::path::{self, Path}; const GENERATED_FILE_COMMENT: &str = "// This file was generated by `cargo dev update_lints`.\n\ // Use that command to update this file and do not edit by hand.\n\ @@ -79,13 +76,13 @@ pub fn generate_lint_files( update_mode, "clippy_lints/src/deprecated_lints.rs", &mut |_, src, dst| { - let mut searcher = RustSearcher::new(src); + let mut cursor = Cursor::new(src); assert!( - searcher.find_token(Token::Ident("declare_with_version")) - && searcher.find_token(Token::Ident("declare_with_version")), + cursor.find_ident("declare_with_version").is_some() + && cursor.find_ident("declare_with_version").is_some(), "error reading deprecated lints" ); - dst.push_str(&src[..searcher.pos() as usize]); + dst.push_str(&src[..cursor.pos() as usize]); dst.push_str("! { DEPRECATED(DEPRECATED_VERSION) = [\n"); for lint in deprecated { write!( @@ -200,260 +197,3 @@ pub fn generate_lint_files( fn round_to_fifty(count: usize) -> usize { count / 50 * 50 } - -/// Lint data parsed from the Clippy source code. -#[derive(PartialEq, Eq, Debug)] -pub struct Lint { - pub name: String, - pub group: String, - pub module: String, - pub path: PathBuf, - pub declaration_range: Range, -} - -pub struct DeprecatedLint { - pub name: String, - pub reason: String, - pub version: String, -} - -pub struct RenamedLint { - pub old_name: String, - pub new_name: String, - pub version: String, -} - -/// Finds all lint declarations (`declare_clippy_lint!`) -#[must_use] -pub fn find_lint_decls() -> Vec { - let mut lints = Vec::with_capacity(1000); - let mut contents = String::new(); - for e in expect_action(fs::read_dir("."), ErrAction::Read, ".") { - let e = expect_action(e, ErrAction::Read, "."); - if !expect_action(e.file_type(), ErrAction::Read, ".").is_dir() { - continue; - } - let Ok(mut name) = e.file_name().into_string() else { - continue; - }; - if name.starts_with("clippy_lints") && name != "clippy_lints_internal" { - name.push_str("/src"); - for (file, module) in read_src_with_module(name.as_ref()) { - parse_clippy_lint_decls( - file.path(), - File::open_read_to_cleared_string(file.path(), &mut contents), - &module, - &mut lints, - ); - } - } - } - lints.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name)); - lints -} - -/// Reads the source files from the given root directory -fn read_src_with_module(src_root: &Path) -> impl use<'_> + Iterator { - WalkDir::new(src_root).into_iter().filter_map(move |e| { - let e = expect_action(e, ErrAction::Read, src_root); - let path = e.path().as_os_str().as_encoded_bytes(); - if let Some(path) = path.strip_suffix(b".rs") - && let Some(path) = path.get(src_root.as_os_str().len() + 1..) - { - if path == b"lib" { - Some((e, String::new())) - } else { - let path = if let Some(path) = path.strip_suffix(b"mod") - && let Some(path) = path.strip_suffix(b"/").or_else(|| path.strip_suffix(b"\\")) - { - path - } else { - path - }; - if let Ok(path) = str::from_utf8(path) { - let path = path.replace(['/', '\\'], "::"); - Some((e, path)) - } else { - None - } - } - } else { - None - } - }) -} - -/// Parse a source file looking for `declare_clippy_lint` macro invocations. -fn parse_clippy_lint_decls(path: &Path, contents: &str, module: &str, lints: &mut Vec) { - #[allow(clippy::enum_glob_use)] - use Token::*; - #[rustfmt::skip] - static DECL_TOKENS: &[Token<'_>] = &[ - // !{ /// docs - Bang, OpenBrace, AnyComment, - // #[clippy::version = "version"] - Pound, OpenBracket, Ident("clippy"), DoubleColon, Ident("version"), Eq, LitStr, CloseBracket, - // pub NAME, GROUP, - Ident("pub"), CaptureIdent, Comma, AnyComment, CaptureIdent, Comma, - ]; - - let mut searcher = RustSearcher::new(contents); - while searcher.find_token(Ident("declare_clippy_lint")) { - let start = searcher.pos() as usize - "declare_clippy_lint".len(); - let (mut name, mut group) = ("", ""); - if searcher.match_tokens(DECL_TOKENS, &mut [&mut name, &mut group]) && searcher.find_token(CloseBrace) { - lints.push(Lint { - name: name.to_lowercase(), - group: group.into(), - module: module.into(), - path: path.into(), - declaration_range: start..searcher.pos() as usize, - }); - } - } -} - -#[must_use] -pub fn read_deprecated_lints() -> (Vec, Vec) { - #[allow(clippy::enum_glob_use)] - use Token::*; - #[rustfmt::skip] - static DECL_TOKENS: &[Token<'_>] = &[ - // #[clippy::version = "version"] - Pound, OpenBracket, Ident("clippy"), DoubleColon, Ident("version"), Eq, CaptureLitStr, CloseBracket, - // ("first", "second"), - OpenParen, CaptureLitStr, Comma, CaptureLitStr, CloseParen, Comma, - ]; - #[rustfmt::skip] - static DEPRECATED_TOKENS: &[Token<'_>] = &[ - // !{ DEPRECATED(DEPRECATED_VERSION) = [ - Bang, OpenBrace, Ident("DEPRECATED"), OpenParen, Ident("DEPRECATED_VERSION"), CloseParen, Eq, OpenBracket, - ]; - #[rustfmt::skip] - static RENAMED_TOKENS: &[Token<'_>] = &[ - // !{ RENAMED(RENAMED_VERSION) = [ - Bang, OpenBrace, Ident("RENAMED"), OpenParen, Ident("RENAMED_VERSION"), CloseParen, Eq, OpenBracket, - ]; - - let path = "clippy_lints/src/deprecated_lints.rs"; - let mut deprecated = Vec::with_capacity(30); - let mut renamed = Vec::with_capacity(80); - let mut contents = String::new(); - File::open_read_to_cleared_string(path, &mut contents); - - let mut searcher = RustSearcher::new(&contents); - - // First instance is the macro definition. - assert!( - searcher.find_token(Ident("declare_with_version")), - "error reading deprecated lints" - ); - - if searcher.find_token(Ident("declare_with_version")) && searcher.match_tokens(DEPRECATED_TOKENS, &mut []) { - let mut version = ""; - let mut name = ""; - let mut reason = ""; - while searcher.match_tokens(DECL_TOKENS, &mut [&mut version, &mut name, &mut reason]) { - deprecated.push(DeprecatedLint { - name: parse_str_single_line(path.as_ref(), name), - reason: parse_str_single_line(path.as_ref(), reason), - version: parse_str_single_line(path.as_ref(), version), - }); - } - } else { - panic!("error reading deprecated lints"); - } - - if searcher.find_token(Ident("declare_with_version")) && searcher.match_tokens(RENAMED_TOKENS, &mut []) { - let mut version = ""; - let mut old_name = ""; - let mut new_name = ""; - while searcher.match_tokens(DECL_TOKENS, &mut [&mut version, &mut old_name, &mut new_name]) { - renamed.push(RenamedLint { - old_name: parse_str_single_line(path.as_ref(), old_name), - new_name: parse_str_single_line(path.as_ref(), new_name), - version: parse_str_single_line(path.as_ref(), version), - }); - } - } else { - panic!("error reading renamed lints"); - } - - deprecated.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name)); - renamed.sort_by(|lhs, rhs| lhs.old_name.cmp(&rhs.old_name)); - (deprecated, renamed) -} - -/// Removes the line splices and surrounding quotes from a string literal -fn parse_str_lit(s: &str) -> String { - let s = s.strip_prefix("r").unwrap_or(s).trim_matches('#'); - let s = s - .strip_prefix('"') - .and_then(|s| s.strip_suffix('"')) - .unwrap_or_else(|| panic!("expected quoted string, found `{s}`")); - let mut res = String::with_capacity(s.len()); - rustc_literal_escaper::unescape_str(s, &mut |_, ch| { - if let Ok(ch) = ch { - res.push(ch); - } - }); - res -} - -fn parse_str_single_line(path: &Path, s: &str) -> String { - let value = parse_str_lit(s); - assert!( - !value.contains('\n'), - "error parsing `{}`: `{s}` should be a single line string", - path.display(), - ); - value -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parse_clippy_lint_decls() { - static CONTENTS: &str = r#" - declare_clippy_lint! { - #[clippy::version = "Hello Clippy!"] - pub PTR_ARG, - style, - "really long \ - text" - } - - declare_clippy_lint!{ - #[clippy::version = "Test version"] - pub DOC_MARKDOWN, - pedantic, - "single line" - } - "#; - let mut result = Vec::new(); - parse_clippy_lint_decls("".as_ref(), CONTENTS, "module_name", &mut result); - for r in &mut result { - r.declaration_range = Range::default(); - } - - let expected = vec![ - Lint { - name: "ptr_arg".into(), - group: "style".into(), - module: "module_name".into(), - path: PathBuf::new(), - declaration_range: Range::default(), - }, - Lint { - name: "doc_markdown".into(), - group: "pedantic".into(), - module: "module_name".into(), - path: PathBuf::new(), - declaration_range: Range::default(), - }, - ]; - assert_eq!(expected, result); - } -} diff --git a/clippy_dev/src/utils.rs b/clippy_dev/src/utils.rs index 057951d0e33b..526613a53c77 100644 --- a/clippy_dev/src/utils.rs +++ b/clippy_dev/src/utils.rs @@ -1,9 +1,7 @@ use core::fmt::{self, Display}; use core::num::NonZero; -use core::ops::Range; -use core::slice; +use core::range::Range; use core::str::FromStr; -use rustc_lexer::{self as lexer, FrontmatterAllowed}; use std::ffi::OsStr; use std::fs::{self, OpenOptions}; use std::io::{self, Read as _, Seek as _, SeekFrom, Write}; @@ -410,179 +408,6 @@ pub fn update_text_region_fn( move |path, src, dst| update_text_region(path, start, end, src, dst, &mut insert) } -#[derive(Clone, Copy)] -pub enum Token<'a> { - /// Matches any number of comments / doc comments. - AnyComment, - Ident(&'a str), - CaptureIdent, - LitStr, - CaptureLitStr, - Bang, - CloseBrace, - CloseBracket, - CloseParen, - /// This will consume the first colon even if the second doesn't exist. - DoubleColon, - Comma, - Eq, - Lifetime, - Lt, - Gt, - OpenBrace, - OpenBracket, - OpenParen, - Pound, - Semi, -} - -pub struct RustSearcher<'txt> { - text: &'txt str, - cursor: lexer::Cursor<'txt>, - pos: u32, - next_token: lexer::Token, -} -impl<'txt> RustSearcher<'txt> { - #[must_use] - #[expect(clippy::inconsistent_struct_constructor)] - pub fn new(text: &'txt str) -> Self { - let mut cursor = lexer::Cursor::new(text, FrontmatterAllowed::Yes); - Self { - text, - pos: 0, - next_token: cursor.advance_token(), - cursor, - } - } - - #[must_use] - pub fn peek_text(&self) -> &'txt str { - &self.text[self.pos as usize..(self.pos + self.next_token.len) as usize] - } - - #[must_use] - pub fn peek_len(&self) -> u32 { - self.next_token.len - } - - #[must_use] - pub fn peek(&self) -> lexer::TokenKind { - self.next_token.kind - } - - #[must_use] - pub fn pos(&self) -> u32 { - self.pos - } - - #[must_use] - pub fn at_end(&self) -> bool { - self.next_token.kind == lexer::TokenKind::Eof - } - - pub fn step(&mut self) { - // `next_len` is zero for the sentinel value and the eof marker. - self.pos += self.next_token.len; - self.next_token = self.cursor.advance_token(); - } - - /// Consumes the next token if it matches the requested value and captures the value if - /// requested. Returns true if a token was matched. - fn read_token(&mut self, token: Token<'_>, captures: &mut slice::IterMut<'_, &mut &'txt str>) -> bool { - loop { - match (token, self.next_token.kind) { - (_, lexer::TokenKind::Whitespace) - | ( - Token::AnyComment, - lexer::TokenKind::BlockComment { terminated: true, .. } | lexer::TokenKind::LineComment { .. }, - ) => self.step(), - (Token::AnyComment, _) => return true, - (Token::Bang, lexer::TokenKind::Bang) - | (Token::CloseBrace, lexer::TokenKind::CloseBrace) - | (Token::CloseBracket, lexer::TokenKind::CloseBracket) - | (Token::CloseParen, lexer::TokenKind::CloseParen) - | (Token::Comma, lexer::TokenKind::Comma) - | (Token::Eq, lexer::TokenKind::Eq) - | (Token::Lifetime, lexer::TokenKind::Lifetime { .. }) - | (Token::Lt, lexer::TokenKind::Lt) - | (Token::Gt, lexer::TokenKind::Gt) - | (Token::OpenBrace, lexer::TokenKind::OpenBrace) - | (Token::OpenBracket, lexer::TokenKind::OpenBracket) - | (Token::OpenParen, lexer::TokenKind::OpenParen) - | (Token::Pound, lexer::TokenKind::Pound) - | (Token::Semi, lexer::TokenKind::Semi) - | ( - Token::LitStr, - lexer::TokenKind::Literal { - kind: lexer::LiteralKind::Str { terminated: true } | lexer::LiteralKind::RawStr { .. }, - .. - }, - ) => { - self.step(); - return true; - }, - (Token::Ident(x), lexer::TokenKind::Ident) if x == self.peek_text() => { - self.step(); - return true; - }, - (Token::DoubleColon, lexer::TokenKind::Colon) => { - self.step(); - if !self.at_end() && matches!(self.next_token.kind, lexer::TokenKind::Colon) { - self.step(); - return true; - } - return false; - }, - ( - Token::CaptureLitStr, - lexer::TokenKind::Literal { - kind: lexer::LiteralKind::Str { terminated: true } | lexer::LiteralKind::RawStr { .. }, - .. - }, - ) - | (Token::CaptureIdent, lexer::TokenKind::Ident) => { - **captures.next().unwrap() = self.peek_text(); - self.step(); - return true; - }, - _ => return false, - } - } - } - - #[must_use] - pub fn find_token(&mut self, token: Token<'_>) -> bool { - let mut capture = [].iter_mut(); - while !self.read_token(token, &mut capture) { - self.step(); - if self.at_end() { - return false; - } - } - true - } - - #[must_use] - pub fn find_capture_token(&mut self, token: Token<'_>) -> Option<&'txt str> { - let mut res = ""; - let mut capture = &mut res; - let mut capture = slice::from_mut(&mut capture).iter_mut(); - while !self.read_token(token, &mut capture) { - self.step(); - if self.at_end() { - return None; - } - } - Some(res) - } - - #[must_use] - pub fn match_tokens(&mut self, tokens: &[Token<'_>], captures: &mut [&mut &'txt str]) -> bool { - let mut captures = captures.iter_mut(); - tokens.iter().all(|&t| self.read_token(t, &mut captures)) - } -} - #[track_caller] pub fn try_rename_file(old_name: &Path, new_name: &Path) -> bool { match OpenOptions::new().create_new(true).write(true).open(new_name) {