diff --git a/harper-asciidoc/tests/test_sources/comment.adoc b/harper-asciidoc/tests/test_sources/comment.adoc index 52fe8c313..493d4a3a2 100644 --- a/harper-asciidoc/tests/test_sources/comment.adoc +++ b/harper-asciidoc/tests/test_sources/comment.adoc @@ -1,2 +1,2 @@ // This is a comment with a typo: spelll -// Another line of comment. +// Another line of the same comment. diff --git a/harper-asciidoc/tests/test_sources/table.adoc b/harper-asciidoc/tests/test_sources/table.adoc index d0ad346dd..30a995987 100644 --- a/harper-asciidoc/tests/test_sources/table.adoc +++ b/harper-asciidoc/tests/test_sources/table.adoc @@ -1,3 +1,3 @@ |=== -| Cell 1 | Cell 2 with typo: errorr +| Cell 1 | Cell 2, but with a typo: errorr |=== diff --git a/harper-comments/tests/language_support_sources/basic.clj b/harper-comments/tests/language_support_sources/basic.clj index bf233632d..492cb47c8 100644 --- a/harper-comments/tests/language_support_sources/basic.clj +++ b/harper-comments/tests/language_support_sources/basic.clj @@ -57,7 +57,7 @@ ;;;; Frob Grovel -;;; This section of code has some important implications: +;;; This section of the code has some important implications: ;;; 1. Foo. ;;; 2. Bar. ;;; 3. Baz. diff --git a/harper-comments/tests/language_support_sources/clean.rs b/harper-comments/tests/language_support_sources/clean.rs index 3eab8feae..d79e8b11c 100644 --- a/harper-comments/tests/language_support_sources/clean.rs +++ b/harper-comments/tests/language_support_sources/clean.rs @@ -11,14 +11,14 @@ impl TestStruct { /// It has another [link](https://example.com) embedded inside fn test_function() {} - /// This is some gibberish to try to trigger a lint for sentences that continue for too long + /// This is some gibberish to try to trigger a lint for the sentences that continue for too long /// - /// This is some gibberish to try to trigger a lint for sentences that continue for too long + /// This is some gibberish to try to trigger a lint for the sentences that continue for too long /// - /// This is some gibberish to try to trigger a lint for sentences that continue for too long + /// This is some gibberish to try to trigger a lint for the sentences that continue for too long /// - /// This is some gibberish to try to trigger a lint for sentences that continue for too long + /// This is some gibberish to try to trigger a lint for the sentences that continue for too long /// - /// This is some gibberish to try to trigger a lint for sentences that continue for too long + /// This is some gibberish to try to trigger a lint for the sentences that continue for too long } diff --git a/harper-comments/tests/language_support_sources/clean.sol b/harper-comments/tests/language_support_sources/clean.sol index 6783503fb..5fe225aaa 100644 --- a/harper-comments/tests/language_support_sources/clean.sol +++ b/harper-comments/tests/language_support_sources/clean.sol @@ -14,13 +14,13 @@ contract TestContract { */ function testFunction2(uint256 p) external {} - // This is some gibberish to try to trigger a lint for sentences that continue for too long + // This is some gibberish to try to trigger a lint for the sentences that continue for too long // - // This is some gibberish to try to trigger a lint for sentences that continue for too long + // This is some gibberish to try to trigger a lint for the sentences that continue for too long // - // This is some gibberish to try to trigger a lint for sentences that continue for too long + // This is some gibberish to try to trigger a lint for the sentences that continue for too long // - // This is some gibberish to try to trigger a lint for sentences that continue for too long + // This is some gibberish to try to trigger a lint for the sentences that continue for too long // - // This is some gibberish to try to trigger a lint for sentences that continue for too long + // This is some gibberish to try to trigger a lint for the sentences that continue for too long } diff --git a/harper-core/src/linting/weir_rules/MissingDeterminer.weir b/harper-core/src/linting/weir_rules/MissingDeterminer.weir new file mode 100644 index 000000000..00eb45626 --- /dev/null +++ b/harper-core/src/linting/weir_rules/MissingDeterminer.weir @@ -0,0 +1,73 @@ +expr techNoun [bug, case, change, comment, feature, fix, log, note, problem, reason, report, repro, reproduction, request, response, scenario, screenshot, solution, step, summary, test, ticket, answer, example, explanation, idea, issue, update] +expr requestNounHead @techNoun +expr requestBareNounPhrase [@requestNounHead, (ADJ @requestNounHead), (ADJ ADJ @requestNounHead), (ADV ADJ @requestNounHead), (ADV ADJ ADJ @requestNounHead), (@requestNounHead @requestNounHead), (ADJ @requestNounHead @requestNounHead), (ADV ADJ @requestNounHead @requestNounHead)] +expr narrativeNoun [portrait, nest, glass, hand, coin, toy, victor] +expr narrativeBareNounPhrase [@narrativeNoun, (ADJ @narrativeNoun)] + +expr requestMissingDet <([get, provide, give, send, share, attach, include, add, need, want, see, submit, create, report, file, reproduce] @requestBareNounPhrase), ( )> +expr narrativeVerbObjectMissingDet <([painted, built, dropped, raised, found, hid, cheered] @narrativeBareNounPhrase), ( )> +expr narrativePrepObjectMissingDet <([in, on] [studio, tree, floor]), ( )> + +expr main [@requestMissingDet, @narrativeVerbObjectMissingDet, @narrativePrepObjectMissingDet] + +let message "Add a determiner before this noun phrase." +let description "Detects likely missing determiners in common request phrases and offers to insert one where necessary." +let kind "Grammar" +let becomes [" the ", " a ", " an "] + +test "would it be possible to get reproducible example of this?" "would it be possible to get a reproducible example of this?" +test "Would it be possible to get reproducible bug report?" "Would it be possible to get a reproducible bug report?" +test "Please provide detailed reproduction of this issue." "Please provide a detailed reproduction of this issue." +test "Can you send minimal test case?" "Can you send a minimal test case?" +test "We need quick response." "We need a quick response." +test "I can attach short log." "I can attach a short log." +test "Please share minimal repro." "Please share a minimal repro." +test "Could you submit small change?" "Could you submit a small change?" +test "Please provide reproducible example, thanks." "Please provide a reproducible example, thanks." +test "We should create clear summary." "We should create a clear summary." +test "Please provide the report." "Please provide the report." +test "Please provide your report." "Please provide your report." +test "Please provide another report." "Please provide another report." +test "Please provide more detailed report." "Please provide a more detailed report." +test "We can file short ticket." "We can file a short ticket." +test "Could you reproduce minimal scenario?" "Could you reproduce a minimal scenario?" +test "Please send clear explanation." "Please send a clear explanation." +test "We need simple fix." "We need a simple fix." +test "I want quick update." "I want a quick update." +test "Could you share detailed response?" "Could you share a detailed response?" +test "Please attach short screenshot." "Please attach a short screenshot." +test "We should include short note." "We should include a short note." +test "Please give minimal reproduction." "Please give a minimal reproduction." +test "Can you add brief comment?" "Can you add a brief comment?" +test "We want new feature." "We want a new feature." +test "They need clear solution." "They need a clear solution." + +allows "Please provide an example of this." +allows "Please provide the example." +allows "Please provide your example." +allows "Please provide another example." +test "We want detailed explanation." "We want a detailed explanation." +test "We need quick answer." "We need a quick answer." +test "Please send clear update." "Please send a clear update." +test "Please provide brief summary." "Please provide a brief summary." + +test "The artist painted portrait in studio." "The artist painted a portrait in the studio." +test "The bird built nest in tree." "The bird built a nest in the tree." +test "The child dropped glass on floor." "The child dropped the glass on the floor." +test "The student raised hand quietly." "The student raised a hand quietly." +test "The child found coin outside." "The child found a coin outside." +test "The child hid toy nearby." "The child hid a toy nearby." +test "The crowd cheered victor loudly." "The crowd cheered the victor loudly." + +allows "Let's do this for good measure." +allows "This is a test to make sure we don't split up paragraphs on newlines." +allows "This URL is used by the console to properly generate URLs when using the Artisan command line tool." +allows "The timezone is set to \"UTC\" by default as it is suitable for most use cases." +allows "This option can be set to any locale for which you plan to have translation strings." +allows "Use it to show ownership." +allows "This rule attempts to find common errors with redundancy and contractions that may lead to confusion for readers." +allows "ACF is a powerful tool that allows you to add custom fields to your content, providing greater flexibility in how you manage and display information." +allows "Historical records, colonial archives (however problematic their provenance), and oral histories from surviving communities, even if fragmented and distorted, provide crucial data points." +allows "Traditional cartography relies on observable features – mountains, rivers, coastlines – to create representations of space." +allows "My grandfather built timepieces to mark the passage of moments." +allows "I made a note to encourage Eleanor to share more stories with him; reminiscing often proved beneficial for patients struggling with respiratory distress." diff --git a/harper-core/src/weir/ast.rs b/harper-core/src/weir/ast.rs index 5d7bcc985..d22028c74 100644 --- a/harper-core/src/weir/ast.rs +++ b/harper-core/src/weir/ast.rs @@ -1,10 +1,13 @@ use harper_brill::UPOS; +use hashbrown::HashMap; use is_macro::Is; use itertools::Itertools; use crate::expr::{Expr, Filter, FirstMatchOf, SequenceExpr, UnlessStep}; use crate::patterns::{AnyPattern, DerivedFrom, UPOSSet, WhitespacePattern, Word}; -use crate::{CharString, Punctuation, Token}; +use crate::{CharString, CharStringExt, Lrc, Punctuation, Token}; + +use super::Error; #[derive(Debug, Clone, Eq, PartialEq)] pub struct Ast { @@ -62,6 +65,18 @@ impl Ast { _ => None, }) } + + /// Iterate through all the expressions in the tree, starting with the one first declared in the + /// tree. + pub fn iter_exprs(&self) -> impl Iterator { + self.stmts.iter().filter_map(|stmt| { + if let AstStmtNode::SetExpr { name, value } = stmt { + Some((name.as_str(), value)) + } else { + None + } + }) + } } /// A node that represents an expression that can be used to search through natural language. @@ -78,53 +93,66 @@ pub enum AstExprNode { Seq(Vec), Arr(Vec), Filter(Vec), + ExprRef(CharString), Anything, } impl AstExprNode { /// Create an actual expression that fulfills the pattern matching contract defined by this tree. - pub fn to_expr(&self) -> Box { + /// + /// Requires a map of all expressions currently in the context. + pub fn to_expr( + &self, + ctx_exprs: &HashMap>>, + ) -> Result, Error> { match self { - AstExprNode::Anything => Box::new(AnyPattern), - AstExprNode::Progressive => { - Box::new(|tok: &Token, _: &[char]| tok.kind.is_verb_progressive_form()) - } - AstExprNode::UPOSSet(upos) => Box::new(UPOSSet::new(upos)), - AstExprNode::Whitespace => Box::new(WhitespacePattern), - AstExprNode::Word(word) => Box::new(Word::from_chars(word)), - AstExprNode::DerivativeOf(word) => Box::new(DerivedFrom::new_from_chars(word)), - AstExprNode::Not(ast_node) => Box::new(UnlessStep::new( - ast_node.to_expr(), + AstExprNode::Anything => Ok(Box::new(AnyPattern)), + AstExprNode::Progressive => Ok(Box::new(|tok: &Token, _: &[char]| { + tok.kind.is_verb_progressive_form() + })), + AstExprNode::UPOSSet(upos) => Ok(Box::new(UPOSSet::new(upos))), + AstExprNode::Whitespace => Ok(Box::new(WhitespacePattern)), + AstExprNode::Word(word) => Ok(Box::new(Word::from_chars(word))), + AstExprNode::DerivativeOf(word) => Ok(Box::new(DerivedFrom::new_from_chars(word))), + AstExprNode::Not(ast_node) => Ok(Box::new(UnlessStep::new( + ast_node.to_expr(ctx_exprs)?, |_tok: &Token, _: &[char]| true, - )), + )) as Box), AstExprNode::Seq(children) => { let mut expr = SequenceExpr::default(); for node in children { - expr = expr.then_boxed(node.to_expr()); + expr = expr.then_boxed(node.to_expr(ctx_exprs)?); } - Box::new(expr) + Ok(Box::new(expr)) } AstExprNode::Arr(children) => { let mut expr = FirstMatchOf::default(); for node in children { - expr.add_boxed(node.to_expr()); + expr.add_boxed(node.to_expr(ctx_exprs)?); } - Box::new(expr) - } - AstExprNode::Filter(children) => { - Box::new(Filter::new(children.iter().map(|n| n.to_expr()).collect())) + Ok(Box::new(expr)) } + AstExprNode::Filter(children) => Ok(Box::new(Filter::new( + children + .iter() + .map(|n| n.to_expr(ctx_exprs)) + .process_results(|iter| iter.collect())?, + ))), AstExprNode::Punctuation(punct) => { let punct = *punct; - Box::new(move |tok: &Token, _: &[char]| { + Ok(Box::new(move |tok: &Token, _: &[char]| { tok.kind.as_punctuation().is_some_and(|p| *p == punct) - }) + })) } + AstExprNode::ExprRef(name) => ctx_exprs + .get(&name.to_string()) + .map(|e| Box::new(e.clone()) as Box) + .ok_or_else(|| Error::UnableToResolveExpr(name.to_string())), } } } diff --git a/harper-core/src/weir/error.rs b/harper-core/src/weir/error.rs index 9322bf040..c60650a3b 100644 --- a/harper-core/src/weir/error.rs +++ b/harper-core/src/weir/error.rs @@ -10,7 +10,7 @@ pub enum Error { UnmatchedBrace, #[error("Expected a comma here.")] ExpectedComma, - #[error("Expected a valid keyword.")] + #[error("Expected a valid keyword. Got: {0}")] UnexpectedToken(String), #[error("Expected a value to be defined.")] ExpectedVariableUndefined, @@ -20,4 +20,6 @@ pub enum Error { InvalidReplacementStrategy, #[error("Expected a variable type other than the one provided.")] ExpectedDifferentVariableType, + #[error("Unable to resolve expression reference {0}")] + UnableToResolveExpr(String), } diff --git a/harper-core/src/weir/mod.rs b/harper-core/src/weir/mod.rs index ccc797c62..fb4caf682 100644 --- a/harper-core/src/weir/mod.rs +++ b/harper-core/src/weir/mod.rs @@ -6,10 +6,12 @@ mod error; mod optimize; mod parsing; +use std::collections::VecDeque; use std::str::FromStr; use std::sync::Arc; pub use error::Error; +use hashbrown::{HashMap, HashSet}; use is_macro::Is; use parsing::{parse_expr_str, parse_str}; use strum_macros::{AsRefStr, EnumString}; @@ -18,13 +20,13 @@ use crate::expr::Expr; use crate::linting::{Chunk, ExprLinter, Lint, LintKind, Linter, Suggestion}; use crate::parsers::Markdown; use crate::spell::FstDictionary; -use crate::{Document, Token, TokenStringExt}; +use crate::{Document, Lrc, Token, TokenStringExt}; use self::ast::{Ast, AstVariable}; pub(crate) fn weir_expr_to_expr(weir_code: &str) -> Result, Error> { let ast = parse_expr_str(weir_code, true)?; - Ok(ast.to_expr()) + ast.to_expr(&HashMap::new()) } #[derive(Debug, Is, EnumString, AsRefStr)] @@ -40,7 +42,7 @@ pub struct TestResult { } pub struct WeirLinter { - expr: Box, + expr: Lrc>, description: String, message: String, strategy: ReplacementStrategy, @@ -60,10 +62,11 @@ impl WeirLinter { let replacement_name = "becomes"; let replacement_strat_name = "strategy"; - let expr = ast - .get_expr(main_expr_name) - .ok_or(Error::ExpectedVariableUndefined)? - .to_expr(); + let resolved = resolve_exprs(&ast)?; + + let expr = resolved + .get(main_expr_name) + .ok_or(Error::ExpectedVariableUndefined)?; let description = ast .get_variable_value(description_name) @@ -124,7 +127,7 @@ impl WeirLinter { let linter = WeirLinter { strategy: replacement_strat, ast, - expr, + expr: expr.clone(), lint_kind, description, message, @@ -141,6 +144,54 @@ impl WeirLinter { /// Runs the tests defined in the source code, returning any failing results. pub fn run_tests(&mut self) -> Vec { + fn apply_nth_suggestion(text: &str, lint: &Lint, n: usize) -> Option { + let suggestion = lint.suggestions.get(n)?; + let mut text_chars: Vec = text.chars().collect(); + suggestion.apply(lint.span, &mut text_chars); + Some(text_chars.iter().collect()) + } + + fn transform_top3_to_expected( + text: &str, + expected: &str, + linter: &mut impl Linter, + ) -> Option { + let mut queue: VecDeque<(String, usize)> = VecDeque::new(); + let mut seen: HashSet = HashSet::new(); + + queue.push_back((text.to_string(), 0)); + seen.insert(text.to_string()); + + while let Some((current, depth)) = queue.pop_front() { + if current == expected { + return Some(current); + } + + if depth >= 100 { + continue; + } + + let doc = Document::new_from_vec( + current.chars().collect::>().into(), + &Markdown::default(), + &FstDictionary::curated(), + ); + let lints = linter.lint(&doc); + + if let Some(lint) = lints.first() { + for i in 0..3 { + if let Some(next) = apply_nth_suggestion(¤t, lint, i) + && seen.insert(next.clone()) + { + queue.push_back((next, depth + 1)); + } + } + } + } + + None + } + fn transform_nth_str(text: &str, linter: &mut impl Linter, n: usize) -> String { let mut text_chars: Vec = text.chars().collect(); let mut iter_count = 0; @@ -190,27 +241,7 @@ impl WeirLinter { .collect(); for (text, expected) in tests { - if text == expected && lint_count(&text, self) != 0 { - results.push(TestResult { - expected: text.to_string(), - got: text.to_string(), - }); - continue; - } - - let zeroth = transform_nth_str(&text, self, 0); - let first = transform_nth_str(&text, self, 1); - let second = transform_nth_str(&text, self, 2); - - let matched = if zeroth == expected { - Some(zeroth.clone()) - } else if first == expected { - Some(first.clone()) - } else if second == expected { - Some(second.clone()) - } else { - None - }; + let matched = transform_top3_to_expected(&text, &expected, self); match matched { Some(result) => { @@ -225,7 +256,7 @@ impl WeirLinter { } None => results.push(TestResult { expected: expected.to_string(), - got: zeroth, + got: transform_nth_str(&text, self, 0), }), } } @@ -272,6 +303,17 @@ impl ExprLinter for WeirLinter { } } +fn resolve_exprs(ast: &Ast) -> Result>>, Error> { + let mut resolved_exprs = HashMap::new(); + + for (name, val) in ast.iter_exprs() { + let expr = val.to_expr(&resolved_exprs)?; + resolved_exprs.insert(name.to_owned(), Lrc::new(expr)); + } + + Ok(resolved_exprs) +} + #[cfg(test)] pub mod tests { use quickcheck_macros::quickcheck; @@ -335,6 +377,51 @@ pub mod tests { assert_eq!(5, linter.count_tests()); } + #[test] + fn g_suite_with_refs() { + let source = r#" + expr a (G [Suite, Suit]) + expr b (Google Apps For Work) + expr incorrect [@a, @b] + + expr main @incorrect + let message "Use the updated brand." + let description "`G Suite` or `Google Apps for Work` is now called `Google Workspace`" + let kind "Miscellaneous" + let becomes "Google Workspace" + let strategy "Exact" + + test "We migrated from G Suite last year." "We migrated from Google Workspace last year." + test "This account is still labeled as Google Apps for Work." "This account is still labeled as Google Workspace." + test "The pricing page mentions G Suit for legacy plans." "The pricing page mentions Google Workspace for legacy plans." + test "New customers sign up for Google Workspace." "New customers sign up for Google Workspace." + "#; + + let mut linter = WeirLinter::new(source).unwrap(); + + assert_passes_all(&mut linter); + assert_eq!(4, linter.count_tests()); + } + + #[test] + fn fails_on_unresolved_expr() { + let source = r#" + expr main @missing + let message "" + let description "" + let kind "Miscellaneous" + let becomes "" + let strategy "Exact" + "#; + + let res = WeirLinter::new(source); + + assert_eq!( + res.err().unwrap(), + Error::UnableToResolveExpr("missing".to_string()) + ) + } + #[test] fn wildcard() { let source = r#" diff --git a/harper-core/src/weir/parsing/expr.rs b/harper-core/src/weir/parsing/expr.rs index e12727290..642d83863 100644 --- a/harper-core/src/weir/parsing/expr.rs +++ b/harper-core/src/weir/parsing/expr.rs @@ -47,6 +47,12 @@ fn parse_single_expr(tokens: &[Token], source: &[char]) -> Result Ok(FoundNode::new(AstExprNode::Whitespace, 1)), + // The expr ref notation + TokenKind::Punctuation(Punctuation::At) => { + let name_tok = tokens.get(1).ok_or(Error::EndOfInput)?; + let name = name_tok.span.get_content(source); + Ok(FoundNode::new(AstExprNode::ExprRef(name.into()), 2)) + } // The derivation notation. TokenKind::Punctuation(Punctuation::Currency(Currency::Dollar)) => { let word_tok = tokens.get(1).ok_or(Error::EndOfInput)?; @@ -458,6 +464,26 @@ mod tests { ) } + #[test] + fn parses_expr_ref() { + assert_eq!( + parse_expr_str("@test", true).unwrap(), + AstExprNode::ExprRef(char_string!("test")) + ) + } + + #[test] + fn parses_expr_ref_array() { + assert_eq!( + parse_expr_str("[@a, @b, @c]", true).unwrap(), + AstExprNode::Arr(vec![ + AstExprNode::ExprRef(char_string!("a")), + AstExprNode::ExprRef(char_string!("b")), + AstExprNode::ExprRef(char_string!("c")) + ]) + ) + } + #[test] fn parses_anything() { assert_eq!(parse_expr_str("*", true).unwrap(), AstExprNode::Anything) diff --git a/packages/web/src/routes/docs/weir/+page.md b/packages/web/src/routes/docs/weir/+page.md index ae224e70e..15ebaa115 100644 --- a/packages/web/src/routes/docs/weir/+page.md +++ b/packages/web/src/routes/docs/weir/+page.md @@ -221,6 +221,16 @@ let kind "Punctuation" let becomes "-" ``` +### Expression References + +You can refer back to a previous expression you've defined using the `@` symbol. +This is useful for creating lists of words or patterns that might be used in multiple places in the rule. + +``` +expr vehicles [bikes, trains, automobiles] +expr main @vehicles aren't fast enough +``` + ## Replacement Strategies You can dictate how Harper will suggest a replacement using the `strategy` tag.