From ce545f47f8f59b4b04c9a0f02fffb999686e07cc Mon Sep 17 00:00:00 2001 From: hippietrail Date: Sun, 30 Nov 2025 15:43:09 +0800 Subject: [PATCH 1/6] =?UTF-8?q?feat:=20will=20ran=20=E2=86=92=20will=20run?= =?UTF-8?q?=20/=20ran=20work=20in=20progress?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- harper-core/src/expr/sequence_expr.rs | 24 +++- harper-core/src/linting/lint_group.rs | 10 +- harper-core/src/linting/mod.rs | 1 + harper-core/src/linting/will_non_lemma.rs | 145 ++++++++++++++++++++++ 4 files changed, 170 insertions(+), 10 deletions(-) create mode 100644 harper-core/src/linting/will_non_lemma.rs diff --git a/harper-core/src/expr/sequence_expr.rs b/harper-core/src/expr/sequence_expr.rs index 88dd06a66..ed23c6f8e 100644 --- a/harper-core/src/expr/sequence_expr.rs +++ b/harper-core/src/expr/sequence_expr.rs @@ -170,11 +170,6 @@ impl SequenceExpr { self.then(WordSet::new(words)) } - /// Matches any token whose `Kind` exactly matches. - pub fn then_strict(self, kind: TokenKind) -> Self { - self.then(move |tok: &Token, _source: &[char]| tok.kind == kind) - } - /// Match against one or more whitespace tokens. pub fn then_whitespace(self) -> Self { self.then(WhitespacePattern) @@ -229,7 +224,7 @@ impl SequenceExpr { /// Matches any word. pub fn then_any_word(self) -> Self { - self.then(|tok: &Token, _source: &[char]| tok.kind.is_word()) + self.then_kind_where(|kind| kind.is_word()) } /// Match examples of `word` that have any capitalization. @@ -266,6 +261,23 @@ impl SequenceExpr { // One kind + /// Matches any token whose `Kind` exactly matches. + pub fn then_kind(self, kind: TokenKind) -> Self { + self.then(move |tok: &Token, _source: &[char]| tok.kind == kind) + } + + /// Matches a token where the provided closure returns true for the token's kind. + pub fn then_kind_where(mut self, predicate: F) -> Self + where + F: Fn(&TokenKind) -> bool + Send + Sync + 'static, + { + self.exprs + .push(Box::new(move |tok: &Token, _source: &[char]| { + predicate(&tok.kind) + })); + self + } + /// Match a token of a given kind which is not in the list of words. pub fn then_kind_except(self, pred_is: F, ex: &'static [&'static str]) -> Self where diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index 506b2bff0..d82dbebe8 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -183,9 +183,11 @@ use super::way_too_adjective::WayTooAdjective; use super::well_educated::WellEducated; use super::whereas::Whereas; use super::widely_accepted::WidelyAccepted; +use super::will_non_lemma::WillNonLemma; use super::win_prize::WinPrize; use super::wordpress_dotcom::WordPressDotcom; use super::would_never_have::WouldNeverHave; + use super::{ExprLinter, Lint}; use super::{HtmlDescriptionLinter, Linter}; use crate::linting::dashes::Dashes; @@ -467,12 +469,12 @@ impl LintGroup { )); out.merge_from(&mut closed_compounds::lint_group()); out.merge_from(&mut initialisms::lint_group()); - // out.merge_from(&mut update_place_names::lint_group()); // Add all the more complex rules to the group. // Please maintain alphabetical order. // On *nix you can maintain sort order with `sort -t'(' -k2` insert_expr_rule!(APart, true); + insert_expr_rule!(AWhile, true); insert_expr_rule!(Addicting, true); insert_expr_rule!(AdjectiveDoubleDegree, true); insert_struct_rule!(AdjectiveOfA, true); @@ -498,9 +500,6 @@ impl LintGroup { insert_expr_rule!(CautionaryTale, true); insert_expr_rule!(ChangeTack, true); insert_expr_rule!(ChockFull, true); - insert_expr_rule!(AWhile, true); - insert_struct_rule!(SubjectPronoun, true); - insert_struct_rule!(FindFine, true); insert_struct_rule!(CommaFixes, true); insert_struct_rule!(CompoundNouns, true); insert_expr_rule!(CompoundSubjectI, true); @@ -524,6 +523,7 @@ impl LintGroup { insert_expr_rule!(FeelFell, true); insert_expr_rule!(FewUnitsOfTimeAgo, true); insert_expr_rule!(FillerWords, true); + insert_struct_rule!(FindFine, true); insert_expr_rule!(FirstAidKit, true); insert_expr_rule!(ForNoun, true); insert_expr_rule!(FreePredicate, true); @@ -611,6 +611,7 @@ impl LintGroup { insert_struct_rule!(Spaces, true); insert_struct_rule!(SpelledNumbers, false); insert_expr_rule!(SplitWords, true); + insert_struct_rule!(SubjectPronoun, true); insert_expr_rule!(ThatThan, true); insert_expr_rule!(ThatWhich, true); insert_expr_rule!(TheHowWhy, true); @@ -636,6 +637,7 @@ impl LintGroup { insert_expr_rule!(WellEducated, true); insert_expr_rule!(Whereas, true); insert_expr_rule!(WidelyAccepted, true); + insert_expr_rule!(WillNonLemma, true); insert_expr_rule!(WinPrize, true); insert_struct_rule!(WordPressDotcom, true); insert_expr_rule!(WouldNeverHave, true); diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 52347c41b..53ed56e13 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -195,6 +195,7 @@ mod way_too_adjective; mod well_educated; mod whereas; mod widely_accepted; +mod will_non_lemma; mod win_prize; mod wordpress_dotcom; mod would_never_have; diff --git a/harper-core/src/linting/will_non_lemma.rs b/harper-core/src/linting/will_non_lemma.rs new file mode 100644 index 000000000..5612b5fd0 --- /dev/null +++ b/harper-core/src/linting/will_non_lemma.rs @@ -0,0 +1,145 @@ +use crate::Token; +use crate::TokenStringExt; +use crate::expr::{Expr, SequenceExpr}; +use crate::linting::expr_linter::Chunk; +use crate::linting::{ExprLinter, Lint, LintKind, Suggestion}; + +pub struct WillNonLemma { + expr: Box, +} + +impl Default for WillNonLemma { + fn default() -> Self { + Self { + expr: Box::new( + SequenceExpr::word_set(&["will", "shall"]) + .t_ws() + .then_kind_where(|kind| { + kind.is_verb() && !kind.is_verb_lemma() && !kind.is_noun() + }), + ), + } + } +} + +impl ExprLinter for WillNonLemma { + type Unit = Chunk; + + fn expr(&self) -> &dyn Expr { + self.expr.as_ref() + } + + fn match_to_lint_with_context( + &self, + toks: &[Token], + src: &[char], + ctx: Option<(&[Token], &[Token])>, + ) -> Option { + let (pre, post) = match ctx { + Some((prev, next)) => ( + prev.iter() + .map(|t| t.span.get_content_string(src)) + .collect::(), + next.iter() + .map(|t| t.span.get_content_string(src)) + .collect::(), + ), + None => (String::new(), String::new()), + }; + + eprintln!( + "❤️ \x1b[31m{}\x1b[0m{}\x1b[32m{}\x1b[0m", + pre, + toks.span()?.get_content_string(src), + post + ); + + Some(Lint { + span: toks.span()?, + lint_kind: LintKind::Grammar, + message: "`Will` should be followed by a verb in its lemma form.".to_string(), + ..Default::default() + }) + } + + fn description(&self) -> &str { + "Flags wrong verb forms after `will` or `shall`" + } +} + +#[cfg(test)] +mod tests { + use super::WillNonLemma; + use crate::linting::tests::{ + assert_good_and_bad_suggestions, assert_lint_count, assert_suggestion_result, + }; + + #[test] + fn fix_will_ran() { + assert_good_and_bad_suggestions( + "The brown fox will ran thru the meadow.", + WillNonLemma::default(), + &[ + "The brown fox will run thru the meadow.", + "The brown fox ran thru the meadow.", + ], + &[], + ); + } + + #[test] + fn fix_will_exists() { + assert_good_and_bad_suggestions( + "there is a good chance duplicate Rule IDs will exists.", + WillNonLemma::default(), + &[ + "there is a good chance duplicate Rule IDs will exist.", + "there is a good chance duplicate Rule IDs exists.", + "there is a good chance duplicate Rule IDs exist.", + ], + &[], + ); + } + + #[test] + fn ignore_shall_vessels() { + assert_lint_count( + "No Preference shall be given by any Regulation of Commerce or Revenue to the Ports of one State over those of another; nor shall Vessels bound to, or from, one State, be obliged to enter, clear, or pay Duties in another.", + WillNonLemma::default(), + 0, + ); + } + + #[test] + fn ignore_will_tools() { + assert_lint_count("Give your AI free will tools.", WillNonLemma::default(), 0); + } + + #[test] + fn fix_will_coming_soon() { + assert_good_and_bad_suggestions( + "More advanced features will coming soon, so stay tuned!", + WillNonLemma::default(), + &[], + &[ + "More advanced features will come soon, so stay tuned!", + "More advanced features coming soon, so stay tuned!", + "More advanced features will be coming soon, so stay tuned!", + ], + ); + } + + // on CPU and GPU (NPU support will coming next) + fn fix_will_coming_next() { + assert_good_and_bad_suggestions( + "on CPU and GPU (NPU support will coming next)", + WillNonLemma::default(), + &[ + "on CPU and GPU (NPU support will come next)", + "on CPU and GPU (NPU support coming next)", + "on CPU and GPU (NPU support will be coming next)", + ], + &[], + ); + } +} From b4e7f1c1c859cec95e67f545be4b9a5c2b882e7f Mon Sep 17 00:00:00 2001 From: hippietrail Date: Sun, 30 Nov 2025 18:52:27 +0800 Subject: [PATCH 2/6] fix: appease clippy --- harper-core/src/linting/will_non_lemma.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harper-core/src/linting/will_non_lemma.rs b/harper-core/src/linting/will_non_lemma.rs index 5612b5fd0..6b6ddd2c4 100644 --- a/harper-core/src/linting/will_non_lemma.rs +++ b/harper-core/src/linting/will_non_lemma.rs @@ -2,7 +2,7 @@ use crate::Token; use crate::TokenStringExt; use crate::expr::{Expr, SequenceExpr}; use crate::linting::expr_linter::Chunk; -use crate::linting::{ExprLinter, Lint, LintKind, Suggestion}; +use crate::linting::{ExprLinter, Lint, LintKind}; pub struct WillNonLemma { expr: Box, From 6e03402929ee2726035b64b39d4590d16b21e12c Mon Sep 17 00:00:00 2001 From: hippietrail Date: Sun, 30 Nov 2025 20:02:19 +0800 Subject: [PATCH 3/6] fix: appease clippy a little bit more --- harper-core/src/linting/will_non_lemma.rs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/harper-core/src/linting/will_non_lemma.rs b/harper-core/src/linting/will_non_lemma.rs index 6b6ddd2c4..e425267d2 100644 --- a/harper-core/src/linting/will_non_lemma.rs +++ b/harper-core/src/linting/will_non_lemma.rs @@ -37,12 +37,16 @@ impl ExprLinter for WillNonLemma { ) -> Option { let (pre, post) = match ctx { Some((prev, next)) => ( - prev.iter() - .map(|t| t.span.get_content_string(src)) - .collect::(), - next.iter() - .map(|t| t.span.get_content_string(src)) - .collect::(), + if let Some(s) = prev.span() { + s.get_content_string(src) + } else { + String::new() + }, + if let Some(n) = next.span() { + n.get_content_string(src) + } else { + String::new() + }, ), None => (String::new(), String::new()), }; @@ -70,9 +74,7 @@ impl ExprLinter for WillNonLemma { #[cfg(test)] mod tests { use super::WillNonLemma; - use crate::linting::tests::{ - assert_good_and_bad_suggestions, assert_lint_count, assert_suggestion_result, - }; + use crate::linting::tests::{assert_good_and_bad_suggestions, assert_lint_count}; #[test] fn fix_will_ran() { @@ -129,7 +131,7 @@ mod tests { ); } - // on CPU and GPU (NPU support will coming next) + #[test] fn fix_will_coming_next() { assert_good_and_bad_suggestions( "on CPU and GPU (NPU support will coming next)", From 3ccd580694f8f1bdc967fb685d2380053515a743 Mon Sep 17 00:00:00 2001 From: hippietrail Date: Mon, 1 Dec 2025 03:55:54 +0800 Subject: [PATCH 4/6] refactor: all tests passing on my collection of examples --- harper-core/dictionary.dict | 2 +- harper-core/src/linting/lint_group.rs | 4 +- harper-core/src/linting/mod.rs | 2 - harper-core/src/linting/will_non_lemma.rs | 177 ++++++++++++++---- .../Alice's Adventures in Wonderland.md | 60 +++--- .../tests/text/tagged/Difficult sentences.md | 8 +- .../tests/text/tagged/The Great Gatsby.md | 52 ++--- 7 files changed, 206 insertions(+), 99 deletions(-) diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index 43e458ec7..458c3210b 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -41084,7 +41084,7 @@ ramrod/NSgV ramrodded/VtT ramrodding/V6 ramshackle/JV -ran/~VtNr +ran/~Vtr # removed nautical noun sense ranch/~NgSVd>GZ rancher/~Ng ranching/~VNg diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index d82dbebe8..1be5cc2c1 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -637,7 +637,6 @@ impl LintGroup { insert_expr_rule!(WellEducated, true); insert_expr_rule!(Whereas, true); insert_expr_rule!(WidelyAccepted, true); - insert_expr_rule!(WillNonLemma, true); insert_expr_rule!(WinPrize, true); insert_struct_rule!(WordPressDotcom, true); insert_expr_rule!(WouldNeverHave, true); @@ -672,6 +671,9 @@ impl LintGroup { out.add("MassPlurals", MassPlurals::new(dictionary.clone())); out.config.set_rule_enabled("MassPlurals", true); + out.add("WillNonLemma", WillNonLemma::new(dictionary.clone())); + out.config.set_rule_enabled("WillNonLemma", true); + out } diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 53ed56e13..8d11f8e8d 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -427,7 +427,6 @@ pub mod tests { if !found_bad.is_empty() || !unseen_good.is_empty() { eprintln!("\n=== Test Summary ==="); - // In the summary section, change these loops: if !found_bad.is_empty() { eprintln!("\n❌ Found {} bad suggestions:", found_bad.len()); for (i, j, text) in &found_bad { @@ -435,7 +434,6 @@ pub mod tests { } } - // And for the good suggestions: if !unseen_good.is_empty() { eprintln!( "\n❌ Missing {} expected good suggestions:", diff --git a/harper-core/src/linting/will_non_lemma.rs b/harper-core/src/linting/will_non_lemma.rs index e425267d2..56bc959d4 100644 --- a/harper-core/src/linting/will_non_lemma.rs +++ b/harper-core/src/linting/will_non_lemma.rs @@ -1,28 +1,52 @@ -use crate::Token; -use crate::TokenStringExt; +use hashbrown::HashMap; + use crate::expr::{Expr, SequenceExpr}; use crate::linting::expr_linter::Chunk; -use crate::linting::{ExprLinter, Lint, LintKind}; +use crate::linting::{ExprLinter, LintKind, Suggestion}; +use crate::spell::Dictionary; +use crate::{Lint, Token, TokenStringExt}; + +/// Maps irregular simple past verb forms to their lemma forms +const IRREGULAR_VERBS: &[(&str, &str)] = &[ + ("ate", "eat"), + ("ran", "run"), + // Add more irregular verbs here as needed +]; + +lazy_static::lazy_static! { + static ref IRREGULAR_VERB_MAP: HashMap<&'static str, &'static str> = + IRREGULAR_VERBS.iter().copied().collect(); +} -pub struct WillNonLemma { +pub struct WillNonLemma +where + D: Dictionary, +{ expr: Box, + dict: D, } -impl Default for WillNonLemma { - fn default() -> Self { +impl WillNonLemma +where + D: Dictionary, +{ + pub fn new(dict: D) -> Self { Self { expr: Box::new( SequenceExpr::word_set(&["will", "shall"]) .t_ws() .then_kind_where(|kind| { - kind.is_verb() && !kind.is_verb_lemma() && !kind.is_noun() + kind.is_verb() + && !kind.is_verb_lemma() + && (!kind.is_noun() || kind.is_verb_progressive_form()) }), ), + dict, } } } -impl ExprLinter for WillNonLemma { +impl ExprLinter for WillNonLemma { type Unit = Chunk; fn expr(&self) -> &dyn Expr { @@ -35,33 +59,105 @@ impl ExprLinter for WillNonLemma { src: &[char], ctx: Option<(&[Token], &[Token])>, ) -> Option { - let (pre, post) = match ctx { - Some((prev, next)) => ( - if let Some(s) = prev.span() { - s.get_content_string(src) - } else { - String::new() - }, - if let Some(n) = next.span() { - n.get_content_string(src) + let matched_chars = toks.span()?.get_content(src); + + // 'modal' is the 3rd last token, verb is the last token + let verb_idx = toks.len() - 1; + let verb_tok = &toks[verb_idx]; + let verb_str = verb_tok.span.get_content_string(src); + + let suggest = + |text: &str| Suggestion::replace_with_match_case(text.chars().collect(), matched_chars); + + let maybe_prev_word_tok: Option<&Token> = match ctx { + Some((prev, _)) if prev.len() >= 2 => { + let last = &prev[prev.len() - 1]; + let potential_word = &prev[prev.len() - 2]; + if last.kind.is_whitespace() && potential_word.kind.is_word() { + Some(potential_word) } else { - String::new() - }, - ), - None => (String::new(), String::new()), + None + } + } + _ => None, }; - eprintln!( - "❤️ \x1b[31m{}\x1b[0m{}\x1b[32m{}\x1b[0m", - pre, - toks.span()?.get_content_string(src), - post - ); + let mut suggestions = vec![]; + + if verb_tok.kind.is_verb_simple_past_form() + && let Some(&lemma) = IRREGULAR_VERB_MAP.get(verb_str.as_str()) + && self + .dict + .get_word_metadata_str(lemma) + .is_some_and(|m| m.is_verb_lemma()) + { + suggestions.push(suggest(&format!("will {}", lemma))); + suggestions.push(suggest(&verb_str)); + } + if verb_tok.kind.is_verb_third_person_singular_present_form() { + let candidate = &verb_str[..verb_str.len() - 1]; + if self + .dict + .get_word_metadata_str(candidate) + .is_some_and(|m| m.is_verb_lemma()) + { + suggestions.push(suggest(&format!("will {}", candidate))); + suggestions.push(suggest(&verb_str)); + + // Add suggestion for plural nouns + if maybe_prev_word_tok.is_some_and(|tok| tok.kind.is_plural_nominal()) { + suggestions.push(suggest(candidate)); + } + } + } + if verb_tok.kind.is_verb_progressive_form() { + if let Some(stem) = verb_str.strip_suffix("ing") { + // Check regular form (e.g., 'walking' -> 'walk') + if self + .dict + .get_word_metadata_str(stem) + .is_some_and(|m| m.is_verb_lemma()) + { + suggestions.push(Suggestion::replace_with_match_case( + format!("will {}", stem).chars().collect(), + matched_chars, + )); + } + + // Check form that adds 'e' (e.g., 'coming' -> 'come') + let stem_with_e = format!("{}e", stem); + if self + .dict + .get_word_metadata_str(&stem_with_e) + .is_some_and(|m| m.is_verb_lemma()) + { + suggestions.push(Suggestion::replace_with_match_case( + format!("will {}", stem_with_e).chars().collect(), + matched_chars, + )); + } + } + + let v_ing = Suggestion::replace_with_match_case( + verb_tok.span.get_content(src).to_vec(), + toks.span()?.get_content(src), + ); + suggestions.push(v_ing); + let will_be_v_ing = Suggestion::replace_with_match_case( + format!("will be {}", verb_str) + .chars() + .collect::>(), + toks.span()?.get_content(src), + ); + suggestions.push(will_be_v_ing); + } Some(Lint { span: toks.span()?, lint_kind: LintKind::Grammar, - message: "`Will` should be followed by a verb in its lemma form.".to_string(), + suggestions, + message: "`Will` and `shall` should be followed by a verb in its base form." + .to_string(), ..Default::default() }) } @@ -75,12 +171,14 @@ impl ExprLinter for WillNonLemma { mod tests { use super::WillNonLemma; use crate::linting::tests::{assert_good_and_bad_suggestions, assert_lint_count}; + use crate::spell::FstDictionary; #[test] fn fix_will_ran() { + // singular + will + irregular preterite assert_good_and_bad_suggestions( "The brown fox will ran thru the meadow.", - WillNonLemma::default(), + WillNonLemma::new(FstDictionary::curated()), &[ "The brown fox will run thru the meadow.", "The brown fox ran thru the meadow.", @@ -91,9 +189,10 @@ mod tests { #[test] fn fix_will_exists() { + // plural + will + 3rd person singular present assert_good_and_bad_suggestions( "there is a good chance duplicate Rule IDs will exists.", - WillNonLemma::default(), + WillNonLemma::new(FstDictionary::curated()), &[ "there is a good chance duplicate Rule IDs will exist.", "there is a good chance duplicate Rule IDs exists.", @@ -105,37 +204,45 @@ mod tests { #[test] fn ignore_shall_vessels() { + // "nor" + shall + (3rd person singular present == plural noun) assert_lint_count( "No Preference shall be given by any Regulation of Commerce or Revenue to the Ports of one State over those of another; nor shall Vessels bound to, or from, one State, be obliged to enter, clear, or pay Duties in another.", - WillNonLemma::default(), + WillNonLemma::new(FstDictionary::curated()), 0, ); } #[test] fn ignore_will_tools() { - assert_lint_count("Give your AI free will tools.", WillNonLemma::default(), 0); + // "free will" + (3rd person singular present == plural noun) + assert_lint_count( + "Give your AI free will tools.", + WillNonLemma::new(FstDictionary::curated()), + 0, + ); } #[test] fn fix_will_coming_soon() { + // plural + will + progressive assert_good_and_bad_suggestions( "More advanced features will coming soon, so stay tuned!", - WillNonLemma::default(), - &[], + WillNonLemma::new(FstDictionary::curated()), &[ "More advanced features will come soon, so stay tuned!", "More advanced features coming soon, so stay tuned!", "More advanced features will be coming soon, so stay tuned!", ], + &[], ); } #[test] fn fix_will_coming_next() { + // singular + will + progressive assert_good_and_bad_suggestions( "on CPU and GPU (NPU support will coming next)", - WillNonLemma::default(), + WillNonLemma::new(FstDictionary::curated()), &[ "on CPU and GPU (NPU support will come next)", "on CPU and GPU (NPU support coming next)", diff --git a/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md b/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md index 0ad2f112f..a77412287 100644 --- a/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md +++ b/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md @@ -30,8 +30,8 @@ # VB ISg/D$+ NSg/I/VB J/R NSg/J VB/C NSg/J . . I/C D NSg/VB P Nᴹ/Vg/J D/P+ > daisy - chain would be worth the trouble of getting up and picking the daisies , # NPr+ . N🅪Sg/VB+ VXB NSg/VXB NSg/VB/J D N🅪Sg/VB P NSg/Vg NSg/VB/J/P VB/C Nᴹ/Vg/J D NPl . -> when suddenly a White Rabbit with pink eyes ran close by her . -# NSg/I/C R D/P NPr🅪Sg/VB/J NSg/VB+ P N🅪Sg/VB/J NPl/V3+ NSg/VPt NSg/VB/J NSg/J/P ISg/D$+ . +> when suddenly a White Rabbit with pink eyes ran close by her . +# NSg/I/C R D/P NPr🅪Sg/VB/J NSg/VB+ P N🅪Sg/VB/J NPl/V3+ VPt NSg/VB/J NSg/J/P ISg/D$+ . > # > There was nothing so very remarkable in that ; nor did Alice think it so very @@ -50,8 +50,8 @@ # NSg/P ISg/D$+ NSg/VB+ NSg/I/C/Ddem+ ISg+ VB R C/P NSg/VPp D/P NSg/VB+ P I/C D/P > waistcoat - pocket , or a watch to take out of it , and burning with curiosity , she # NSg . NSg/VB/J+ . NPr/C D/P NSg/VB P NSg/VB NSg/VB/J/R/P P NPr/ISg+ . VB/C Nᴹ/Vg/J P NSg+ . ISg+ -> ran across the field after it , and fortunately was just in time to see it pop -# NSg/VPt NSg/P D NSg/VB+ P NPr/ISg+ . VB/C R VPt J NPr/J/R/P N🅪Sg/VB/J+ P NSg/VB NPr/ISg+ N🅪Sg/VB/J+ +> ran across the field after it , and fortunately was just in time to see it pop +# VPt NSg/P D NSg/VB+ P NPr/ISg+ . VB/C R VPt J NPr/J/R/P N🅪Sg/VB/J+ P NSg/VB NPr/ISg+ N🅪Sg/VB/J+ > down a large rabbit - hole under the hedge . # N🅪Sg/VB/J/P D/P NSg/J NSg/VB+ . NSg/VB+ NSg/J/P D NSg/VB+ . > @@ -558,8 +558,8 @@ # . NSg/I/C/Ddem+ VPt D/P NSg/VB/J N🅪Sg/VB . . VP/J NPr+ . D/P+ NPr/VB/J+ NSg/VB/J+ VP/J NSg/P D+ NSg/J+ > change , but very glad to find herself still in existence ; “ and now for the # N🅪Sg/VB+ . NSg/C/P J/R NSg/VB/J P NSg/VB ISg+ NSg/VB/J NPr/J/R/P NSg+ . . VB/C NSg/J/R/C R/C/P D+ -> garden ! ” and she ran with all speed back to the little door : but , alas ! the -# NSg/VB/J+ . . VB/C ISg+ NSg/VPt P NSg/I/J/C/Dq+ N🅪Sg/VB+ NSg/VB/J P D+ NPr/I/J/Dq+ NSg/VB+ . NSg/C/P . NPl . D+ +> garden ! ” and she ran with all speed back to the little door : but , alas ! the +# NSg/VB/J+ . . VB/C ISg+ VPt P NSg/I/J/C/Dq+ N🅪Sg/VB+ NSg/VB/J P D+ NPr/I/J/Dq+ NSg/VB+ . NSg/C/P . NPl . D+ > little door was shut again , and the little golden key was lying on the glass # NPr/I/J/Dq+ NSg/VB+ VPt NSg/VBP/J P . VB/C D+ NPr/I/J/Dq+ NPr/VB/J+ NPr/VB/J+ VPt Nᴹ/Vg/J J/P D+ NPr🅪Sg/VB+ > table as before , “ and things are worse than ever , ” thought the poor child , “ for @@ -1144,14 +1144,14 @@ # ISg/D$+ NPr/J/R/P D/P+ VB/J+ N🅪Sg/I/VB+ . . NSg/VB . NPr+ NPr/J+ . NSg/I+ VB ISgPl+ Nᴹ/Vg/J NSg/VB/J/R/P NSg/J/R . NSg/VBPp NSg/VB/J+ I/Ddem+ > moment , and fetch me a pair of gloves and a fan ! Quick , now ! ” And Alice was so # NSg+ . VB/C NSg/VB NPr/ISg+ D/P NSg/VB P NPl/V3 VB/C D/P+ NSg/VB+ . NSg/VB/J . NSg/J/R/C . . VB/C NPr+ VPt NSg/I/J/R/C -> much frightened that she ran off at once in the direction it pointed to , without -# NSg/I/J/R/Dq VP/J NSg/I/C/Ddem ISg+ NSg/VPt NSg/VB/J/P NSg/P NSg/C NPr/J/R/P D+ N🅪Sg+ NPr/ISg+ VP/J P . C/P +> much frightened that she ran off at once in the direction it pointed to , without +# NSg/I/J/R/Dq VP/J NSg/I/C/Ddem ISg+ VPt NSg/VB/J/P NSg/P NSg/C NPr/J/R/P D+ N🅪Sg+ NPr/ISg+ VP/J P . C/P > trying to explain the mistake it had made . # Nᴹ/Vg/J P VB D+ NSg/VB+ NPr/ISg+ VB VB . > # -> “ He took me for his housemaid , ” she said to herself as she ran . “ How surprised -# . NPr/ISg+ VPt NPr/ISg+ R/C/P ISg/D$+ NSg/VB . . ISg+ VP/J P ISg+ NSg/R ISg+ NSg/VPt . . NSg/C VP/J +> “ He took me for his housemaid , ” she said to herself as she ran . “ How surprised +# . NPr/ISg+ VPt NPr/ISg+ R/C/P ISg/D$+ NSg/VB . . ISg+ VP/J P ISg+ NSg/R ISg+ VPt . . NSg/C VP/J > he’ll be when he finds out who I am ! But I’d better take him his fan and # K NSg/VXB NSg/I/C NPr/ISg+ NPl/V3 NSg/VB/J/R/P NPr/I+ ISg/#r+ NPr/VB/J . NSg/C/P K NSg/VXB/JC NSg/VB ISg+ ISg/D$+ NSg/VB VB/C > gloves — that is , if I can find them . ” As she said this , she came upon a neat @@ -1476,14 +1476,14 @@ # NSg/I/J/R/C ISg+ VP/J NSg/I/J P D+ NPl/V3+ . VB/C VPt VP/J P NSg/VB NSg/I/C/Ddem ISg+ VPt > shrinking directly . As soon as she was small enough to get through the door , she # Nᴹ/Vg/J R/C . NSg/R J/R NSg/R ISg+ VPt NPr/VB/J NSg/I P NSg/VB NSg/J/P D+ NSg/VB+ . ISg+ -> ran out of the house , and found quite a crowd of little animals and birds -# NSg/VPt NSg/VB/J/R/P P D+ NPr/VB+ . VB/C NSg/VB R D/P NSg/VB P NPr/I/J/Dq NPl VB/C NPl/V3+ +> ran out of the house , and found quite a crowd of little animals and birds +# VPt NSg/VB/J/R/P P D+ NPr/VB+ . VB/C NSg/VB R D/P NSg/VB P NPr/I/J/Dq NPl VB/C NPl/V3+ > waiting outside . The poor little Lizard , Bill , was in the middle , being held up # Nᴹ/Vg/J Nᴹ/VB/J/P . D NSg/VB/J NPr/I/J/Dq NSg . NPr/VB+ . VPt NPr/J/R/P D NSg/VB/J . N🅪Sg/Vg/J/C VB NSg/VB/J/P > by two guinea - pigs , who were giving it something out of a bottle . They all made # NSg/J/P NSg NPr+ . NPl/V3+ . NPr/I+ NSg/VPt Nᴹ/Vg/J NPr/ISg+ NSg/I/J+ NSg/VB/J/R/P P D/P NSg/VB+ . IPl+ NSg/I/J/C/Dq VB -> a rush at Alice the moment she appeared ; but she ran off as hard as she could , -# D/P NPr/VB/J+ NSg/P NPr+ D+ NSg+ ISg+ VP/J . NSg/C/P ISg+ NSg/VPt NSg/VB/J/P NSg/R N🅪Sg/J/R NSg/R ISg+ NSg/VXB . +> a rush at Alice the moment she appeared ; but she ran off as hard as she could , +# D/P NPr/VB/J+ NSg/P NPr+ D+ NSg+ ISg+ VP/J . NSg/C/P ISg+ VPt NSg/VB/J/P NSg/R N🅪Sg/J/R NSg/R ISg+ NSg/VXB . > and soon found herself safe in a thick wood . # VB/C J/R NSg/VB ISg+ NSg/VB/J NPr/J/R/P D/P+ NSg/VB/J+ NPr🅪Sg/VB/J+ . > @@ -1532,8 +1532,8 @@ # NSg/P D NSg/VB/J+ . VB/C VP/J NPr/VB/J+ NSg/J/P NPl/V3+ NPr/J/R/P ISg/D$+ NSg/VB+ P NSg/VB NSg/VB/J P NPr/ISg+ . NSg/J/C > Alice , thinking it was very like having a game of play with a cart - horse , and # NPr+ . Nᴹ/Vg/J NPr/ISg+ VPt J/R NSg/VB/J/C/P Nᴹ/Vg/J D/P NSg/VB/J+ P N🅪Sg/VB P D/P NSg/VB+ . NSg/VB+ . VB/C -> expecting every moment to be trampled under its feet , ran round the thistle -# Nᴹ/Vg/J Dq NSg+ P NSg/VXB VP/J NSg/J/P ISg/D$+ NPl+ . NSg/VPt NSg/VB/J/P D NSg +> expecting every moment to be trampled under its feet , ran round the thistle +# Nᴹ/Vg/J Dq NSg+ P NSg/VXB VP/J NSg/J/P ISg/D$+ NPl+ . VPt NSg/VB/J/P D NSg > again ; then the puppy began a series of short charges at the stick , running a # P . NSg/J/C D NSg/VB+ VPt D/P NSgPl P NPr/VB/J/P NPl/V3+ NSg/P D NSg/VB/J+ . Nᴹ/Vg/J/P D/P > very little way forwards each time and a long way back , and barking hoarsely all @@ -1546,8 +1546,8 @@ # > This seemed to Alice a good opportunity for making her escape ; so she set off at # I/Ddem VP/J P NPr+ D/P+ NPr/VB/J+ N🅪Sg+ R/C/P Nᴹ/Vg/J ISg/D$+ N🅪Sg/VB . NSg/I/J/R/C ISg+ NPr/VBP/J NSg/VB/J/P NSg/P -> once , and ran till she was quite tired and out of breath , and till the puppy’s -# NSg/C . VB/C NSg/VPt NSg/VB/C/P ISg+ VPt R VP/J VB/C NSg/VB/J/R/P P N🅪Sg/VB/J+ . VB/C NSg/VB/C/P D NSg$ +> once , and ran till she was quite tired and out of breath , and till the puppy’s +# NSg/C . VB/C VPt NSg/VB/C/P ISg+ VPt R VP/J VB/C NSg/VB/J/R/P P N🅪Sg/VB/J+ . VB/C NSg/VB/C/P D NSg$ > bark sounded quite faint in the distance . # N🅪Sg/VB+ VP/J R NSg/VB/J NPr/J/R/P D N🅪Sg/VB+ . > @@ -3538,8 +3538,8 @@ # . ISg/#r+ NSg/VB . . VP/J D+ NPr/VB/J+ . NPr/I+ VB NSg NSg/VPp Nᴹ/Vg/J D+ NPl/V3+ . . NSg/VB/J/P P > their heads ! ” and the procession moved on , three of the soldiers remaining # D$+ NPl/V3+ . . VB/C D+ NSg/VB+ VP/J J/P . NSg P D+ NPl/V3+ Nᴹ/Vg/J -> behind to execute the unfortunate gardeners , who ran to Alice for protection . -# NSg/J/P P VB D+ NSg/J+ NPl+ . NPr/I+ NSg/VPt P NPr R/C/P N🅪Sg+ . +> behind to execute the unfortunate gardeners , who ran to Alice for protection . +# NSg/J/P P VB D+ NSg/J+ NPl+ . NPr/I+ VPt P NPr R/C/P N🅪Sg+ . > # > “ You shan’t be beheaded ! ” said Alice , and she put them into a large flower - pot @@ -3726,8 +3726,8 @@ # J . R/C/P NSg/VB+ . K D NSg/VB/J K VP P NSg/VB/J NSg/J/P NSg/J/P Nᴹ/Vg/J J/P > at the other end of the ground — and I should have croqueted the Queen’s hedgehog # NSg/P D NSg/VB/J NSg/VB P D N🅪Sg/VB/J+ . VB/C ISg/#r+ VXB NSg/VXB VP/J D NSg$ NSg/VB+ -> just now , only it ran away when it saw mine coming ! ” -# J NSg/J/R/C . J/R/C NPr/ISg+ NSg/VPt VB/J NSg/I/C NPr/ISg+ NSg/VPt NSg/I/VB+ Nᴹ/Vg/J . . +> just now , only it ran away when it saw mine coming ! ” +# J NSg/J/R/C . J/R/C NPr/ISg+ VPt VB/J NSg/I/C NPr/ISg+ NSg/VPt NSg/I/VB+ Nᴹ/Vg/J . . > # > “ How do you like the Queen ? ” said the Cat in a low voice . @@ -3890,8 +3890,8 @@ # D NSg$ NPr/VB/J+ VPt Nᴹ/Vg/J VB/J D NSg+ NPr/ISg+ VPt VPp/J/P . VB/C . NSg/J/P D N🅪Sg/VB/J+ NPr/ISg+ VB > come back with the Duchess , it had entirely disappeared ; so the King and the # NSg/VBPp/P NSg/VB/J P D NSg/VB . NPr/ISg+ VB R VP/J . NSg/I/J/R/C D NPr/VB/J+ VB/C D -> executioner ran wildly up and down looking for it , while the rest of the party -# NSg NSg/VPt R NSg/VB/J/P VB/C N🅪Sg/VB/J/P Nᴹ/Vg/J R/C/P NPr/ISg+ . NSg/VB/C/P D NSg/VB P D NSg/VB/J+ +> executioner ran wildly up and down looking for it , while the rest of the party +# NSg VPt R NSg/VB/J/P VB/C N🅪Sg/VB/J/P Nᴹ/Vg/J R/C/P NPr/ISg+ . NSg/VB/C/P D NSg/VB P D NSg/VB/J+ > went back to the game . # NSg/VPt NSg/VB/J P D NSg/VB/J+ . > @@ -4996,10 +4996,10 @@ # C/P Nᴹ/Vg/J R/C/P D NSg/VB P D N🅪Sg+ . > # -> “ What trial is it ? ” Alice panted as she ran ; but the Gryphon only answered “ Come -# . NSg/I+ NSg/VB/J+ VL3 NPr/ISg+ . . NPr+ VP/J NSg/R ISg+ NSg/VPt . NSg/C/P D ? J/R/C VP/J . NSg/VBPp/P -> on ! ” and ran the faster , while more and more faintly came , carried on the breeze -# J/P . . VB/C NSg/VPt D NSg/JC . NSg/VB/C/P NPr/I/J/R/Dq VB/C NPr/I/J/R/Dq R NSg/VPt/P . VP/J J/P D+ NSg/VB+ +> “ What trial is it ? ” Alice panted as she ran ; but the Gryphon only answered “ Come +# . NSg/I+ NSg/VB/J+ VL3 NPr/ISg+ . . NPr+ VP/J NSg/R ISg+ VPt . NSg/C/P D ? J/R/C VP/J . NSg/VBPp/P +> on ! ” and ran the faster , while more and more faintly came , carried on the breeze +# J/P . . VB/C VPt D NSg/JC . NSg/VB/C/P NPr/I/J/R/Dq VB/C NPr/I/J/R/Dq R NSg/VPt/P . VP/J J/P D+ NSg/VB+ > that followed them , the melancholy words : — # NSg/I/C/Ddem+ VP/J NSg/IPl+ . D NSg/J NPl/V3+ . . > @@ -5908,8 +5908,8 @@ # NSg/VXB J NSg/VPp NPrᴹ/Vg/J J/P . VB/C NSg/I/C ISg+ VB VP/J . ISg/D$+ NSg/VB+ VP/J ISg/D$+ . > and said , “ It was a curious dream , dear , certainly : but now run in to your tea ; # VB/C VP/J . . NPr/ISg+ VPt D/P J NSg/VB/J . NSg/VB/J . R . NSg/C/P NSg/J/R/C NSg/VBPp NPr/J/R/P P D$+ N🅪Sg/VB+ . -> it’s getting late . ” So Alice got up and ran off , thinking while she ran , as well -# K NSg/Vg NSg/J . . NSg/I/J/R/C NPr+ VP NSg/VB/J/P VB/C NSg/VPt NSg/VB/J/P . Nᴹ/Vg/J NSg/VB/C/P ISg+ NSg/VPt . NSg/R NSg/VB/J/R +> it’s getting late . ” So Alice got up and ran off , thinking while she ran , as well +# K NSg/Vg NSg/J . . NSg/I/J/R/C NPr+ VP NSg/VB/J/P VB/C VPt NSg/VB/J/P . Nᴹ/Vg/J NSg/VB/C/P ISg+ VPt . NSg/R NSg/VB/J/R > she might , what a wonderful dream it had been . # ISg+ Nᴹ/VXB/J . NSg/I+ D/P+ J+ NSg/VB/J+ NPr/ISg+ VB NSg/VPp . > diff --git a/harper-core/tests/text/tagged/Difficult sentences.md b/harper-core/tests/text/tagged/Difficult sentences.md index 9d15fb9c7..1b06f9c86 100644 --- a/harper-core/tests/text/tagged/Difficult sentences.md +++ b/harper-core/tests/text/tagged/Difficult sentences.md @@ -96,8 +96,8 @@ # D NSg VL3 NSg/J/P D NSg/VB+ NSg/VB . > The stream runs by our back door . # D+ NSg/VB+ NPl/V3 NSg/J/P D$+ NSg/VB/J NSg/VB+ . -> He ran straight by me . -# NPr/ISg+ NSg/VPt NSg/VB/J/R NSg/J/P NPr/ISg+ . +> He ran straight by me . +# NPr/ISg+ VPt NSg/VB/J/R NSg/J/P NPr/ISg+ . > Be back by ten o'clock ! . # NSg/VXB NSg/VB/J NSg/J/P NSg R . . > We'll find someone by the end of March . @@ -466,8 +466,8 @@ # R D/P+ NSg/VB/J+ NPr/VB/J+ VP/J NPr/J/R/P . > Would you like that to take away or eat in ? # VXB ISgPl+ NSg/VB/J/C/P NSg/I/C/Ddem+ P NSg/VB VB/J NPr/C VB NPr/J/R/P . -> He ran to the edge of the swimming pool and dived in . -# NPr/ISg+ NSg/VPt P D NSg/VB P D+ NSg/VB NSg/VB+ VB/C VP/J NPr/J/R/P . +> He ran to the edge of the swimming pool and dived in . +# NPr/ISg+ VPt P D NSg/VB P D+ NSg/VB NSg/VB+ VB/C VP/J NPr/J/R/P . > They flew in from London last night . # IPl+ NSg/VPt/J NPr/J/R/P P NPr+ NSg/VB/J+ N🅪Sg/VB+ . > For six hours the tide flows in , then for another six hours it flows out . diff --git a/harper-core/tests/text/tagged/The Great Gatsby.md b/harper-core/tests/text/tagged/The Great Gatsby.md index e73916ac9..765ef10e6 100644 --- a/harper-core/tests/text/tagged/The Great Gatsby.md +++ b/harper-core/tests/text/tagged/The Great Gatsby.md @@ -146,8 +146,8 @@ # NSg NSg/P NSg D/P NSg/J+ . NSg/C/P NSg/P D NSg/VB/J NSg/VB/J+ D NSg/VB/J+ VP/J ISg+ P > Washington , and I went out to the country alone . I had a dog — at least I had him # NPr+ . VB/C ISg/#r+ NSg/VPt NSg/VB/J/R/P P D NSg/J+ J . ISg/#r+ VB D/P+ NSg/VB/J+ . NSg/P NSg/J/Dq ISg/#r+ VB ISg+ -> for a few days until he ran away — and an old Dodge and a Finnish woman , who made -# R/C/P D/P+ NSg/I/Dq+ NPl+ C/P NPr/ISg+ NSg/VPt VB/J . VB/C D/P NSg/J NPr/VB/J VB/C D/P+ NSg/J+ NSg/VB+ . NPr/I+ VB +> for a few days until he ran away — and an old Dodge and a Finnish woman , who made +# R/C/P D/P+ NSg/I/Dq+ NPl+ C/P NPr/ISg+ VPt VB/J . VB/C D/P NSg/J NPr/VB/J VB/C D/P+ NSg/J+ NSg/VB+ . NPr/I+ VB > my bed and cooked breakfast and muttered Finnish wisdom to herself over the # D$+ NSg/VBP/J+ VB/C VP/J N🅪Sg/VB+ VB/C VP/J NSg/J+ Nᴹ+ P ISg+ NSg/J/P D+ > electric stove . @@ -304,8 +304,8 @@ # NSg+ NSg/J+ NPl/V3+ I+ ISg/#r+ R VPt NSg/P NSg/I/J/C/Dq . D$+ NPr/VB+ VPt NSg/VB/J NPr/I/J/R/Dq VB/J > than I expected , a cheerful red - and - white Georgian Colonial mansion , overlooking # C/P ISg/#r+ NSg/VP/J . D/P J N🅪Sg/J . VB/C . NPr🅪Sg/VB/J NSg/J NSg/J+ NSg+ . Nᴹ/Vg/J -> the bay . The lawn started at the beach and ran toward the front door for a -# D NSg/VB/J+ . D+ NSg/VB+ VP/J NSg/P D+ NPr/VB+ VB/C NSg/VPt J/P D+ NSg/VB/J+ NSg/VB+ R/C/P D/P +> the bay . The lawn started at the beach and ran toward the front door for a +# D NSg/VB/J+ . D+ NSg/VB+ VP/J NSg/P D+ NPr/VB+ VB/C VPt J/P D+ NSg/VB/J+ NSg/VB+ R/C/P D/P > quarter of a mile , jumping over sun - dials and brick walks and burning # NSg/VB/J P D/P+ NSg+ . Nᴹ/Vg/J NSg/J/P NPr/VB+ . NPl/V3 VB/C N🅪Sg/VB/J+ NPl/V3 VB/C Nᴹ/Vg/J > gardens — finally when it reached the house drifting up the side in bright vines @@ -1256,8 +1256,8 @@ # R NPr/ISg+ VPt NSg/J NPr🅪Sg/VB J/P NSg+ NPl/V3+ VB/C NPr/J/R/P NSg/VB/J+ P NSg/J NPl/V3 . > where new red gaspumps sat out in pools of light , and when I reached my estate # NSg/R/C NSg/J N🅪Sg/J ? NSg/VP/J NSg/VB/J/R/P NPr/J/R/P NPl/V3 P N🅪Sg/VB/J+ . VB/C NSg/I/C ISg/#r+ VP/J D$+ NSg/VB/J+ -> at West Egg I ran the car under its shed and sat for a while on an abandoned -# NSg/P NPr/VB/J+ N🅪Sg/VB+ ISg/#r+ NSg/VPt D NSg+ NSg/J/P ISg/D$+ NSg/VP+ VB/C NSg/VP/J R/C/P D/P NSg/VB/C/P+ J/P D/P VP/J +> at West Egg I ran the car under its shed and sat for a while on an abandoned +# NSg/P NPr/VB/J+ N🅪Sg/VB+ ISg/#r+ VPt D NSg+ NSg/J/P ISg/D$+ NSg/VP+ VB/C NSg/VP/J R/C/P D/P NSg/VB/C/P+ J/P D/P VP/J > grass roller in the yard . The wind had blown off , leaving a loud , bright night , # NPr🅪Sg/VB+ NSg/VB NPr/J/R/P D NSg/VB+ . D+ N🅪Sg/VB+ VB VPp/J NSg/VB/J/P . Nᴹ/Vg/J D/P NSg/J . NPr/VB/J N🅪Sg/VB+ . > with wings beating in the trees and a persistent organ sound as the full bellows @@ -3718,8 +3718,8 @@ # NSg/VB/J NPr+ NPrPl P NPr+ . VB/C D ? VB/C D NPr ? . > Snell was there three days before he went to the penitentiary , so drunk out on # NPr VPt R+ NSg NPl+ C/P NPr/ISg+ NSg/VPt P D NSg/J+ . NSg/I/J/R/C NSg/VPp/J NSg/VB/J/R/P J/P -> the gravel drive that Mrs . Ulysses Swett’s automobile ran over his right hand . -# D Nᴹ/VB/J+ N🅪Sg/VB NSg/I/C/Ddem NPl+ . NPr+ ? NSg/VB/J NSg/VPt NSg/J/P ISg/D$+ NPr/VB/J NSg/VB+ . +> the gravel drive that Mrs . Ulysses Swett’s automobile ran over his right hand . +# D Nᴹ/VB/J+ N🅪Sg/VB NSg/I/C/Ddem NPl+ . NPr+ ? NSg/VB/J VPt NSg/J/P ISg/D$+ NPr/VB/J NSg/VB+ . > The Dancies came , too , and S. B. Whitebait , who was well over sixty , and Maurice # D ? NSg/VPt/P . R . VB/C ? ? NSg/VB . NPr/I+ VPt NSg/VB/J/R NSg/J/P NSg . VB/C NPr > A. Flink , and the Hammerheads , and Beluga the tobacco importer , and Beluga’s @@ -4008,8 +4008,8 @@ # . NSg$ D NSg/I/J P NPr+ . . > # -> To my astonishment , the thing had an authentic look . “ Orderi di Danilo , ” ran the -# P D$+ Nᴹ+ . D+ NSg+ VB D/P+ J+ NSg/VB+ . . ? NPr/#r+ ? . . NSg/VPt D +> To my astonishment , the thing had an authentic look . “ Orderi di Danilo , ” ran the +# P D$+ Nᴹ+ . D+ NSg+ VB D/P+ J+ NSg/VB+ . . ? NPr/#r+ ? . . VPt D > circular legend , “ Montenegro , Nicolas Rex . ” # NSg/VB/J N🅪Sg/VB+ . . NPr+ . NPrPl NPr . . > @@ -4742,8 +4742,8 @@ # Nᴹ/Vg/J NSg/P ISg+ P J+ N🅪Sg/VB/J+ . NPr/ISg+ VPt Nᴹ/Vg/J/P P NSg/VB NSg/IPl+ > together — it made you laugh in a hushed , fascinated way . That was in August . A # J . NPr/ISg+ VB ISgPl+ NSg/VB NPr/J/R/P D/P VP/J . VP/J NSg/J+ . NSg/I/C/Ddem+ VPt NPr/J/R/P NPr/VB/J+ . D/P+ -> week after I left Santa Barbara Tom ran into a wagon on the Ventura road one -# NSg/J+ P ISg/#r+ NPr/VB/J NPr+ NPr+ NPr/VB+ NSg/VPt P D/P+ NSg/VB+ J/P D ? N🅪Sg/J+ NSg/I/J +> week after I left Santa Barbara Tom ran into a wagon on the Ventura road one +# NSg/J+ P ISg/#r+ NPr/VB/J NPr+ NPr+ NPr/VB+ VPt P D/P+ NSg/VB+ J/P D ? N🅪Sg/J+ NSg/I/J > night , and ripped a front wheel off his car . The girl who was with him got into # N🅪Sg/VB+ . VB/C VP/J D/P NSg/VB/J+ NSg/VB+ NSg/VB/J/P ISg/D$+ NSg+ . D+ NSg/VB+ NPr/I+ VPt P ISg+ VP P > the papers , too , because her arm was broken — she was one of the chambermaids in @@ -5472,8 +5472,8 @@ # > I walked out the back way — just as Gatsby had when he had made his nervous # ISg/#r+ VP/J NSg/VB/J/R/P D+ NSg/VB/J+ NSg/J+ . J NSg/R NPr VB NSg/I/C NPr/ISg+ VB VB ISg/D$+ J -> circuit of the house half an hour before — and ran for a huge black knotted tree , -# NSg/VB P D NPr/VB+ N🅪Sg/VB/J/P+ D/P NSg+ C/P . VB/C NSg/VPt R/C/P D/P J N🅪Sg/VB/J VP/J NSg/VB+ . +> circuit of the house half an hour before — and ran for a huge black knotted tree , +# NSg/VB P D NPr/VB+ N🅪Sg/VB/J/P+ D/P NSg+ C/P . VB/C VPt R/C/P D/P J N🅪Sg/VB/J VP/J NSg/VB+ . > whose massed leaves made a fabric against the rain . Once more it was pouring , # I+ VP/J NPl/V3+ VB D/P N🅪Sg/VB+ C/P D N🅪Sg/VB+ . NSg/C NPr/I/J/R/Dq NPr/ISg+ VPt Nᴹ/Vg/J . > and my irregular lawn , well - shaved by Gatsby’s gardener , abounded in small muddy @@ -8778,8 +8778,8 @@ # NSg/I/J/R/C IPl+ NSg/VPt J/P J/P NPr🅪Sg+ NSg/J/P D+ Nᴹ/Vg/J+ Nᴹ/VB/J+ . > # -> The young Greek , Michaelis , who ran the coffee joint beside the ashheaps was the -# D NPr/VB/J NPr/VB/J . ? . NPr/I+ NSg/VPt D+ N🅪Sg/VB/J+ NSg/VB/J P D ? VPt D +> The young Greek , Michaelis , who ran the coffee joint beside the ashheaps was the +# D NPr/VB/J NPr/VB/J . ? . NPr/I+ VPt D+ N🅪Sg/VB/J+ NSg/VB/J P D ? VPt D > principal witness at the inquest . He had slept through the heat until after # NSg/J NSg/VB NSg/P D NSg/VB . NPr/ISg+ VB VP NSg/J/P D+ Nᴹ/VB+ C/P P > five , when he strolled over to the garage , and found George Wilson sick in his @@ -9002,8 +9002,8 @@ # . R VP/J . . VP/J NPr/VB+ . Nᴹ/Vg/J . > # -> “ She ran out ina road . Son - of - a - bitch didn’t even stopus car . ” -# . ISg+ NSg/VPt NSg/VB/J/R/P NPr N🅪Sg/J+ . NPr/VB+ . P . D/P . NSg/VB VB NSg/VB/J ? NSg+ . . +> “ She ran out ina road . Son - of - a - bitch didn’t even stopus car . ” +# . ISg+ VPt NSg/VB/J/R/P NPr N🅪Sg/J+ . NPr/VB+ . P . D/P . NSg/VB VB NSg/VB/J ? NSg+ . . > # > “ There was two cars , ” said Michaelis , “ one comin ’ , one goin ’ , see ? ” @@ -9016,8 +9016,8 @@ # > “ One goin ’ each way . Well , she ” — his hand rose toward the blankets but stopped # . NSg/I/J ? . Dq NSg/J+ . NSg/VB/J/R . ISg+ . . ISg/D$+ NSg/VB+ NPr/VPt/J J/P D+ NPl/V3+ NSg/C/P VB/J -> half way and fell to his side — “ she ran out there an ’ the one comin ’ from N’York -# N🅪Sg/VB/J/P+ NSg/J+ VB/C NSg/VPt/J P ISg/D$+ NSg/VB/J+ . . ISg+ NSg/VPt NSg/VB/J/R/P R+ D/P . D+ NSg/I/J+ ? . P ? +> half way and fell to his side — “ she ran out there an ’ the one comin ’ from N’York +# N🅪Sg/VB/J/P+ NSg/J+ VB/C NSg/VPt/J P ISg/D$+ NSg/VB/J+ . . ISg+ VPt NSg/VB/J/R/P R+ D/P . D+ NSg/I/J+ ? . P ? > knock right into her , goin ’ thirty or forty miles an hour . ” # NSg/VB NPr/VB/J P ISg/D$+ . ? . NSg NPr/C NSg/J NPrPl+ D/P NSg+ . . > @@ -9238,8 +9238,8 @@ # K NSg/VXB VP/J NSg/C K NSg/VB/J NPr/J/R/P . K VB NSg/I P NSg/I/J/C/Dq P NSg/IPl+ R/C/P NSg/I/J NPr🅪Sg+ . VB/C > suddenly that included Jordan too . She must have seen something of this in my # R NSg/I/C/Ddem+ VP/J NPr+ R . ISg+ NSg/VB NSg/VXB NSg/VPp NSg/I/J+ P I/Ddem+ NPr/J/R/P D$+ -> expression , for she turned abruptly away and ran up the porch steps into the -# N🅪Sg+ . R/C/P ISg+ VP/J R VB/J VB/C NSg/VPt NSg/VB/J/P D+ NSg+ NPl/V3+ P D+ +> expression , for she turned abruptly away and ran up the porch steps into the +# N🅪Sg+ . R/C/P ISg+ VP/J R VB/J VB/C VPt NSg/VB/J/P D+ NSg+ NPl/V3+ P D+ > house . I sat down for a few minutes with my head in my hands , until I heard the # NPr/VB+ . ISg/#r+ NSg/VP/J N🅪Sg/VB/J/P R/C/P D/P NSg/I/Dq+ NPl/V3+ P D$+ NPr/VB/J+ NPr/J/R/P D$+ NPl/V3+ . C/P ISg/#r+ VP/J D+ > phone taken up inside and the butler’s voice calling a taxi . Then I walked @@ -10248,8 +10248,8 @@ # . ISg/#r+ NSg/VB . . NPr/ISg+ VP/J R . . K NSg/I/J P I/Ddem Nᴹ/Vg/J NPl+ VB/C ISg/#r+ VB > think any harm to nobody , but when I get to know a thing I know it . It was the # NSg/VB I/R/Dq N🅪Sg/VB+ P NSg/I+ . NSg/C/P NSg/I/C ISg/#r+ NSg/VB P NSg/VB D/P NSg+ ISg/#r+ NSg/VB NPr/ISg+ . NPr/ISg+ VPt D -> man in that car . She ran out to speak to him and he wouldn’t stop . ” -# NPr/VB/J NPr/J/R/P NSg/I/C/Ddem+ NSg+ . ISg+ NSg/VPt NSg/VB/J/R/P P NSg/VB P ISg+ VB/C NPr/ISg+ VXB NSg/VB . . +> man in that car . She ran out to speak to him and he wouldn’t stop . ” +# NPr/VB/J NPr/J/R/P NSg/I/C/Ddem+ NSg+ . ISg+ VPt NSg/VB/J/R/P P NSg/VB P ISg+ VB/C NPr/ISg+ VXB NSg/VB . . > # > Michaelis had seen this too , but it hadn’t occurred to him that there was any @@ -11532,8 +11532,8 @@ # NPr/VB+ . . NPr/ISg+ NSg/VPt/J NSg/VB/J/P R . . NSg/I+ NSg/C ISg/#r+ VPt NPr/VB ISg+ . NSg/I/C/Ddem NSg/VB VB NPr/ISg+ > coming to him . He threw dust into your eyes just like he did in Daisy’s , but he # Nᴹ/Vg/J P ISg+ . NPr/ISg+ VB Nᴹ/VB+ P D$+ NPl/V3+ J NSg/VB/J/C/P NPr/ISg+ VPt NPr/J/R/P NSg$ . NSg/C/P NPr/ISg+ -> was a tough one . He ran over Myrtle like you’d run over a dog and never even -# VPt D/P NSg/VB/J NSg/I/J+ . NPr/ISg+ NSg/VPt NSg/J/P NPr NSg/VB/J/C/P K NSg/VBPp NSg/J/P D/P NSg/VB/J+ VB/C R NSg/VB/J +> was a tough one . He ran over Myrtle like you’d run over a dog and never even +# VPt D/P NSg/VB/J NSg/I/J+ . NPr/ISg+ VPt NSg/J/P NPr NSg/VB/J/C/P K NSg/VBPp NSg/J/P D/P NSg/VB/J+ VB/C R NSg/VB/J > stopped his car . ” # VB/J ISg/D$+ NSg+ . . > From edf37535a8384b5b56330f68e16891a8ed9401f1 Mon Sep 17 00:00:00 2001 From: hippietrail Date: Mon, 1 Dec 2025 12:56:09 +0800 Subject: [PATCH 5/6] chore: more irregular verbs --- harper-core/src/linting/will_non_lemma.rs | 32 ++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/harper-core/src/linting/will_non_lemma.rs b/harper-core/src/linting/will_non_lemma.rs index 56bc959d4..5e014d7d7 100644 --- a/harper-core/src/linting/will_non_lemma.rs +++ b/harper-core/src/linting/will_non_lemma.rs @@ -9,8 +9,38 @@ use crate::{Lint, Token, TokenStringExt}; /// Maps irregular simple past verb forms to their lemma forms const IRREGULAR_VERBS: &[(&str, &str)] = &[ ("ate", "eat"), + ("awoke", "awake"), + ("broke", "break"), + ("burnt", "burn"), + ("came", "come"), + ("did", "do"), + ("dove", "dive"), + ("drank", "drink"), + ("drove", "drive"), + ("flew", "fly"), + ("forwent", "forgo"), + ("froze", "freeze"), + ("got", "get"), + ("had", "have"), + ("hit", "hit"), + // ("hurt", "hurt"), + ("knew", "know"), + ("laid", "lay"), + ("lit", "light"), + ("lost", "lose"), + ("made", "make"), + ("mistook", "mistake"), ("ran", "run"), - // Add more irregular verbs here as needed + ("rode", "ride"), + ("rose", "rise"), + ("saw", "see"), + ("taught", "teach"), + ("threw", "throw"), + ("took", "take"), + ("tore", "tear"), + ("went", "go"), + ("wore", "wear"), + ("wrote", "write"), ]; lazy_static::lazy_static! { From d58c65718e59334fdc3281c6b1ee323df38ea822 Mon Sep 17 00:00:00 2001 From: hippietrail Date: Mon, 1 Dec 2025 19:55:59 +0800 Subject: [PATCH 6/6] chore: more irregular verbs --- harper-core/dictionary.dict | 16 ++++++++-------- .../linting/simple_past_to_past_participle.rs | 5 +++++ harper-core/src/linting/will_non_lemma.rs | 8 ++++++++ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index 458c3210b..f23b0dcd7 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -37129,24 +37129,24 @@ overproduction/Nmg overpromise/VSGd overprotect/VSdGv overqualified/J -overran/~V +overran/~Vt overrange/VGdSN overrate/VGdSN overreach/VGdSN overreact/VSdG overreaction/NwSg -overread/JVSdG +overread/JVbtTSG overrefined/VJ overregulate/VdSGn overreliance/Nmg overrepresent/VGdS overrepresentation/Nmg -overridden/V -override/~VGSNg +overridden/VT +override/~VbGSNg overripe/Jg -overrode/V +overrode/Vt overrule/VGdS -overrun/~VSNg +overrun/~VSNgbT overrunning/V6 oversampling/VNmg oversaturate/VdSGn @@ -40209,7 +40209,7 @@ pronouncement/NSg pronto/ pronunciation/~NgS proof/~NwSgJVdGr -proofread/V>GSNZ +proofread/VbtT>GSNZ proofreader/Ng prop/~NgSV propaganda/~Nmg @@ -44504,7 +44504,7 @@ sight/~NwgSVGdz sighting/~NgV6 sightless/J sightly/J^>U -sightread/Vb +sightread/VbtT sightseeing/~NgV6 sightseer/NgS sigma/~NgS diff --git a/harper-core/src/linting/simple_past_to_past_participle.rs b/harper-core/src/linting/simple_past_to_past_participle.rs index 7217f4c0c..8f6202a7b 100644 --- a/harper-core/src/linting/simple_past_to_past_participle.rs +++ b/harper-core/src/linting/simple_past_to_past_participle.rs @@ -41,7 +41,9 @@ const IRREGULAR_VERBS: &[(&str, &str)] = &[ ("led", "led"), ("mistook", "mistaken"), ("output", "output"), + ("overthrew", "overthrown"), ("overtook", "overtaken"), + ("overwrote", "overwritten"), ("paid", "paid"), ("partook", "partaken"), // proved, proved/proven @@ -49,6 +51,7 @@ const IRREGULAR_VERBS: &[(&str, &str)] = &[ ("ran", "run"), ("rang", "rung"), ("read", "read"), + ("redid", "redone"), ("reset", "reset"), ("rode", "ridden"), ("rose", "risen"), @@ -66,9 +69,11 @@ const IRREGULAR_VERBS: &[(&str, &str)] = &[ ("stood", "stood"), ("swam", "swum"), ("swore", "sworn"), + ("taught", "taught"), ("thought", "thought"), ("trod", "trodden"), ("took", "taken"), + ("undid", "undone"), // was, been // were, been ("went", "gone"), diff --git a/harper-core/src/linting/will_non_lemma.rs b/harper-core/src/linting/will_non_lemma.rs index 5e014d7d7..30588207f 100644 --- a/harper-core/src/linting/will_non_lemma.rs +++ b/harper-core/src/linting/will_non_lemma.rs @@ -30,14 +30,22 @@ const IRREGULAR_VERBS: &[(&str, &str)] = &[ ("lost", "lose"), ("made", "make"), ("mistook", "mistake"), + ("overthrew", "overthrow"), + ("overtook", "overtake"), + ("overwrote", "overwrite"), ("ran", "run"), + // ("read", "read"), + ("redid", "redo"), + // ("reread", "reread"), ("rode", "ride"), ("rose", "rise"), ("saw", "see"), ("taught", "teach"), + ("thought", "think"), ("threw", "throw"), ("took", "take"), ("tore", "tear"), + ("undid", "undo"), ("went", "go"), ("wore", "wear"), ("wrote", "write"),