diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index e41489da0..bd77c01f4 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -37189,24 +37189,24 @@ overproduction/Nmg overpromise/VSGd overprotect/VSdGv overqualified/J -overran/~V +overran/~Vt overrange/VGdSN overrate/VGdSN overreach/VGdSN overreact/VSdG overreaction/NwSg -overread/JVSdG +overread/JVbtTSG overrefined/VJ overregulate/VdSGn overreliance/Nmg overrepresent/VGdS overrepresentation/Nmg -overridden/V -override/~VGSNg +overridden/VT +override/~VbGSNg overripe/Jg -overrode/V +overrode/Vt overrule/VGdS -overrun/~VSNg +overrun/~VSNgbT overrunning/V6 oversampling/VNmg oversaturate/VdSGn @@ -40279,7 +40279,7 @@ pronouncement/NSg pronto/ pronunciation/~NgS proof/~NwSgJVdGr -proofread/V>GSNZ +proofread/VbtT>GSNZ proofreader/Ng prop/~NgSV propaganda/~Nmg @@ -41154,7 +41154,7 @@ ramrod/NSgV ramrodded/VtT ramrodding/V6 ramshackle/JV -ran/~VtNr +ran/~Vtr # removed nautical noun sense ranch/~NgSVd>GZ rancher/~Ng ranching/~VNg @@ -44579,7 +44579,7 @@ sight/~NwgSVGdz sighting/~NgV6 sightless/J sightly/J^>U -sightread/Vb +sightread/VbtT sightseeing/~NgV6 sightseer/NgS sigma/~NgS diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index c8d246b60..9190bf80d 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -206,10 +206,12 @@ use super::way_too_adjective::WayTooAdjective; use super::well_educated::WellEducated; use super::whereas::Whereas; use super::widely_accepted::WidelyAccepted; +use super::will_non_lemma::WillNonLemma; use super::win_prize::WinPrize; use super::wish_could::WishCould; use super::wordpress_dotcom::WordPressDotcom; use super::would_never_have::WouldNeverHave; + use super::{ExprLinter, Lint}; use super::{HtmlDescriptionLinter, Linter}; use crate::linting::dashes::Dashes; @@ -738,6 +740,9 @@ impl LintGroup { out.add("AnA", AnA::new(dialect)); out.config.set_rule_enabled("AnA", true); + out.add("WillNonLemma", WillNonLemma::new(dictionary.clone())); + out.config.set_rule_enabled("WillNonLemma", true); + out } diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 2398ca2bb..900e5fc8b 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -218,6 +218,7 @@ mod weir_rules; mod well_educated; mod whereas; mod widely_accepted; +mod will_non_lemma; mod win_prize; mod wish_could; mod wordpress_dotcom; @@ -451,7 +452,6 @@ pub mod tests { if !found_bad.is_empty() || !unseen_good.is_empty() { eprintln!("\n=== Test Summary ==="); - // In the summary section, change these loops: if !found_bad.is_empty() { eprintln!("\n❌ Found {} bad suggestions:", found_bad.len()); for (i, j, text) in &found_bad { @@ -459,7 +459,6 @@ pub mod tests { } } - // And for the good suggestions: if !unseen_good.is_empty() { eprintln!( "\n❌ Missing {} expected good suggestions:", diff --git a/harper-core/src/linting/will_non_lemma.rs b/harper-core/src/linting/will_non_lemma.rs new file mode 100644 index 000000000..30588207f --- /dev/null +++ b/harper-core/src/linting/will_non_lemma.rs @@ -0,0 +1,292 @@ +use hashbrown::HashMap; + +use crate::expr::{Expr, SequenceExpr}; +use crate::linting::expr_linter::Chunk; +use crate::linting::{ExprLinter, LintKind, Suggestion}; +use crate::spell::Dictionary; +use crate::{Lint, Token, TokenStringExt}; + +/// Maps irregular simple past verb forms to their lemma forms +const IRREGULAR_VERBS: &[(&str, &str)] = &[ + ("ate", "eat"), + ("awoke", "awake"), + ("broke", "break"), + ("burnt", "burn"), + ("came", "come"), + ("did", "do"), + ("dove", "dive"), + ("drank", "drink"), + ("drove", "drive"), + ("flew", "fly"), + ("forwent", "forgo"), + ("froze", "freeze"), + ("got", "get"), + ("had", "have"), + ("hit", "hit"), + // ("hurt", "hurt"), + ("knew", "know"), + ("laid", "lay"), + ("lit", "light"), + ("lost", "lose"), + ("made", "make"), + ("mistook", "mistake"), + ("overthrew", "overthrow"), + ("overtook", "overtake"), + ("overwrote", "overwrite"), + ("ran", "run"), + // ("read", "read"), + ("redid", "redo"), + // ("reread", "reread"), + ("rode", "ride"), + ("rose", "rise"), + ("saw", "see"), + ("taught", "teach"), + ("thought", "think"), + ("threw", "throw"), + ("took", "take"), + ("tore", "tear"), + ("undid", "undo"), + ("went", "go"), + ("wore", "wear"), + ("wrote", "write"), +]; + +lazy_static::lazy_static! { + static ref IRREGULAR_VERB_MAP: HashMap<&'static str, &'static str> = + IRREGULAR_VERBS.iter().copied().collect(); +} + +pub struct WillNonLemma +where + D: Dictionary, +{ + expr: Box, + dict: D, +} + +impl WillNonLemma +where + D: Dictionary, +{ + pub fn new(dict: D) -> Self { + Self { + expr: Box::new( + SequenceExpr::word_set(&["will", "shall"]) + .t_ws() + .then_kind_where(|kind| { + kind.is_verb() + && !kind.is_verb_lemma() + && (!kind.is_noun() || kind.is_verb_progressive_form()) + }), + ), + dict, + } + } +} + +impl ExprLinter for WillNonLemma { + type Unit = Chunk; + + fn expr(&self) -> &dyn Expr { + self.expr.as_ref() + } + + fn match_to_lint_with_context( + &self, + toks: &[Token], + src: &[char], + ctx: Option<(&[Token], &[Token])>, + ) -> Option { + let matched_chars = toks.span()?.get_content(src); + + // 'modal' is the 3rd last token, verb is the last token + let verb_idx = toks.len() - 1; + let verb_tok = &toks[verb_idx]; + let verb_str = verb_tok.span.get_content_string(src); + + let suggest = + |text: &str| Suggestion::replace_with_match_case(text.chars().collect(), matched_chars); + + let maybe_prev_word_tok: Option<&Token> = match ctx { + Some((prev, _)) if prev.len() >= 2 => { + let last = &prev[prev.len() - 1]; + let potential_word = &prev[prev.len() - 2]; + if last.kind.is_whitespace() && potential_word.kind.is_word() { + Some(potential_word) + } else { + None + } + } + _ => None, + }; + + let mut suggestions = vec![]; + + if verb_tok.kind.is_verb_simple_past_form() + && let Some(&lemma) = IRREGULAR_VERB_MAP.get(verb_str.as_str()) + && self + .dict + .get_word_metadata_str(lemma) + .is_some_and(|m| m.is_verb_lemma()) + { + suggestions.push(suggest(&format!("will {}", lemma))); + suggestions.push(suggest(&verb_str)); + } + if verb_tok.kind.is_verb_third_person_singular_present_form() { + let candidate = &verb_str[..verb_str.len() - 1]; + if self + .dict + .get_word_metadata_str(candidate) + .is_some_and(|m| m.is_verb_lemma()) + { + suggestions.push(suggest(&format!("will {}", candidate))); + suggestions.push(suggest(&verb_str)); + + // Add suggestion for plural nouns + if maybe_prev_word_tok.is_some_and(|tok| tok.kind.is_plural_nominal()) { + suggestions.push(suggest(candidate)); + } + } + } + if verb_tok.kind.is_verb_progressive_form() { + if let Some(stem) = verb_str.strip_suffix("ing") { + // Check regular form (e.g., 'walking' -> 'walk') + if self + .dict + .get_word_metadata_str(stem) + .is_some_and(|m| m.is_verb_lemma()) + { + suggestions.push(Suggestion::replace_with_match_case( + format!("will {}", stem).chars().collect(), + matched_chars, + )); + } + + // Check form that adds 'e' (e.g., 'coming' -> 'come') + let stem_with_e = format!("{}e", stem); + if self + .dict + .get_word_metadata_str(&stem_with_e) + .is_some_and(|m| m.is_verb_lemma()) + { + suggestions.push(Suggestion::replace_with_match_case( + format!("will {}", stem_with_e).chars().collect(), + matched_chars, + )); + } + } + + let v_ing = Suggestion::replace_with_match_case( + verb_tok.span.get_content(src).to_vec(), + toks.span()?.get_content(src), + ); + suggestions.push(v_ing); + let will_be_v_ing = Suggestion::replace_with_match_case( + format!("will be {}", verb_str) + .chars() + .collect::>(), + toks.span()?.get_content(src), + ); + suggestions.push(will_be_v_ing); + } + + Some(Lint { + span: toks.span()?, + lint_kind: LintKind::Grammar, + suggestions, + message: "`Will` and `shall` should be followed by a verb in its base form." + .to_string(), + ..Default::default() + }) + } + + fn description(&self) -> &str { + "Flags wrong verb forms after `will` or `shall`" + } +} + +#[cfg(test)] +mod tests { + use super::WillNonLemma; + use crate::linting::tests::{assert_good_and_bad_suggestions, assert_lint_count}; + use crate::spell::FstDictionary; + + #[test] + fn fix_will_ran() { + // singular + will + irregular preterite + assert_good_and_bad_suggestions( + "The brown fox will ran thru the meadow.", + WillNonLemma::new(FstDictionary::curated()), + &[ + "The brown fox will run thru the meadow.", + "The brown fox ran thru the meadow.", + ], + &[], + ); + } + + #[test] + fn fix_will_exists() { + // plural + will + 3rd person singular present + assert_good_and_bad_suggestions( + "there is a good chance duplicate Rule IDs will exists.", + WillNonLemma::new(FstDictionary::curated()), + &[ + "there is a good chance duplicate Rule IDs will exist.", + "there is a good chance duplicate Rule IDs exists.", + "there is a good chance duplicate Rule IDs exist.", + ], + &[], + ); + } + + #[test] + fn ignore_shall_vessels() { + // "nor" + shall + (3rd person singular present == plural noun) + assert_lint_count( + "No Preference shall be given by any Regulation of Commerce or Revenue to the Ports of one State over those of another; nor shall Vessels bound to, or from, one State, be obliged to enter, clear, or pay Duties in another.", + WillNonLemma::new(FstDictionary::curated()), + 0, + ); + } + + #[test] + fn ignore_will_tools() { + // "free will" + (3rd person singular present == plural noun) + assert_lint_count( + "Give your AI free will tools.", + WillNonLemma::new(FstDictionary::curated()), + 0, + ); + } + + #[test] + fn fix_will_coming_soon() { + // plural + will + progressive + assert_good_and_bad_suggestions( + "More advanced features will coming soon, so stay tuned!", + WillNonLemma::new(FstDictionary::curated()), + &[ + "More advanced features will come soon, so stay tuned!", + "More advanced features coming soon, so stay tuned!", + "More advanced features will be coming soon, so stay tuned!", + ], + &[], + ); + } + + #[test] + fn fix_will_coming_next() { + // singular + will + progressive + assert_good_and_bad_suggestions( + "on CPU and GPU (NPU support will coming next)", + WillNonLemma::new(FstDictionary::curated()), + &[ + "on CPU and GPU (NPU support will come next)", + "on CPU and GPU (NPU support coming next)", + "on CPU and GPU (NPU support will be coming next)", + ], + &[], + ); + } +} diff --git a/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md b/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md index b0048fc1f..b0f87ee7c 100644 --- a/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md +++ b/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md @@ -30,8 +30,8 @@ # VP ISg/D$+ NSg/I/VB J/R NSg/J VB/C NSg/J . . I/C D NSg/VB P Nᴹ/Vg/J D/P+ > daisy - chain would be worth the trouble of getting up and picking the daisies , # NPr+ . N🅪Sg/VB+ VXB NSg/VXB NSg/VB/J D N🅪Sg/VB P NSg/Vg NSg/VB/J/P VB/C Nᴹ/Vg/J D NPl . -> when suddenly a White Rabbit with pink eyes ran close by her . -# NSg/I/C R D/P NPr🅪Sg/VB/J NSg/VB+ P N🅪Sg/VB/J NPl/V3+ NSg/VPt NSg/VB/J NSg/J/P ISg/D$+ . +> when suddenly a White Rabbit with pink eyes ran close by her . +# NSg/I/C R D/P NPr🅪Sg/VB/J NSg/VB+ P N🅪Sg/VB/J NPl/V3+ VPt NSg/VB/J NSg/J/P ISg/D$+ . > # > There was nothing so very remarkable in that ; nor did Alice think it so very @@ -50,8 +50,8 @@ # NSg/P ISg/D$+ NSg/VB+ NSg/I/C/Ddem+ ISg+ VP R C/P NSg/VPp D/P NSg/VB+ P I/C D/P > waistcoat - pocket , or a watch to take out of it , and burning with curiosity , she # NSg . NSg/VB/J+ . NPr/C D/P NSg/VB P NSg/VB NSg/VB/J/R/P P NPr/ISg+ . VB/C Nᴹ/Vg/J P NSg+ . ISg+ -> ran across the field after it , and fortunately was just in time to see it pop -# NSg/VPt NSg/P D NSg/VB+ P NPr/ISg+ . VB/C R VPt J/R NPr/J/R/P N🅪Sg/VB/J+ P NSg/VB NPr/ISg+ N🅪Sg/VB/J+ +> ran across the field after it , and fortunately was just in time to see it pop +# VPt NSg/P D NSg/VB+ P NPr/ISg+ . VB/C R VPt J/R NPr/J/R/P N🅪Sg/VB/J+ P NSg/VB NPr/ISg+ N🅪Sg/VB/J+ > down a large rabbit - hole under the hedge . # N🅪Sg/VB/J/P D/P NSg/J NSg/VB+ . NSg/VB+ NSg/J/P D NSg/VB+ . > @@ -558,8 +558,8 @@ # . NSg/I/C/Ddem+ VPt D/P NSg/VB/J N🅪Sg/VB . . VP/J NPr+ . D/P+ NPr/VB/J+ NSg/VB/J+ VP/J NSg/P D+ NSg/J+ > change , but very glad to find herself still in existence ; “ and now for the # N🅪Sg/VB+ . NSg/C/P J/R NSg/VB/J P NSg/VB ISg+ NSg/VB/J/R NPr/J/R/P NSg+ . . VB/C NSg/J/R/C R/C/P D+ -> garden ! ” and she ran with all speed back to the little door : but , alas ! the -# NSg/VB/J+ . . VB/C ISg+ NSg/VPt P NSg/I/J/C/Dq+ N🅪Sg/VB+ NSg/VB/J P D+ NPr/I/J/Dq+ NSg/VB+ . NSg/C/P . NPrPl . D+ +> garden ! ” and she ran with all speed back to the little door : but , alas ! the +# NSg/VB/J+ . . VB/C ISg+ VPt P NSg/I/J/C/Dq+ N🅪Sg/VB+ NSg/VB/J P D+ NPr/I/J/Dq+ NSg/VB+ . NSg/C/P . NPrPl . D+ > little door was shut again , and the little golden key was lying on the glass # NPr/I/J/Dq+ NSg/VB+ VPt NSg/VBP/J P . VB/C D+ NPr/I/J/Dq+ NPr/VB/J+ NPr/VB/J+ VPt Nᴹ/Vg/J J/P D+ NPr🅪Sg/VB+ > table as before , “ and things are worse than ever , ” thought the poor child , “ for @@ -1144,14 +1144,14 @@ # ISg/D$+ NPr/J/R/P D/P+ VB/J+ N🅪Sg/I/VB+ . . NSg/VB . NPr+ NPr/J+ . NSg/I+ VB ISgPl+ Nᴹ/Vg/J NSg/VB/J/R/P J/R . NSg/VBPp NSg/VB/J+ I/Ddem+ > moment , and fetch me a pair of gloves and a fan ! Quick , now ! ” And Alice was so # NSg+ . VB/C NSg/VB NPr/ISg+ D/P NSg/VB P NPl/V3 VB/C D/P+ NSg/VB+ . NSg/VB/J . NSg/J/R/C . . VB/C NPr+ VPt NSg/I/J/R/C -> much frightened that she ran off at once in the direction it pointed to , without -# NSg/I/J/R/Dq VP/J NSg/I/C/Ddem ISg+ NSg/VPt NSg/VB/J/P NSg/P NSg/C NPr/J/R/P D+ N🅪Sg+ NPr/ISg+ VP/J P . C/P +> much frightened that she ran off at once in the direction it pointed to , without +# NSg/I/J/R/Dq VP/J NSg/I/C/Ddem ISg+ VPt NSg/VB/J/P NSg/P NSg/C NPr/J/R/P D+ N🅪Sg+ NPr/ISg+ VP/J P . C/P > trying to explain the mistake it had made . # Nᴹ/Vg/J P VB D+ NSg/VB+ NPr/ISg+ VP VP . > # -> “ He took me for his housemaid , ” she said to herself as she ran . “ How surprised -# . NPr/ISg+ VPt NPr/ISg+ R/C/P ISg/D$+ NSg/VB . . ISg+ VP/J P ISg+ R/C/P ISg+ NSg/VPt . . NSg/C VP/J +> “ He took me for his housemaid , ” she said to herself as she ran . “ How surprised +# . NPr/ISg+ VPt NPr/ISg+ R/C/P ISg/D$+ NSg/VB . . ISg+ VP/J P ISg+ R/C/P ISg+ VPt . . NSg/C VP/J > he’ll be when he finds out who I am ! But I’d better take him his fan and # K NSg/VXB NSg/I/C NPr/ISg+ NPl/V3 NSg/VB/J/R/P NPr/I+ ISg/#r+ NPr/VB/J . NSg/C/P K NSg/VXB/JC NSg/VB ISg+ ISg/D$+ NSg/VB VB/C > gloves — that is , if I can find them . ” As she said this , she came upon a neat @@ -1476,14 +1476,14 @@ # NSg/I/J/R/C ISg+ VP/J NSg/I/J P D+ NPl/V3+ . VB/C VPt VP/J P NSg/VB NSg/I/C/Ddem ISg+ VPt > shrinking directly . As soon as she was small enough to get through the door , she # Nᴹ/Vg/J R/C . R/C/P J/R R/C/P ISg+ VPt NPr/VB/J NSg/I P NSg/VB NSg/J/P D+ NSg/VB+ . ISg+ -> ran out of the house , and found quite a crowd of little animals and birds -# NSg/VPt NSg/VB/J/R/P P D+ NPr/VB+ . VB/C NSg/VP R D/P NSg/VB P NPr/I/J/Dq NPl VB/C NPl/V3+ +> ran out of the house , and found quite a crowd of little animals and birds +# VPt NSg/VB/J/R/P P D+ NPr/VB+ . VB/C NSg/VP R D/P NSg/VB P NPr/I/J/Dq NPl VB/C NPl/V3+ > waiting outside . The poor little Lizard , Bill , was in the middle , being held up # Nᴹ/Vg/J Nᴹ/VB/J/P . D NSg/VB/J NPr/I/J/Dq NSg . NPr/VB+ . VPt NPr/J/R/P D NSg/VB/J . N🅪Sg/Vg/J/C VP NSg/VB/J/P > by two guinea - pigs , who were giving it something out of a bottle . They all made # NSg/J/P NSg NPr+ . NPl/V3+ . NPr/I+ NSg/VPt Nᴹ/Vg/J NPr/ISg+ NSg/I/J+ NSg/VB/J/R/P P D/P NSg/VB+ . IPl+ NSg/I/J/C/Dq VP -> a rush at Alice the moment she appeared ; but she ran off as hard as she could , -# D/P NPr/VB/J+ NSg/P NPr+ D+ NSg+ ISg+ VP/J . NSg/C/P ISg+ NSg/VPt NSg/VB/J/P R/C/P N🅪Sg/J/R R/C/P ISg+ NSg/VXB . +> a rush at Alice the moment she appeared ; but she ran off as hard as she could , +# D/P NPr/VB/J+ NSg/P NPr+ D+ NSg+ ISg+ VP/J . NSg/C/P ISg+ VPt NSg/VB/J/P R/C/P N🅪Sg/J/R R/C/P ISg+ NSg/VXB . > and soon found herself safe in a thick wood . # VB/C J/R NSg/VP ISg+ NSg/VB/J NPr/J/R/P D/P+ NSg/VB/J+ NPr🅪Sg/VB/J+ . > @@ -1532,8 +1532,8 @@ # NSg/P D NSg/VB/J+ . VB/C VP/J NPr/VB/J+ NSg/J/P NPl/V3+ NPr/J/R/P ISg/D$+ NSg/VB+ P NSg/VB NSg/VB/J P NPr/ISg+ . NSg/J/R/C > Alice , thinking it was very like having a game of play with a cart - horse , and # NPr+ . Nᴹ/Vg/J NPr/ISg+ VPt J/R NSg/VB/J/C/P Nᴹ/Vg/J D/P NSg/VB/J+ P N🅪Sg/VB P D/P NSg/VB+ . NSg/VB+ . VB/C -> expecting every moment to be trampled under its feet , ran round the thistle -# Nᴹ/Vg/J Dq NSg+ P NSg/VXB VP/J NSg/J/P ISg/D$+ NPl+ . NSg/VPt NSg/VB/J/P D NSg +> expecting every moment to be trampled under its feet , ran round the thistle +# Nᴹ/Vg/J Dq NSg+ P NSg/VXB VP/J NSg/J/P ISg/D$+ NPl+ . VPt NSg/VB/J/P D NSg > again ; then the puppy began a series of short charges at the stick , running a # P . NSg/J/R/C D NSg/VB+ VPt D/P NSgPl P NPr/VB/J/P NPl/V3+ NSg/P D NSg/VB/J+ . Nᴹ/Vg/J/P D/P > very little way forwards each time and a long way back , and barking hoarsely all @@ -1546,8 +1546,8 @@ # > This seemed to Alice a good opportunity for making her escape ; so she set off at # I/Ddem VP/J P NPr+ D/P+ NPr/VB/J+ N🅪Sg+ R/C/P Nᴹ/Vg/J ISg/D$+ N🅪Sg/VB . NSg/I/J/R/C ISg+ NPr/VBP/J NSg/VB/J/P NSg/P -> once , and ran till she was quite tired and out of breath , and till the puppy’s -# NSg/C . VB/C NSg/VPt NSg/VB/C/P ISg+ VPt R VP/J VB/C NSg/VB/J/R/P P N🅪Sg/VB/J+ . VB/C NSg/VB/C/P D NSg$ +> once , and ran till she was quite tired and out of breath , and till the puppy’s +# NSg/C . VB/C VPt NSg/VB/C/P ISg+ VPt R VP/J VB/C NSg/VB/J/R/P P N🅪Sg/VB/J+ . VB/C NSg/VB/C/P D NSg$ > bark sounded quite faint in the distance . # N🅪Sg/VB+ VP/J R NSg/VB/J NPr/J/R/P D N🅪Sg/VB+ . > @@ -3538,8 +3538,8 @@ # . ISg/#r+ NSg/VB . . VP/J D+ NPr/VB/J+ . NPr/I+ VP NSg NSg/VPp Nᴹ/Vg/J D+ NPl/V3+ . . NSg/VB/J/P P > their heads ! ” and the procession moved on , three of the soldiers remaining # D$+ NPl/V3+ . . VB/C D+ NSg/VB+ VP/J J/P . NSg P D+ NPl/V3+ Nᴹ/Vg/J -> behind to execute the unfortunate gardeners , who ran to Alice for protection . -# NSg/J/P P VB D+ NSg/J+ NPl+ . NPr/I+ NSg/VPt P NPr R/C/P N🅪Sg+ . +> behind to execute the unfortunate gardeners , who ran to Alice for protection . +# NSg/J/P P VB D+ NSg/J+ NPl+ . NPr/I+ VPt P NPr R/C/P N🅪Sg+ . > # > “ You shan’t be beheaded ! ” said Alice , and she put them into a large flower - pot @@ -3726,8 +3726,8 @@ # J . R/C/P NSg/VB+ . K D NSg/VB/J K VP P NSg/VB/J NSg/J/P NSg/J/P Nᴹ/Vg/J J/P > at the other end of the ground — and I should have croqueted the Queen’s hedgehog # NSg/P D NSg/VB/J NSg/VB P D N🅪Sg/VB/J+ . VB/C ISg/#r+ VXB NSg/VXB VP/J D NPr$ NSg/VB+ -> just now , only it ran away when it saw mine coming ! ” -# J/R NSg/J/R/C . J/R/C NPr/ISg+ NSg/VPt VB/J NSg/I/C NPr/ISg+ NSg/VPt NSg/I/VB+ Nᴹ/Vg/J . . +> just now , only it ran away when it saw mine coming ! ” +# J/R NSg/J/R/C . J/R/C NPr/ISg+ VPt VB/J NSg/I/C NPr/ISg+ NSg/VPt NSg/I/VB+ Nᴹ/Vg/J . . > # > “ How do you like the Queen ? ” said the Cat in a low voice . @@ -3890,8 +3890,8 @@ # D NSg$ NPr/VB/J+ VPt Nᴹ/Vg/J VB/J D NSg+ NPr/ISg+ VPt VPp/J/P . VB/C . NSg/J/P D N🅪Sg/VB/J+ NPr/ISg+ VP > come back with the Duchess , it had entirely disappeared ; so the King and the # NSg/VBPp/P NSg/VB/J P D NSg/VB . NPr/ISg+ VP R VP/J . NSg/I/J/R/C D NPr/VB/J+ VB/C D -> executioner ran wildly up and down looking for it , while the rest of the party -# NSg NSg/VPt R NSg/VB/J/P VB/C N🅪Sg/VB/J/P Nᴹ/Vg/J R/C/P NPr/ISg+ . NSg/VB/C/P D NSg/VB P D NSg/VB/J+ +> executioner ran wildly up and down looking for it , while the rest of the party +# NSg VPt R NSg/VB/J/P VB/C N🅪Sg/VB/J/P Nᴹ/Vg/J R/C/P NPr/ISg+ . NSg/VB/C/P D NSg/VB P D NSg/VB/J+ > went back to the game . # NSg/VPt NSg/VB/J P D NSg/VB/J+ . > @@ -4996,10 +4996,10 @@ # C/P Nᴹ/Vg/J R/C/P D NSg/VB P D N🅪Sg+ . > # -> “ What trial is it ? ” Alice panted as she ran ; but the Gryphon only answered “ Come -# . NSg/I+ NSg/VB/J+ VL3 NPr/ISg+ . . NPr+ VP/J R/C/P ISg+ NSg/VPt . NSg/C/P D ? J/R/C VP/J . NSg/VBPp/P -> on ! ” and ran the faster , while more and more faintly came , carried on the breeze -# J/P . . VB/C NSg/VPt D NSg/JC . NSg/VB/C/P NPr/I/J/R/Dq VB/C NPr/I/J/R/Dq R NSg/VPt/P . VP/J J/P D+ NSg/VB+ +> “ What trial is it ? ” Alice panted as she ran ; but the Gryphon only answered “ Come +# . NSg/I+ NSg/VB/J+ VL3 NPr/ISg+ . . NPr+ VP/J R/C/P ISg+ VPt . NSg/C/P D ? J/R/C VP/J . NSg/VBPp/P +> on ! ” and ran the faster , while more and more faintly came , carried on the breeze +# J/P . . VB/C VPt D NSg/JC . NSg/VB/C/P NPr/I/J/R/Dq VB/C NPr/I/J/R/Dq R NSg/VPt/P . VP/J J/P D+ NSg/VB+ > that followed them , the melancholy words : — # NSg/I/C/Ddem+ VP/J NSg/IPl+ . D NSg/J NPl/V3+ . . > @@ -5908,8 +5908,8 @@ # NSg/VXB J/R NSg/VPp NPrᴹ/Vg/J J/P . VB/C NSg/I/C ISg+ VP VP/J . ISg/D$+ NSg/VB+ VP/J ISg/D$+ . > and said , “ It was a curious dream , dear , certainly : but now run in to your tea ; # VB/C VP/J . . NPr/ISg+ VPt D/P J NSg/VB/J . NSg/VB/J . R . NSg/C/P NSg/J/R/C NSg/VBPp NPr/J/R/P P D$+ N🅪Sg/VB+ . -> it’s getting late . ” So Alice got up and ran off , thinking while she ran , as well -# K NSg/Vg NSg/J . . NSg/I/J/R/C NPr+ VP NSg/VB/J/P VB/C NSg/VPt NSg/VB/J/P . Nᴹ/Vg/J NSg/VB/C/P ISg+ NSg/VPt . R/C/P NSg/VB/J/R +> it’s getting late . ” So Alice got up and ran off , thinking while she ran , as well +# K NSg/Vg NSg/J . . NSg/I/J/R/C NPr+ VP NSg/VB/J/P VB/C VPt NSg/VB/J/P . Nᴹ/Vg/J NSg/VB/C/P ISg+ VPt . R/C/P NSg/VB/J/R > she might , what a wonderful dream it had been . # ISg+ Nᴹ/VXB/J . NSg/I+ D/P+ J+ NSg/VB/J+ NPr/ISg+ VP NSg/VPp . > diff --git a/harper-core/tests/text/tagged/Difficult sentences.md b/harper-core/tests/text/tagged/Difficult sentences.md index ae70709ac..a81c27e0c 100644 --- a/harper-core/tests/text/tagged/Difficult sentences.md +++ b/harper-core/tests/text/tagged/Difficult sentences.md @@ -96,8 +96,8 @@ # D NSg VL3 NSg/J/P D NSg/VB+ NSg/VB . > The stream runs by our back door . # D+ NSg/VB+ NPl/V3 NSg/J/P D$+ NSg/VB/J NSg/VB+ . -> He ran straight by me . -# NPr/ISg+ NSg/VPt NSg/VB/J/R NSg/J/P NPr/ISg+ . +> He ran straight by me . +# NPr/ISg+ VPt NSg/VB/J/R NSg/J/P NPr/ISg+ . > Be back by ten o'clock ! . # NSg/VXB NSg/VB/J NSg/J/P NSg R . . > We'll find someone by the end of March . @@ -466,8 +466,8 @@ # R D/P+ NSg/VB/J+ NPr/VB/J+ VP/J NPr/J/R/P . > Would you like that to take away or eat in ? # VXB ISgPl+ NSg/VB/J/C/P NSg/I/C/Ddem+ P NSg/VB VB/J NPr/C VB NPr/J/R/P . -> He ran to the edge of the swimming pool and dived in . -# NPr/ISg+ NSg/VPt P D NSg/VB P D+ NSg/VB NSg/VB+ VB/C VP/J NPr/J/R/P . +> He ran to the edge of the swimming pool and dived in . +# NPr/ISg+ VPt P D NSg/VB P D+ NSg/VB NSg/VB+ VB/C VP/J NPr/J/R/P . > They flew in from London last night . # IPl+ NSg/VPt/J NPr/J/R/P P NPr+ NSg/VB/J+ N🅪Sg/VB+ . > For six hours the tide flows in , then for another six hours it flows out . diff --git a/harper-core/tests/text/tagged/The Great Gatsby.md b/harper-core/tests/text/tagged/The Great Gatsby.md index 10f9df6b7..62ce673aa 100644 --- a/harper-core/tests/text/tagged/The Great Gatsby.md +++ b/harper-core/tests/text/tagged/The Great Gatsby.md @@ -146,8 +146,8 @@ # NSg NSg/P NSg D/P NSg/J+ . NSg/C/P NSg/P D NSg/VB/J NSg/VB/J+ D NSg/VB/J+ VP/J ISg+ P > Washington , and I went out to the country alone . I had a dog — at least I had him # NPr+ . VB/C ISg/#r+ NSg/VPt NSg/VB/J/R/P P D NSg/J+ J . ISg/#r+ VP D/P+ NSg/VB/J+ . NSg/P NSg/J/Dq ISg/#r+ VP ISg+ -> for a few days until he ran away — and an old Dodge and a Finnish woman , who made -# R/C/P D/P+ NSg/I/Dq+ NPl+ C/P NPr/ISg+ NSg/VPt VB/J . VB/C D/P NSg/J NPr/VB/J VB/C D/P+ NSg/J+ NSg/VB+ . NPr/I+ VP +> for a few days until he ran away — and an old Dodge and a Finnish woman , who made +# R/C/P D/P+ NSg/I/Dq+ NPl+ C/P NPr/ISg+ VPt VB/J . VB/C D/P NSg/J NPr/VB/J VB/C D/P+ NSg/J+ NSg/VB+ . NPr/I+ VP > my bed and cooked breakfast and muttered Finnish wisdom to herself over the # D$+ NSg/VBP/J+ VB/C VP/J N🅪Sg/VB+ VB/C VP/J NSg/J+ Nᴹ+ P ISg+ NSg/J/P D+ > electric stove . @@ -304,8 +304,8 @@ # NSg+ NSg/J+ NPrPl/V3+ I+ ISg/#r+ R VPt NSg/P NSg/I/J/C/Dq . D$+ NPr/VB+ VPt NSg/VB/J/R NPr/I/J/R/Dq VB/J > than I expected , a cheerful red - and - white Georgian Colonial mansion , overlooking # C/P ISg/#r+ NSg/VP/J . D/P J N🅪Sg/J . VB/C . NPr🅪Sg/VB/J NSg/J NSg/J+ NSg+ . Nᴹ/Vg/J -> the bay . The lawn started at the beach and ran toward the front door for a -# D NSg/VB/J+ . D+ NSg/VB+ VP/J NSg/P D+ NPr/VB+ VB/C NSg/VPt J/P D+ NSg/VB/J+ NSg/VB+ R/C/P D/P +> the bay . The lawn started at the beach and ran toward the front door for a +# D NSg/VB/J+ . D+ NSg/VB+ VP/J NSg/P D+ NPr/VB+ VB/C VPt J/P D+ NSg/VB/J+ NSg/VB+ R/C/P D/P > quarter of a mile , jumping over sun - dials and brick walks and burning # NSg/VB/J P D/P+ NSg+ . Nᴹ/Vg/J NSg/J/P NPr/VB+ . NPl/V3 VB/C N🅪Sg/VB/J+ NPl/V3 VB/C Nᴹ/Vg/J > gardens — finally when it reached the house drifting up the side in bright vines @@ -1256,8 +1256,8 @@ # R NPr/ISg+ VPt NSg/J NPr🅪Sg/VB J/P NSg+ NPl/V3+ VB/C NPr/J/R/P NSg/VB/J+ P NSg/J NPl/V3 . > where new red gaspumps sat out in pools of light , and when I reached my estate # NSg/R/C NSg/J N🅪Sg/J ? NSg/VP/J NSg/VB/J/R/P NPr/J/R/P NPl/V3 P N🅪Sg/VB/J+ . VB/C NSg/I/C ISg/#r+ VP/J D$+ NSg/VB/J+ -> at West Egg I ran the car under its shed and sat for a while on an abandoned -# NSg/P NPr/VB/J+ N🅪Sg/VB+ ISg/#r+ NSg/VPt D NSg+ NSg/J/P ISg/D$+ NSg/VP+ VB/C NSg/VP/J R/C/P D/P NSg/VB/C/P+ J/P D/P VP/J +> at West Egg I ran the car under its shed and sat for a while on an abandoned +# NSg/P NPr/VB/J+ N🅪Sg/VB+ ISg/#r+ VPt D NSg+ NSg/J/P ISg/D$+ NSg/VP+ VB/C NSg/VP/J R/C/P D/P NSg/VB/C/P+ J/P D/P VP/J > grass roller in the yard . The wind had blown off , leaving a loud , bright night , # NPr🅪Sg/VB+ NSg/VB NPr/J/R/P D NSg/VB+ . D+ N🅪Sg/VB+ VP VPp/J NSg/VB/J/P . Nᴹ/Vg/J D/P NSg/J . NPr/VB/J N🅪Sg/VB+ . > with wings beating in the trees and a persistent organ sound as the full bellows @@ -3718,8 +3718,8 @@ # NSg/VB/J NPr+ NPrPl P NPr+ . VB/C D ? VB/C D NPr ? . > Snell was there three days before he went to the penitentiary , so drunk out on # NPr VPt R+ NSg NPl+ C/P NPr/ISg+ NSg/VPt P D NSg/J+ . NSg/I/J/R/C NSg/VPp/J NSg/VB/J/R/P J/P -> the gravel drive that Mrs . Ulysses Swett’s automobile ran over his right hand . -# D Nᴹ/VB/J+ N🅪Sg/VB NSg/I/C/Ddem NPl+ . NPr+ ? NSg/VB/J NSg/VPt NSg/J/P ISg/D$+ NPr/VB/J NSg/VB+ . +> the gravel drive that Mrs . Ulysses Swett’s automobile ran over his right hand . +# D Nᴹ/VB/J+ N🅪Sg/VB NSg/I/C/Ddem NPl+ . NPr+ ? NSg/VB/J VPt NSg/J/P ISg/D$+ NPr/VB/J NSg/VB+ . > The Dancies came , too , and S. B. Whitebait , who was well over sixty , and Maurice # D ? NSg/VPt/P . R . VB/C ? ? NSg/VB . NPr/I+ VPt NSg/VB/J/R NSg/J/P NSg . VB/C NPr > A. Flink , and the Hammerheads , and Beluga the tobacco importer , and Beluga’s @@ -4008,8 +4008,8 @@ # . NSg$ D NSg/I/J P NPr+ . . > # -> To my astonishment , the thing had an authentic look . “ Orderi di Danilo , ” ran the -# P D$+ Nᴹ+ . D+ NSg+ VP D/P+ J+ NSg/VB+ . . ? NPr/#r+ ? . . NSg/VPt D +> To my astonishment , the thing had an authentic look . “ Orderi di Danilo , ” ran the +# P D$+ Nᴹ+ . D+ NSg+ VP D/P+ J+ NSg/VB+ . . ? NPr/#r+ ? . . VPt D > circular legend , “ Montenegro , Nicolas Rex . ” # NSg/VB/J N🅪Sg/VB+ . . NPr+ . NPrPl NPr . . > @@ -4742,8 +4742,8 @@ # Nᴹ/Vg/J NSg/P ISg+ P J+ N🅪Sg/VB/J+ . NPr/ISg+ VPt Nᴹ/Vg/J/P P NSg/VB NSg/IPl+ > together — it made you laugh in a hushed , fascinated way . That was in August . A # J . NPr/ISg+ VP ISgPl+ NSg/VB NPr/J/R/P D/P VP/J . VP/J NSg/J+ . NSg/I/C/Ddem+ VPt NPr/J/R/P NPr/VB/J+ . D/P+ -> week after I left Santa Barbara Tom ran into a wagon on the Ventura road one -# NSg/J+ P ISg/#r+ NPr/VP/J NPr+ NPr+ NPr/VB+ NSg/VPt P D/P+ NSg/VB+ J/P D ? N🅪Sg/J+ NSg/I/J +> week after I left Santa Barbara Tom ran into a wagon on the Ventura road one +# NSg/J+ P ISg/#r+ NPr/VP/J NPr+ NPr+ NPr/VB+ VPt P D/P+ NSg/VB+ J/P D ? N🅪Sg/J+ NSg/I/J > night , and ripped a front wheel off his car . The girl who was with him got into # N🅪Sg/VB+ . VB/C VP/J D/P NSg/VB/J+ NSg/VB+ NSg/VB/J/P ISg/D$+ NSg+ . D+ NSg/VB+ NPr/I+ VPt P ISg+ VP P > the papers , too , because her arm was broken — she was one of the chambermaids in @@ -5472,8 +5472,8 @@ # > I walked out the back way — just as Gatsby had when he had made his nervous # ISg/#r+ VP/J NSg/VB/J/R/P D+ NSg/VB/J+ NSg/J+ . J/R R/C/P NPr VP NSg/I/C NPr/ISg+ VP VP ISg/D$+ J -> circuit of the house half an hour before — and ran for a huge black knotted tree , -# NSg/VB P D NPr/VB+ N🅪Sg/J/P+ D/P NSg+ C/P . VB/C NSg/VPt R/C/P D/P J N🅪Sg/VB/J VP/J NSg/VB+ . +> circuit of the house half an hour before — and ran for a huge black knotted tree , +# NSg/VB P D NPr/VB+ N🅪Sg/J/P+ D/P NSg+ C/P . VB/C VPt R/C/P D/P J N🅪Sg/VB/J VP/J NSg/VB+ . > whose massed leaves made a fabric against the rain . Once more it was pouring , # I+ VP/J NPl/V3+ VP D/P N🅪Sg/VB+ C/P D N🅪Sg/VB+ . NSg/C NPr/I/J/R/Dq NPr/ISg+ VPt Nᴹ/Vg/J . > and my irregular lawn , well - shaved by Gatsby’s gardener , abounded in small muddy @@ -8778,8 +8778,8 @@ # NSg/I/J/R/C IPl+ NSg/VPt J/P J/P NPr🅪Sg+ NSg/J/P D+ Nᴹ/Vg/J+ Nᴹ/VB/J+ . > # -> The young Greek , Michaelis , who ran the coffee joint beside the ashheaps was the -# D NPr/VB/J NPr/VB/J . ? . NPr/I+ NSg/VPt D+ N🅪Sg/VB/J+ NSg/VB/J P D ? VPt D +> The young Greek , Michaelis , who ran the coffee joint beside the ashheaps was the +# D NPr/VB/J NPr/VB/J . ? . NPr/I+ VPt D+ N🅪Sg/VB/J+ NSg/VB/J P D ? VPt D > principal witness at the inquest . He had slept through the heat until after # NSg/J NSg/VB NSg/P D NSg/VB . NPr/ISg+ VP VP NSg/J/P D+ Nᴹ/VB+ C/P P > five , when he strolled over to the garage , and found George Wilson sick in his @@ -9002,8 +9002,8 @@ # . R VP/J . . VP/J NPr/VB+ . Nᴹ/Vg/J . > # -> “ She ran out ina road . Son - of - a - bitch didn’t even stopus car . ” -# . ISg+ NSg/VPt NSg/VB/J/R/P NPr N🅪Sg/J+ . NPr/VB+ . P . D/P . NSg/VB VXPt NSg/VB/J/R ? NSg+ . . +> “ She ran out ina road . Son - of - a - bitch didn’t even stopus car . ” +# . ISg+ VPt NSg/VB/J/R/P NPr N🅪Sg/J+ . NPr/VB+ . P . D/P . NSg/VB VXPt NSg/VB/J/R ? NSg+ . . > # > “ There was two cars , ” said Michaelis , “ one comin ’ , one goin ’ , see ? ” @@ -9016,8 +9016,8 @@ # > “ One goin ’ each way . Well , she ” — his hand rose toward the blankets but stopped # . NSg/I/J ? . Dq NSg/J+ . NSg/VB/J/R . ISg+ . . ISg/D$+ NSg/VB+ NPr/VPt/J J/P D+ NPl/V3+ NSg/C/P VP/J -> half way and fell to his side — “ she ran out there an ’ the one comin ’ from N’York -# N🅪Sg/J/P+ NSg/J+ VB/C NSg/VPt/J P ISg/D$+ NSg/VB/J+ . . ISg+ NSg/VPt NSg/VB/J/R/P R+ D/P . D+ NSg/I/J+ ? . P ? +> half way and fell to his side — “ she ran out there an ’ the one comin ’ from N’York +# N🅪Sg/J/P+ NSg/J+ VB/C NSg/VPt/J P ISg/D$+ NSg/VB/J+ . . ISg+ VPt NSg/VB/J/R/P R+ D/P . D+ NSg/I/J+ ? . P ? > knock right into her , goin ’ thirty or forty miles an hour . ” # NSg/VB NPr/VB/J P ISg/D$+ . ? . NSg NPr/C NSg/J NPrPl+ D/P NSg+ . . > @@ -9238,8 +9238,8 @@ # K NSg/VXB VP/J NSg/C K NSg/VB/J NPr/J/R/P . K VP NSg/I P NSg/I/J/C/Dq P NSg/IPl+ R/C/P NSg/I/J NPr🅪Sg+ . VB/C > suddenly that included Jordan too . She must have seen something of this in my # R NSg/I/C/Ddem+ VP/J NPr+ R . ISg+ NSg/VXB NSg/VXB NSg/VPp NSg/I/J+ P I/Ddem+ NPr/J/R/P D$+ -> expression , for she turned abruptly away and ran up the porch steps into the -# N🅪Sg+ . R/C/P ISg+ VP/J R VB/J VB/C NSg/VPt NSg/VB/J/P D+ NSg+ NPl/V3+ P D+ +> expression , for she turned abruptly away and ran up the porch steps into the +# N🅪Sg+ . R/C/P ISg+ VP/J R VB/J VB/C VPt NSg/VB/J/P D+ NSg+ NPl/V3+ P D+ > house . I sat down for a few minutes with my head in my hands , until I heard the # NPr/VB+ . ISg/#r+ NSg/VP/J N🅪Sg/VB/J/P R/C/P D/P NSg/I/Dq+ NPl/V3+ P D$+ NPr/VB/J+ NPr/J/R/P D$+ NPl/V3+ . C/P ISg/#r+ VP/J D+ > phone taken up inside and the butler’s voice calling a taxi . Then I walked @@ -10248,8 +10248,8 @@ # . ISg/#r+ NSg/VB . . NPr/ISg+ VP/J R . . K NSg/I/J P I/Ddem Nᴹ/Vg/J NPl+ VB/C ISg/#r+ VXB > think any harm to nobody , but when I get to know a thing I know it . It was the # NSg/VB I/R/Dq N🅪Sg/VB+ P NSg/I+ . NSg/C/P NSg/I/C ISg/#r+ NSg/VB P NSg/VB D/P NSg+ ISg/#r+ NSg/VB NPr/ISg+ . NPr/ISg+ VPt D -> man in that car . She ran out to speak to him and he wouldn’t stop . ” -# NPr/VB/J NPr/J/R/P NSg/I/C/Ddem+ NSg+ . ISg+ NSg/VPt NSg/VB/J/R/P P NSg/VB P ISg+ VB/C NPr/ISg+ VXB NSg/VB . . +> man in that car . She ran out to speak to him and he wouldn’t stop . ” +# NPr/VB/J NPr/J/R/P NSg/I/C/Ddem+ NSg+ . ISg+ VPt NSg/VB/J/R/P P NSg/VB P ISg+ VB/C NPr/ISg+ VXB NSg/VB . . > # > Michaelis had seen this too , but it hadn’t occurred to him that there was any @@ -11532,8 +11532,8 @@ # NPr/VB+ . . NPr/ISg+ NSg/VPt/J NSg/VB/J/P R . . NSg/I+ NSg/C ISg/#r+ VXPt NPr/VB ISg+ . NSg/I/C/Ddem NSg VP NPr/ISg+ > coming to him . He threw dust into your eyes just like he did in Daisy’s , but he # Nᴹ/Vg/J P ISg+ . NPr/ISg+ VPt Nᴹ/VB+ P D$+ NPl/V3+ J/R NSg/VB/J/C/P NPr/ISg+ VXPt NPr/J/R/P NPr$ . NSg/C/P NPr/ISg+ -> was a tough one . He ran over Myrtle like you’d run over a dog and never even -# VPt D/P NSg/VB/J NSg/I/J+ . NPr/ISg+ NSg/VPt NSg/J/P NPr NSg/VB/J/C/P K NSg/VBPp NSg/J/P D/P NSg/VB/J+ VB/C R NSg/VB/J/R +> was a tough one . He ran over Myrtle like you’d run over a dog and never even +# VPt D/P NSg/VB/J NSg/I/J+ . NPr/ISg+ VPt NSg/J/P NPr NSg/VB/J/C/P K NSg/VBPp NSg/J/P D/P NSg/VB/J+ VB/C R NSg/VB/J/R > stopped his car . ” # VP/J ISg/D$+ NSg+ . . >