Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion harper-core/dictionary.dict
Original file line number Diff line number Diff line change
Expand Up @@ -35437,7 +35437,7 @@ newsworthy/Jp
newsy/J^>N
newt/~NgS
newton/~NgS
next/~JPNg
next/~JP
nexus/~NgS
niacin/Nmg
nib/NSgV
Expand Down
2 changes: 2 additions & 0 deletions harper-core/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ mod first_match_of;
mod fixed_phrase;
mod longest_match_of;
mod mergeable_words;
mod nominal_phrase;
mod optional;
mod reflexive_pronoun;
mod repeating;
Expand All @@ -47,6 +48,7 @@ pub use first_match_of::FirstMatchOf;
pub use fixed_phrase::FixedPhrase;
pub use longest_match_of::LongestMatchOf;
pub use mergeable_words::MergeableWords;
pub use nominal_phrase::NominalPhrase;
pub use optional::Optional;
pub use reflexive_pronoun::ReflexivePronoun;
pub use repeating::Repeating;
Expand Down
204 changes: 204 additions & 0 deletions harper-core/src/expr/nominal_phrase.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
use crate::expr::{Expr, SequenceExpr};
use crate::{Span, Token};

/// An expression that matches a nominal phrase.
///
/// This is a stateless unit type; the matching logic lives entirely in its
/// `Expr` implementation.
#[derive(Default)]
pub struct NominalPhrase;

impl Expr for NominalPhrase {
    /// Try to match a nominal phrase starting at `cursor`.
    ///
    /// Two alternatives are offered to `SequenceExpr::any_of`:
    ///
    /// 1. A noun phrase: an optional determiner, any number of adjectives, and
    ///    then one or more nouns separated by whitespace.
    /// 2. A pronoun.
    ///
    /// The expression tree is assembled on every call and then delegated to,
    /// so the returned span is whatever the combined expression yields.
    fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
        // Leading pieces; each one consumes its trailing whitespace.
        let determiner = SequenceExpr::default().then_determiner().t_ws();
        let adjective = SequenceExpr::default().then_adjective().t_ws();

        // Head noun followed by any further whitespace-separated nouns
        // (compound nouns such as "car park").
        let following_noun = SequenceExpr::default()
            .t_ws()
            .then(SequenceExpr::default().then_noun());
        let nouns = SequenceExpr::default()
            .then_noun()
            .then_zero_or_more(following_noun);

        let noun_phrase = SequenceExpr::default()
            .then_optional(determiner)
            .then_zero_or_more(adjective)
            .then(nouns);
        let pronoun = SequenceExpr::default().then_pronoun();

        SequenceExpr::any_of(vec![Box::new(noun_phrase), Box::new(pronoun)])
            .run(cursor, tokens, source)
    }
}

#[cfg(test)]
mod tests {
    //! Tests for the `NominalPhrase` expression.
    //!
    //! Most tests collect every match in a small document and compare the
    //! matched text against the expected list of phrases.

    use super::NominalPhrase;
    use crate::Document;
    use crate::expr::ExprExt;
    use crate::linting::tests::SpanVecExt;

    // A bare noun is a nominal phrase on its own.
    #[test]
    fn thing() {
        let doc = Document::new_markdown_default_curated("thing");
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["thing"])
    }

    // Determiner + noun.
    #[test]
    fn a_thing() {
        let doc = Document::new_markdown_default_curated("a thing");
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["a thing"])
    }

    // Adjective + noun.
    #[test]
    fn red_thing() {
        let doc = Document::new_markdown_default_curated("red thing");
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["red thing"])
    }

    // Several adjectives + noun.
    #[test]
    fn big_red_thing() {
        let doc = Document::new_markdown_default_curated("big red thing");
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["big red thing"])
    }

    // Determiner + adjective + noun.
    #[test]
    fn a_red_thing() {
        let doc = Document::new_markdown_default_curated("a red thing");
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["a red thing"])
    }

    // Determiner + several adjectives + noun.
    #[test]
    fn a_big_red_thing() {
        let doc = Document::new_markdown_default_curated("a big red thing");
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["a big red thing"])
    }

    #[test]
    fn test_present_participle_and_plural() {
        let doc = Document::new_markdown_default_curated("the falling rocks");
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["the falling rocks"])
    }

    // The preposition "of" is not part of either phrase, so two separate
    // matches are expected.
    #[test]
    fn test_gerund() {
        let doc = Document::new_markdown_default_curated("a spate of vomiting");
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["a spate", "vomiting"])
    }

    // Consecutive nouns ("car park", "train station") stay in one phrase.
    #[test]
    fn test_compound_nouns() {
        let doc = Document::new_markdown_default_curated(
            "the new car park next to the old train station",
        );
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(
            matches.to_strings(&doc),
            vec!["the new car park", "the old train station"]
        )
    }

    #[test]
    fn test_pronouns() {
        let doc = Document::new_markdown_default_curated("Me, myself, and I.");
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["Me", "myself", "I"])
    }

    #[test]
    fn test_noun_and_pronoun() {
        let doc = Document::new_markdown_default_curated("Me and my dog.");
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["Me", "my dog"])
    }

    // From the `NominalPhrase` `Pattern`

    #[test]
    fn simple_apple() {
        let doc = Document::new_markdown_default_curated("A red apple");
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["A red apple"])
    }

    #[test]
    fn complex_apple() {
        let doc = Document::new_markdown_default_curated("A red apple with a long stem");
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(matches.to_strings(&doc), vec!["A red apple", "a long stem"])
    }

    #[test]
    fn list_fruit() {
        let doc = Document::new_markdown_default_curated("An apple, a banana and a pear");
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(
            matches.to_strings(&doc),
            vec!["An apple", "a banana", "a pear"]
        )
    }

    #[test]
    fn simplest_banana() {
        let doc = Document::new_markdown_default_curated("a banana");
        assert!(NominalPhrase.iter_matches_in_doc(&doc).next().is_some());
    }

    // On failure `assert_eq!` already prints both sides, so no extra debug
    // output is needed here.
    #[test]
    fn food() {
        let doc = Document::new_markdown_default_curated(
            "My favorite foods are pizza, sushi, tacos and burgers.",
        );
        let matches = NominalPhrase.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(
            matches.to_strings(&doc),
            vec!["My favorite foods", "pizza", "sushi", "tacos", "burgers"]
        )
    }

    #[test]
    fn simplest_way() {
        let doc = Document::new_markdown_default_curated("a way");
        assert!(NominalPhrase.iter_matches_in_doc(&doc).next().is_some());
    }

    #[test]
    fn present_participle_way() {
        let doc = Document::new_markdown_default_curated("a winning way");
        assert!(NominalPhrase.iter_matches_in_doc(&doc).next().is_some());
    }

    #[test]
    fn perfect_participle_way() {
        let doc = Document::new_markdown_default_curated("a failed way");
        assert!(NominalPhrase.iter_matches_in_doc(&doc).next().is_some());
    }
}
4 changes: 4 additions & 0 deletions harper-core/src/expr/sequence_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ impl SequenceExpr {
self.then(Repeating::new(Box::new(expr), 1))
}

/// Append `expr` as a step that may repeat any number of times, including
/// not at all.
///
/// The expression is wrapped in a `Repeating` constructed with `0` as its
/// second argument (presumably the minimum number of required matches —
/// confirm against `Repeating::new`).
pub fn then_zero_or_more(self, expr: impl Expr + 'static) -> Self {
    let repeated = Repeating::new(Box::new(expr), 0);
    self.then(repeated)
}

/// Create a new condition that will step one token forward if met.
/// If the condition is _not_ met, the whole expression returns `None`.
///
Expand Down
6 changes: 2 additions & 4 deletions harper-core/src/linting/for_noun.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
use crate::expr::Expr;
use crate::expr::OwnedExprExt;
use crate::expr::SequenceExpr;
use crate::{
Token,
patterns::{NominalPhrase, Word},
expr::{Expr, NominalPhrase, OwnedExprExt, SequenceExpr},
patterns::Word,
};

use super::{ExprLinter, Lint, LintKind, Suggestion};
Expand Down
7 changes: 3 additions & 4 deletions harper-core/src/linting/hyphenate_number_day.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use crate::expr::Expr;
use crate::expr::SequenceExpr;
use crate::{Token, patterns::NominalPhrase};
use crate::Token;
use crate::expr::{Expr, NominalPhrase, SequenceExpr};
use crate::linting::expr_linter::Chunk;

use super::{ExprLinter, Lint, LintKind, Suggestion};
use crate::linting::expr_linter::Chunk;

pub struct HyphenateNumberDay {
expr: Box<dyn Expr>,
Expand Down
4 changes: 2 additions & 2 deletions harper-core/src/linting/if_wouldve.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::expr::{Expr, SequenceExpr};
use crate::expr::{Expr, NominalPhrase, SequenceExpr};
use crate::linting::expr_linter::Chunk;
use crate::linting::{ExprLinter, LintKind, Suggestion};
use crate::patterns::{NominalPhrase, WordSet};
use crate::patterns::WordSet;
use crate::token_string_ext::TokenStringExt;
use crate::{CharStringExt, Lint, Token};

Expand Down
5 changes: 2 additions & 3 deletions harper-core/src/linting/its_contraction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@ use crate::TokenStringExt;
use crate::expr::All;
use crate::expr::Expr;
use crate::expr::ExprExt;
use crate::expr::NominalPhrase;
use crate::expr::OwnedExprExt;
use crate::expr::SequenceExpr;
use crate::patterns::NominalPhrase;
use crate::patterns::Pattern;
use crate::patterns::UPOSSet;
use crate::patterns::WordSet;
use crate::{
Expand Down Expand Up @@ -79,7 +78,7 @@ impl ItsContraction {
let offender_chars = offender.span.get_content(source);

if toks.get(2)?.kind.is_upos(UPOS::VERB)
&& NominalPhrase.matches(&toks[2..], source).is_some()
&& NominalPhrase.run(0, &toks[2..], source).is_some()
{
return None;
}
Expand Down
5 changes: 2 additions & 3 deletions harper-core/src/linting/no_oxford_comma.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use crate::expr::ExprExt;
use crate::expr::SequenceExpr;
use crate::{
Document, Token, TokenStringExt,
patterns::{NominalPhrase, WordSet},
expr::{ExprExt, NominalPhrase, SequenceExpr},
patterns::WordSet,
};

use super::{Lint, LintKind, Linter, Suggestion};
Expand Down
9 changes: 4 additions & 5 deletions harper-core/src/linting/pronoun_inflection_be.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
use harper_brill::UPOS;

use crate::expr::{All, AnchorStart, Expr, ExprMap, SequenceExpr};
use crate::patterns::{NominalPhrase, UPOSSet};
use crate::expr::{All, AnchorStart, Expr, ExprMap, NominalPhrase, SequenceExpr};
use crate::linting::expr_linter::Chunk;
use crate::patterns::UPOSSet;
use crate::{Lrc, Token, TokenKind};

use super::Suggestion;
use super::{ExprLinter, Lint, LintKind};
use crate::linting::expr_linter::Chunk;
use super::{ExprLinter, Lint, LintKind, Suggestion};

pub struct PronounInflectionBe {
expr: Box<dyn Expr>,
Expand Down
4 changes: 2 additions & 2 deletions harper-core/src/linting/quantifier_numeral_conflict.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::expr::{All, Expr, SequenceExpr, SpelledNumberExpr};
use crate::expr::{All, Expr, NominalPhrase, SequenceExpr, SpelledNumberExpr};
use crate::linting::expr_linter::Chunk;
use crate::linting::{ExprLinter, LintKind, Suggestion};
use crate::patterns::{NominalPhrase, WordSet};
use crate::patterns::WordSet;
use crate::token_string_ext::TokenStringExt;
use crate::{CharStringExt, Lint, Token};

Expand Down
7 changes: 3 additions & 4 deletions harper-core/src/linting/take_serious.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use crate::linting::expr_linter::Chunk;
use crate::{
Token, TokenStringExt,
expr::{Expr, SequenceExpr},
linting::{ExprLinter, Lint, LintKind, Suggestion},
patterns::{NominalPhrase, WordSet},
expr::{Expr, NominalPhrase, SequenceExpr},
linting::{ExprLinter, Lint, LintKind, Suggestion, expr_linter::Chunk},
patterns::WordSet,
};

/// Linter that corrects "take X serious" to "take X seriously".
Expand Down
32 changes: 32 additions & 0 deletions harper-core/src/patterns/nominal_phrase.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,36 @@ mod tests {
.is_some()
);
}

// Known false positives: these inputs should not match, but currently do.

#[test]
fn the_the_car() {
    // Doubled determiner; the assertion records that a match is still found.
    let doc = Document::new_markdown_default_curated("the the car");
    let found = NominalPhrase.matches(doc.get_tokens(), doc.get_source());
    assert!(found.is_some());
}

#[test]
fn red_the_car() {
    // Adjective placed before the determiner; a match is still found.
    let doc = Document::new_markdown_default_curated("red the car");
    let found = NominalPhrase.matches(doc.get_tokens(), doc.get_source());
    assert!(found.is_some());
}

#[test]
fn speeding_the_a_car() {
    // Participle followed by two determiners; a match is still found.
    let doc = Document::new_markdown_default_curated("speeding the a car");
    let found = NominalPhrase.matches(doc.get_tokens(), doc.get_source());
    assert!(found.is_some());
}
}
Loading