Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions harper-core/dictionary.dict
Original file line number Diff line number Diff line change
Expand Up @@ -14155,15 +14155,15 @@ batter/~VGd>SNwgzZ
batterer/Ng
battery/~NSg
batting/~NgV6
battle/~NSgVd>GJLZ
battle/~NSgVdGJLZ
battleaxe/NgS
battledore/NSg
battledress/N
battlefield/~NgS
battlefront/NgS
battleground/~NgS
battlement/NSg
battler/Ng
battler/NgS
battleship/~NSg
batty/J>^N
bauble/NSg
Expand Down Expand Up @@ -14971,7 +14971,8 @@ blitz/~NgSVGd
blitzkrieg/NgS
blivet/NS
blizzard/~NSgV
bloat/VGd>SNJZ
bloat/VGdSNJZ
bloater/NgS
bloatware/Nmg
blob/~NSgV
blobbed/VtT
Expand Down Expand Up @@ -16080,7 +16081,7 @@ buzzkill/NSg
buzzword/NSg
bx/~N
bxs/N
by/~PJNg
by/~PNg # removed adjective sense
bye/~NSgJP
bygone/JNSg
bylaw/NSg
Expand Down Expand Up @@ -27120,11 +27121,11 @@ grouch/NgSVGd
grouchily/Ry
grouchiness/Nmg
grouchy/J>^p
ground/~NwgSVGd>JZz
ground/~NwgSVGdJZz
groundbreaking/~JNgS
groundcloth/N0
groundcloths/N9
grounder/Ng
grounder/NgS
groundhog/NgS
grounding/~NwgSV6
groundless/JY
Expand Down Expand Up @@ -31988,7 +31989,7 @@ lii
likability/Nmg
likable/JpE
likableness/Nmg
like/~VGdSNgJ^CPE
like/~VGdSNgJCPE
likeability/Ng!@_
likeable/Jp!@_
likeableness/Ng!@_
Expand Down Expand Up @@ -34252,7 +34253,7 @@ modality/~NwSg
modded/VtT
modding/V6Nm
mode/~NgS
model/~NSgJ>VGdZz
model/~NSgJVGdZz
modeler/NgS
modeling/~VNg
modelled/JVr!@_
Expand Down Expand Up @@ -48640,7 +48641,7 @@ timberland/NSg
timberline/~NgS
timbre/~NSg
timbrel/NSgV
time/~NwgSVGd>YZz
time/~NwgSVGdYZz
timekeeper/NgS
timekeeping/NgV
timeless/~JYNp
Expand All @@ -48650,7 +48651,7 @@ timeliness/NmgU
timely/~J>^Up
timeout/NSg
timepiece/NgS
timer/~Ng
timer/~NgS
timescale/~NS
timeserver/NSg
timeserving/NgJ
Expand Down
239 changes: 134 additions & 105 deletions harper-core/src/linting/lint_group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ use super::mixed_bag::MixedBag;
use super::modal_of::ModalOf;
use super::modal_seem::ModalSeem;
use super::months::Months;
use super::more_adjective::MoreAdjective;
use super::more_better::MoreBetter;
use super::most_number::MostNumber;
use super::most_of_the_times::MostOfTheTimes;
Expand Down Expand Up @@ -325,6 +326,8 @@ pub struct LintGroup {
}

impl LintGroup {
// Constructor methods

pub fn empty() -> Self {
Self {
config: LintGroupConfig::default(),
Expand All @@ -335,102 +338,6 @@ impl LintGroup {
}
}

/// Check if the group already contains a linter with a given name.
pub fn contains_key(&self, name: impl AsRef<str>) -> bool {
self.linters.contains_key(name.as_ref())
|| self.chunk_expr_linters.contains_key(name.as_ref())
}

/// Add a [`Linter`] to the group, returning whether the operation was successful.
/// If it returns `false`, it is because a linter with that key already existed in the group.
pub fn add(&mut self, name: impl AsRef<str>, linter: impl Linter + 'static) -> bool {
if self.contains_key(&name) {
false
} else {
self.linters
.insert(name.as_ref().to_string(), Box::new(linter));
true
}
}

/// Add a chunk-based [`ExprLinter`] to the group, returning whether the operation was successful.
/// If it returns `false`, it is because a linter with that key already existed in the group.
///
/// This function is not significantly different from [`Self::add`], but allows us to take
/// advantage of some properties of chunk-based [`ExprLinter`]s for cache optimization.
pub fn add_chunk_expr_linter(
&mut self,
name: impl AsRef<str>,
// linter: impl ExprLinter + 'static,
linter: impl ExprLinter<Unit = Chunk> + 'static,
) -> bool {
if self.contains_key(&name) {
false
} else {
self.chunk_expr_linters
.insert(name.as_ref().to_string(), Box::new(linter) as _);
true
}
}

/// Merge the contents of another [`LintGroup`] into this one.
/// The other lint group will be left empty after this operation.
pub fn merge_from(&mut self, other: &mut LintGroup) {
self.config.merge_from(&mut other.config);

let other_linters = std::mem::take(&mut other.linters);
self.linters.extend(other_linters);

let other_expr_linters = std::mem::take(&mut other.chunk_expr_linters);
self.chunk_expr_linters.extend(other_expr_linters);
}

pub fn iter_keys(&self) -> impl Iterator<Item = &str> {
self.linters
.keys()
.chain(self.chunk_expr_linters.keys())
.map(|v| v.as_str())
}

/// Set all contained rules to a specific value.
/// Passing `None` will unset that rule, allowing it to assume its default state.
pub fn set_all_rules_to(&mut self, enabled: Option<bool>) {
let keys = self.iter_keys().map(|v| v.to_string()).collect::<Vec<_>>();

for key in keys {
match enabled {
Some(v) => self.config.set_rule_enabled(key, v),
None => self.config.unset_rule_enabled(key),
}
}
}

/// Get map from each contained linter's name to its associated description.
pub fn all_descriptions(&self) -> HashMap<&str, &str> {
self.linters
.iter()
.map(|(key, value)| (key.as_str(), value.description()))
.chain(
self.chunk_expr_linters
.iter()
.map(|(key, value)| (key.as_str(), ExprLinter::description(value))),
)
.collect()
}

/// Get map from each contained linter's name to its associated description, rendered to HTML.
pub fn all_descriptions_html(&self) -> HashMap<&str, String> {
self.linters
.iter()
.map(|(key, value)| (key.as_str(), value.description_html()))
.chain(
self.chunk_expr_linters
.iter()
.map(|(key, value)| (key.as_str(), value.description_html())),
)
.collect()
}

/// Swap out [`Self::config`] with another [`LintGroupConfig`].
pub fn with_lint_config(mut self, config: LintGroupConfig) -> Self {
self.config = config;
Expand Down Expand Up @@ -670,6 +577,9 @@ impl LintGroup {
out.add("MassPlurals", MassPlurals::new(dictionary.clone()));
out.config.set_rule_enabled("MassPlurals", true);

out.add("MoreAdjective", MoreAdjective::new(dictionary.clone()));
out.config.set_rule_enabled("MoreAdjective", true);

out
}

Expand All @@ -683,6 +593,104 @@ impl LintGroup {
group
}

// Non-constructor methods

/// Check if the group already contains a linter with a given name.
pub fn contains_key(&self, name: impl AsRef<str>) -> bool {
self.linters.contains_key(name.as_ref())
|| self.chunk_expr_linters.contains_key(name.as_ref())
}

/// Add a [`Linter`] to the group, returning whether the operation was successful.
/// If it returns `false`, it is because a linter with that key already existed in the group.
pub fn add(&mut self, name: impl AsRef<str>, linter: impl Linter + 'static) -> bool {
if self.contains_key(&name) {
false
} else {
self.linters
.insert(name.as_ref().to_string(), Box::new(linter));
true
}
}

/// Add a chunk-based [`ExprLinter`] to the group, returning whether the operation was successful.
/// If it returns `false`, it is because a linter with that key already existed in the group.
///
/// This function is not significantly different from [`Self::add`], but allows us to take
/// advantage of some properties of chunk-based [`ExprLinter`]s for cache optimization.
pub fn add_chunk_expr_linter(
&mut self,
name: impl AsRef<str>,
// linter: impl ExprLinter + 'static,
linter: impl ExprLinter<Unit = Chunk> + 'static,
) -> bool {
if self.contains_key(&name) {
false
} else {
self.chunk_expr_linters
.insert(name.as_ref().to_string(), Box::new(linter) as _);
true
}
}

/// Merge the contents of another [`LintGroup`] into this one.
/// The other lint group will be left empty after this operation.
pub fn merge_from(&mut self, other: &mut LintGroup) {
self.config.merge_from(&mut other.config);

let other_linters = std::mem::take(&mut other.linters);
self.linters.extend(other_linters);

let other_expr_linters = std::mem::take(&mut other.chunk_expr_linters);
self.chunk_expr_linters.extend(other_expr_linters);
}

pub fn iter_keys(&self) -> impl Iterator<Item = &str> {
self.linters
.keys()
.chain(self.chunk_expr_linters.keys())
.map(|v| v.as_str())
}

/// Set all contained rules to a specific value.
/// Passing `None` will unset that rule, allowing it to assume its default state.
pub fn set_all_rules_to(&mut self, enabled: Option<bool>) {
let keys = self.iter_keys().map(|v| v.to_string()).collect::<Vec<_>>();

for key in keys {
match enabled {
Some(v) => self.config.set_rule_enabled(key, v),
None => self.config.unset_rule_enabled(key),
}
}
}

/// Get map from each contained linter's name to its associated description.
pub fn all_descriptions(&self) -> HashMap<&str, &str> {
self.linters
.iter()
.map(|(key, value)| (key.as_str(), value.description()))
.chain(
self.chunk_expr_linters
.iter()
.map(|(key, value)| (key.as_str(), ExprLinter::description(value))),
)
.collect()
}

/// Get map from each contained linter's name to its associated description, rendered to HTML.
pub fn all_descriptions_html(&self) -> HashMap<&str, String> {
self.linters
.iter()
.map(|(key, value)| (key.as_str(), value.description_html()))
.chain(
self.chunk_expr_linters
.iter()
.map(|(key, value)| (key.as_str(), value.description_html())),
)
.collect()
}

pub fn organized_lints(&mut self, document: &Document) -> BTreeMap<String, Vec<Lint>> {
let mut results = BTreeMap::new();

Expand Down Expand Up @@ -763,7 +771,7 @@ impl Linter for LintGroup {
mod tests {
use std::sync::Arc;

use super::LintGroup;
use super::{LintGroup, LintGroupConfig};
use crate::linting::tests::assert_no_lints;
use crate::spell::{FstDictionary, MutableDictionary};
use crate::{Dialect, Document, linting::Linter};
Expand Down Expand Up @@ -815,21 +823,42 @@ mod tests {
);
}

/// Tests that no linters' descriptions contain errors handled by other linters.
///
/// This test verifies that the description of each linter (which is written in natural language)
/// doesn't trigger any other linter's rules, with the exception of certain linters that
/// suggest mere alternatives rather than flagging actual errors.
///
/// For example, we disable the "MoreAdjective" linter since some comparative and superlative
/// adjectives can be more awkward than their two-word counterparts, even if technically correct.
///
/// If this test fails, it means either:
/// 1. A linter's description contains an actual error that should be fixed, or
/// 2. A linter is being too aggressive in flagging text that is actually correct English
/// in the context of another linter's description.
#[test]
fn lint_descriptions_are_clean() {
let mut group = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);
let pairs: Vec<_> = group
let lints_to_check = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);

let mut enforcer_config = LintGroupConfig::new_curated();
enforcer_config.set_rule_enabled("MoreAdjective", false);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't love the idea of disabling specific rules from this test. Our goal here is to make sure our descriptions are clean. If we want to ignore this rule becuase it's more of a stylistic choice, rather than a grammatical error, I think we should be ignoring specific LintKinds instead.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's totally fair of course. But one problem is that it often feels more like fixing an actual grammatical error, as in "more big" or "more long", whereas it seems like a stylistic choice for words like "funner" or "oftener".

It might be best solved by a new annotation flag or two for adjectives?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's totally fair of course. But one problem is that it often feels more like fixing an actual grammatical error, as in "more big" or "more long", whereas it seems like a stylistic choice for words like "funner" or "oftener".

It might be best solved by a new annotation flag or two for adjectives?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's totally fair of course. But one problem is that it often feels more like fixing an actual grammatical error, as in "more big" or "more long", whereas it seems like a stylistic choice for words like "funner" or "oftener".

It might be best solved by a new annotation flag or two for adjectives?


let mut lints_to_enforce =
LintGroup::new_curated(FstDictionary::curated(), Dialect::American)
.with_lint_config(enforcer_config);

let name_description_pairs: Vec<_> = lints_to_check
.all_descriptions()
.into_iter()
.map(|(a, b)| (a.to_string(), b.to_string()))
.map(|(n, d)| (n.to_string(), d.to_string()))
.collect();

for (key, value) in pairs {
let doc = Document::new_markdown_default_curated(&value);
eprintln!("{key}: {value}");
for (lint_name, description) in name_description_pairs {
let doc = Document::new_markdown_default_curated(&description);
eprintln!("{lint_name}: {description}");

if !group.lint(&doc).is_empty() {
dbg!(&group.lint(&doc));
if !lints_to_enforce.lint(&doc).is_empty() {
dbg!(&lints_to_enforce.lint(&doc));
panic!();
}
}
Expand Down
1 change: 1 addition & 0 deletions harper-core/src/linting/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ mod mixed_bag;
mod modal_of;
mod modal_seem;
mod months;
mod more_adjective;
mod more_better;
mod most_number;
mod most_of_the_times;
Expand Down
Loading
Loading