Skip to content
Closed
Changes from 2 commits
Commits
Show all changes
156 commits
Select commit Hold shift + click to select a range
5f7f7f7
Merge branch 'Automattic:master' into master
hippietrail Mar 31, 2025
23b587e
Merge branch 'Automattic:master' into master
hippietrail Mar 31, 2025
4e91e3d
chore: add false positive "head" with test
hippietrail Apr 1, 2025
16f5d0d
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail Apr 1, 2025
657c4e6
Merge branch 'Automattic:master' into master
hippietrail Apr 3, 2025
fac079e
Merge branch 'Automattic:master' into master
hippietrail Apr 3, 2025
0f90096
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail Apr 3, 2025
10c9dc9
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Apr 3, 2025
b42b0c5
Merge branch 'Automattic:master' into master
hippietrail Apr 4, 2025
581786e
Merge branch 'Automattic:master' into master
hippietrail Apr 4, 2025
61aadfc
Merge branch 'Automattic:master' into master
hippietrail Apr 4, 2025
2b696eb
Merge branch 'Automattic:master' into master
hippietrail Apr 4, 2025
f6a1154
Merge branch 'Automattic:master' into master
hippietrail Apr 4, 2025
21bf7c4
Merge branch 'Automattic:master' into master
hippietrail Apr 7, 2025
d2c12b2
Merge branch 'Automattic:master' into master
hippietrail Apr 7, 2025
51f0a28
Merge branch 'Automattic:master' into master
hippietrail Apr 7, 2025
c57135f
Merge branch 'Automattic:master' into master
hippietrail Apr 8, 2025
9f056bd
Merge branch 'Automattic:master' into master
hippietrail Apr 9, 2025
ab18683
Merge branch 'Automattic:master' into master
hippietrail Apr 10, 2025
076d146
Merge branch 'Automattic:master' into master
hippietrail Apr 11, 2025
c300ab2
Merge branch 'Automattic:master' into master
hippietrail Apr 14, 2025
221bfc6
Merge branch 'Automattic:master' into master
hippietrail Apr 15, 2025
d491db5
Merge branch 'Automattic:master' into master
hippietrail Apr 15, 2025
a693880
Merge branch 'Automattic:master' into master
hippietrail Apr 16, 2025
578c9e8
Merge branch 'Automattic:master' into master
hippietrail Apr 17, 2025
62a2d29
Merge branch 'Automattic:master' into master
hippietrail Apr 18, 2025
c865f09
Merge branch 'Automattic:master' into master
hippietrail Apr 18, 2025
d5bd76f
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail Apr 21, 2025
ca0d6ce
Merge branch 'Automattic:master' into master
hippietrail Apr 22, 2025
a9644c3
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Apr 22, 2025
9e0f61b
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail Apr 22, 2025
d59efa7
Merge branch 'Automattic:master' into master
hippietrail Apr 22, 2025
6c98007
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Apr 23, 2025
805ee08
Merge branch 'Automattic:master' into master
hippietrail Apr 23, 2025
68ff49d
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail Apr 23, 2025
b08280e
Merge branch 'Automattic:master' into master
hippietrail Apr 23, 2025
29d99d7
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Apr 23, 2025
0f3a1c1
Merge branch 'Automattic:master' into master
hippietrail Apr 24, 2025
85a15ae
Merge branch 'Automattic:master' into master
hippietrail Apr 24, 2025
a70517d
Merge branch 'Automattic:master' into master
hippietrail Apr 24, 2025
31de7bd
Merge branch 'Automattic:master' into master
hippietrail Apr 25, 2025
120344a
Merge branch 'Automattic:master' into master
hippietrail Apr 27, 2025
f3a9acb
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail Apr 29, 2025
9cb4400
Merge branch 'Automattic:master' into master
hippietrail Apr 29, 2025
8e77955
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail Apr 29, 2025
37861ff
Merge branch 'Automattic:master' into master
hippietrail Apr 30, 2025
47a60bf
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail Apr 30, 2025
8fcc811
Merge branch 'Automattic:master' into master
hippietrail Apr 30, 2025
965f7b7
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Apr 30, 2025
55fb24d
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail May 1, 2025
cca3c3e
Merge branch 'Automattic:master' into master
hippietrail May 1, 2025
a1bfd94
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail May 1, 2025
6dff899
Merge branch 'Automattic:master' into master
hippietrail May 1, 2025
948e4be
Merge branch 'Automattic:master' into master
hippietrail May 1, 2025
943a440
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail May 1, 2025
73c7a8d
Merge branch 'Automattic:master' into master
hippietrail May 1, 2025
609994b
Merge branch 'Automattic:master' into master
hippietrail May 3, 2025
02100a0
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail May 3, 2025
8fb4269
Merge branch 'Automattic:master' into master
hippietrail May 5, 2025
cfa8a15
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail May 5, 2025
1b1b530
Merge branch 'Automattic:master' into master
hippietrail May 6, 2025
7603f2f
Merge branch 'Automattic:master' into master
hippietrail May 6, 2025
21f63ee
Merge branch 'Automattic:master' into master
hippietrail May 8, 2025
0034b9e
Merge branch 'Automattic:master' into master
hippietrail May 9, 2025
629622d
Merge branch 'Automattic:master' into master
hippietrail May 10, 2025
ad02eb7
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail May 10, 2025
19b591f
Merge branch 'Automattic:master' into master
hippietrail May 13, 2025
7fa5803
Merge branch 'Automattic:master' into master
hippietrail May 13, 2025
3e56320
Merge branch 'Automattic:master' into master
hippietrail May 14, 2025
7034eae
Merge branch 'Automattic:master' into master
hippietrail May 14, 2025
b51d3e8
Merge branch 'Automattic:master' into master
hippietrail May 14, 2025
fdb76b7
Merge branch 'Automattic:master' into master
hippietrail May 15, 2025
b0edb4b
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail May 15, 2025
5f1cabb
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail May 15, 2025
b142657
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail May 15, 2025
c345783
Merge branch 'Automattic:master' into master
hippietrail May 16, 2025
69674f3
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail May 19, 2025
62b6c56
Merge branch 'Automattic:master' into master
hippietrail May 19, 2025
0b6bc86
Merge branch 'Automattic:master' into master
hippietrail May 20, 2025
cb6573d
Merge branch 'Automattic:master' into master
hippietrail May 21, 2025
21ed364
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail May 21, 2025
4317924
Merge branch 'Automattic:master' into master
hippietrail May 22, 2025
9a179d6
Merge branch 'Automattic:master' into master
hippietrail May 22, 2025
49671d5
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail May 22, 2025
97c0972
Merge branch 'Automattic:master' into master
hippietrail May 23, 2025
84a03e9
Merge branch 'Automattic:master' into master
hippietrail May 23, 2025
2161c07
Merge branch 'Automattic:master' into master
hippietrail May 23, 2025
da5ac9a
Merge branch 'Automattic:master' into master
hippietrail May 26, 2025
8496544
Merge branch 'Automattic:master' into master
hippietrail May 27, 2025
e585e49
Merge branch 'Automattic:master' into master
hippietrail May 28, 2025
f81285f
Merge branch 'Automattic:master' into master
hippietrail May 29, 2025
f37732c
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail May 30, 2025
307fae6
Merge branch 'Automattic:master' into master
hippietrail May 30, 2025
4a7ad49
Merge branch 'Automattic:master' into master
hippietrail May 30, 2025
a6eb073
Merge branch 'Automattic:master' into master
hippietrail May 30, 2025
1953561
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Jun 2, 2025
b9eb6e1
Merge branch 'Automattic:master' into master
hippietrail Jun 3, 2025
7daad61
Merge branch 'Automattic:master' into master
hippietrail Jun 3, 2025
3b70dcb
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Jun 16, 2025
36d8076
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Jun 16, 2025
78b47e6
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Jun 17, 2025
4909ddb
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Jun 18, 2025
6c510f7
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Jun 19, 2025
1ffef89
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail Jun 19, 2025
dffcf01
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Jun 20, 2025
c343633
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail Jun 20, 2025
ebb0bec
Merge branch 'Automattic:master' into master
hippietrail Jun 20, 2025
ca2c62e
Merge branch 'Automattic:master' into master
hippietrail Jun 22, 2025
6f4efa8
Merge branch 'Automattic:master' into master
hippietrail Jun 24, 2025
8ed9448
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Jun 24, 2025
56fc93e
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail Jun 24, 2025
0f68ef1
Merge branch 'Automattic:master' into master
hippietrail Jun 25, 2025
ecf5d96
Merge branch 'Automattic:master' into master
hippietrail Jun 26, 2025
0cfd1c6
Merge branch 'Automattic:master' into master
hippietrail Jun 27, 2025
8f2b8bd
Merge branch 'Automattic:master' into master
hippietrail Jun 28, 2025
13465df
Merge branch 'Automattic:master' into master
hippietrail Jun 30, 2025
c56dcec
Merge branch 'Automattic:master' into master
hippietrail Jul 1, 2025
dfe6ad9
Merge branch 'Automattic:master' into master
hippietrail Jul 1, 2025
fe49d29
Merge branch 'Automattic:master' into master
hippietrail Jul 1, 2025
3d2dd4e
Merge branch 'Automattic:master' into master
hippietrail Jul 3, 2025
3f5cbcf
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail Jul 4, 2025
15554a9
Merge branch 'Automattic:master' into master
hippietrail Jul 4, 2025
73173d9
Merge branch 'Automattic:master' into master
hippietrail Jul 4, 2025
4146f9e
Merge branch 'Automattic:master' into master
hippietrail Jul 4, 2025
c9ebdeb
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Jul 4, 2025
d438447
Merge branch 'Automattic:master' into master
hippietrail Jul 7, 2025
52fbbd3
Merge branch 'Automattic:master' into master
hippietrail Jul 8, 2025
9584ef4
Merge branch 'Automattic:master' into master
hippietrail Jul 9, 2025
87b2cf8
Merge branch 'Automattic:master' into master
hippietrail Jul 10, 2025
7c03846
Merge branch 'Automattic:master' into master
hippietrail Jul 10, 2025
284304a
Merge branch 'Automattic:master' into master
hippietrail Jul 11, 2025
d96e1df
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Jul 11, 2025
f78f98e
Merge branch 'Automattic:master' into master
hippietrail Jul 11, 2025
791816d
Merge branch 'Automattic:master' into master
hippietrail Jul 14, 2025
43b7eb9
Merge branch 'Automattic:master' into master
hippietrail Jul 15, 2025
a21444c
Merge branch 'Automattic:master' into master
hippietrail Jul 15, 2025
315e43c
Merge branch 'Automattic:master' into master
hippietrail Jul 16, 2025
9dd8ce8
Merge branch 'Automattic:master' into master
hippietrail Jul 18, 2025
7c62d75
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Jul 21, 2025
0c57a7a
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Jul 23, 2025
eb3ed85
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Aug 6, 2025
00e5831
Merge branch 'master' of https://github.com/hippietrail/harper
hippietrail Sep 4, 2025
9ad2409
Merge branch 'master' of https://github.com/Automattic/harper
hippietrail Sep 4, 2025
175838c
Merge branch 'Automattic:master' into master
hippietrail Sep 5, 2025
70c1591
Merge branch 'Automattic:master' into master
hippietrail Sep 6, 2025
1cf6c00
feat: `harper-cli normalize-dictionary`
hippietrail Sep 9, 2025
01112e1
chore: appease clippy
hippietrail Sep 9, 2025
c4d1343
chore: keep `N` and `O` together, maintaining relative order
hippietrail Sep 25, 2025
74e3c24
Merge branch 'master' of http://github.com/hippietrail/harper
hippietrail Sep 30, 2025
d014b42
chore: trying new flag normalize design
hippietrail Oct 30, 2025
2e1d74e
Merge branch 'master' of http://github.com/hippietrail/harper into di…
hippietrail Oct 30, 2025
661152e
Merge branch 'master' of http://github.com/Automattic/harper into dic…
hippietrail Oct 30, 2025
f8c78d9
Merge branch 'master' of http://github.com/Automattic/harper into dic…
hippietrail Nov 2, 2025
da54c12
chore: improved normalization code
hippietrail Nov 3, 2025
79a703a
Improve annotation flag normalization
hippietrail Nov 3, 2025
f442f7c
fix: clippy issues
hippietrail Nov 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 142 additions & 0 deletions harper-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,11 @@ enum Args {
/// The directory containing the dictionary and affixes.
dir: PathBuf,
},
/// Normalize the `dictionary.dict` file.
NormalizeDictionary {
/// The directory containing the dictionary and affixes.
dir: PathBuf,
},
/// Emit a decompressed, line-separated list of the compounds in Harper's dictionary.
/// As long as there's either an open or hyphenated spelling.
Compounds,
Expand Down Expand Up @@ -666,6 +671,49 @@ fn main() -> anyhow::Result<()> {

Ok(())
}
Args::NormalizeDictionary { dir } => {
let dict_path = dir.join("dictionary.dict");
let dict_content = fs::read_to_string(&dict_path)
.map_err(|e| anyhow!("Failed to read dictionary: {e}"))?;

for oldline in dict_content.lines() {
let newline = if oldline.is_empty()
|| oldline.starts_with('#')
|| oldline.chars().all(|c| c.is_ascii_digit())
{
oldline.into()
} else {
let (entry_part, comment_part) = oldline
.split_once('#')
.map_or((oldline, ""), |(e, c)| (e, c));

if let Some((lexeme, rest)) = entry_part.split_once('/') {
let (annotation, whitespace) = match rest.split_once([' ', '\t']) {
Some((a, _)) => (a, &rest[a.len()..]),
None => (rest, ""),
};

let normalized = format!(
"{}/{}{}",
lexeme,
normalize_annotation_flags(annotation),
whitespace
);
if !comment_part.is_empty() {
format!("{}{}#{}", normalized.trim_end(), whitespace, comment_part)
} else {
normalized
}
} else {
oldline.into()
}
};

println!("{newline}");
}

Ok(())
}
Args::Compounds => {
let mut compound_map: HashMap<String, Vec<String>> = HashMap::new();

Expand Down Expand Up @@ -891,3 +939,97 @@ fn file_dict_name(path: &Path) -> PathBuf {

rewritten.into()
}

/// Normalizes a dictionary entry's annotation flags.
///
/// The flags are regrouped so that each property flag directly follows the part-of-speech
/// (POS) tag it belongs to, while POS tags keep the relative order of their first appearance:
///
/// - `~`, when present, is emitted first and exactly once.
/// - A repeated POS tag is kept and stored among the properties of its first occurrence.
/// - Noun properties attach to `N`, or to `O` when no `N` has been seen so far.
/// - `S` attaches to `N`, otherwise to `V`; `>` attaches to `V`, otherwise to `J`.
/// - Verb properties, `S` and `>` that appear before any tag they could attach to are kept
///   in place in the output sequence.
/// - Noun, adjective, pronoun and determiner properties that appear before their POS tag,
///   as well as unrecognized flags, are appended at the end in input order.
///
/// This function never panics: a property whose POS tag is missing is preserved rather than
/// assumed to have an owner.
fn normalize_annotation_flags(flag_str: &str) -> String {
    const POS_TAGS: &str = "NOVJRIPCD";
    const NOUN_PROPS: &str = "09gmw";
    const VERB_PROPS: &str = "lAbdGtT6h";
    const PRON_PROPS: &str = "aso123F";
    const ADJ_PROPS: &str = "^cuY*.:";
    const DET_PROPS: &str = "qM5";

    // Output order of the leading items (POS tags plus any in-place orphan flags).
    let mut pos_order: Vec<char> = Vec::new();
    // Properties collected for each POS tag seen so far.
    let mut pos_map: HashMap<char, Vec<char>> = HashMap::new();
    // Flags with no owner; emitted after everything else.
    let mut unused: Vec<char> = Vec::new();

    // `~` must come first if present; later occurrences are dropped as duplicates.
    if flag_str.contains('~') {
        pos_order.push('~');
    }

    for flag in flag_str.chars() {
        if flag == '~' {
            continue;
        }

        if POS_TAGS.contains(flag) {
            if let Some(props) = pos_map.get_mut(&flag) {
                // A duplicate POS tag is kept as a property of the first occurrence.
                props.push(flag);
            } else {
                pos_map.insert(flag, Vec::new());
                pos_order.push(flag);
            }
            continue;
        }

        // `owners` lists the POS tags this flag may attach to, in priority order.
        // `keep_in_place` selects where an ownerless flag goes: into the ordered sequence
        // (true) or into the trailing unused bucket (false).
        let (owners, keep_in_place) = if NOUN_PROPS.contains(flag) {
            ("NO", false)
        } else if VERB_PROPS.contains(flag) {
            ("V", true)
        } else if flag == 'S' {
            ("NV", true)
        } else if flag == '>' {
            ("VJ", true)
        } else if ADJ_PROPS.contains(flag) {
            ("J", false)
        } else if PRON_PROPS.contains(flag) {
            ("I", false)
        } else if DET_PROPS.contains(flag) {
            ("D", false)
        } else {
            ("", false)
        };

        let owner = owners.chars().find(|pos| pos_map.contains_key(pos));
        match owner {
            Some(pos) => pos_map.entry(pos).or_default().push(flag),
            None if keep_in_place => pos_order.push(flag),
            None => unused.push(flag),
        }
    }

    let mut result = String::with_capacity(flag_str.len());
    for head in &pos_order {
        result.push(*head);
        if let Some(props) = pos_map.get(head) {
            result.extend(props);
        }
    }
    result.extend(&unused);
    result
}