Skip to content

Commit 6e5fb64

Browse files
committed
Add common wordlist
1 parent d74db82 commit 6e5fb64

File tree

16 files changed

+3987
-2426
lines changed

16 files changed

+3987
-2426
lines changed

codebook/src/lib.rs

Lines changed: 57 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,10 @@ mod queries;
33
mod splitter;
44
use lru::LruCache;
55

6-
use crate::queries::{get_language_name_from_filename, get_language_setting, LanguageSetting};
6+
use crate::queries::{
7+
get_language_name_from_filename, get_language_setting, LanguageSetting, LanguageType,
8+
COMMON_DICTIONARY,
9+
};
710
use std::{
811
collections::{HashMap, HashSet},
912
num::NonZeroUsize,
@@ -39,9 +42,9 @@ pub struct TextRange {
3942

4043
#[derive(Debug)]
4144
pub struct CodeDictionary {
42-
custom_dictionary: HashSet<String>,
45+
custom_dictionary: Arc<RwLock<HashSet<String>>>,
4346
dictionary: spellbook::Dictionary,
44-
dictionary_lookup_cache: Arc<RwLock<LruCache<String, Vec<String>>>>,
47+
suggestion_cache: Arc<RwLock<LruCache<String, Vec<String>>>>,
4548
}
4649

4750
impl CodeDictionary {
@@ -50,29 +53,41 @@ impl CodeDictionary {
5053
let dic = std::fs::read_to_string(dic_path)?;
5154
let dict = spellbook::Dictionary::new(&aff, &dic)
5255
.map_err(|e| format!("Dictionary parse error: {}", e))?;
53-
56+
let mut custom_dictionary: HashSet<String> = HashSet::new();
57+
for word in COMMON_DICTIONARY.lines() {
58+
custom_dictionary.insert(word.to_string());
59+
}
5460
Ok(CodeDictionary {
55-
custom_dictionary: HashSet::new(),
61+
custom_dictionary: Arc::new(RwLock::new(custom_dictionary)),
5662
dictionary: dict,
57-
dictionary_lookup_cache: Arc::new(RwLock::new(LruCache::new(
63+
suggestion_cache: Arc::new(RwLock::new(LruCache::new(
5864
NonZeroUsize::new(10000).unwrap(),
5965
))),
6066
})
6167
}
6268

6369
pub fn check(&self, word: &str) -> bool {
64-
self.custom_dictionary.contains(word) || self.dictionary.check(word)
65-
// self.dictionary_lookup_cache.read().unwrap().contains(word) || self.dictionary.check(word)
70+
self.custom_dictionary
71+
.read()
72+
.unwrap()
73+
.contains(word.to_lowercase().as_str())
74+
|| self.dictionary.check(word)
75+
// self.lookup_cache.read().unwrap().contains(word) || self.dictionary.check(word)
6676
}
6777

68-
pub fn add_to_dictionary(&mut self, word: String) {
69-
self.custom_dictionary.insert(word);
78+
pub fn add_to_dictionary(&self, strings: &str) {
79+
for line in strings.lines() {
80+
self.custom_dictionary
81+
.write()
82+
.unwrap()
83+
.insert(line.to_string());
84+
}
7085
}
7186

7287
pub fn suggest(&self, word: &str) -> Vec<String> {
7388
println!("Checking Cache: {:?}", word);
7489
// First try to get from cache with write lock since get() needs to modify LRU order
75-
if let Some(suggestions) = self.dictionary_lookup_cache.write().unwrap().get_mut(word) {
90+
if let Some(suggestions) = self.suggestion_cache.write().unwrap().get_mut(word) {
7691
println!("Cache hit for {:?}", word);
7792
return suggestions.clone();
7893
}
@@ -81,7 +96,7 @@ impl CodeDictionary {
8196
let mut suggestions = Vec::new();
8297
self.dictionary.suggest(word, &mut suggestions);
8398
if !suggestions.is_empty() {
84-
self.dictionary_lookup_cache
99+
self.suggestion_cache
85100
.write()
86101
.unwrap()
87102
.put(word.to_string(), suggestions.clone());
@@ -90,27 +105,41 @@ impl CodeDictionary {
90105
}
91106

92107
pub fn spell_check(&self, text: &str, language: &str) -> Vec<SpellCheckResult> {
93-
// print!("language: {:?}", language);
94-
let lang = get_language_setting(language);
95-
match lang {
96-
None => {
97-
return self.spell_check_text(text);
98-
}
99-
Some(lang) => {
100-
return self.spell_check_code(text, lang);
101-
}
102-
}
108+
let lang_type = LanguageType::from_str(language);
109+
return self.spell_check_enum(text, lang_type);
103110
}
104111

105112
pub fn spell_check_file(&self, path: &str) -> Vec<SpellCheckResult> {
106-
let lang_name = get_language_name_from_filename(path);
113+
let lang_type = get_language_name_from_filename(path);
107114
let file_text = std::fs::read_to_string(path).unwrap();
108-
return self.spell_check(&file_text, &lang_name);
115+
return self.spell_check_enum(&file_text, lang_type);
109116
}
110117

111118
pub fn spell_check_file_memory(&self, path: &str, contents: &str) -> Vec<SpellCheckResult> {
112-
let lang_name = get_language_name_from_filename(path);
113-
return self.spell_check(&contents, &lang_name);
119+
let lang_type = get_language_name_from_filename(path);
120+
return self.spell_check_enum(&contents, lang_type);
121+
}
122+
123+
fn spell_check_enum(
124+
&self,
125+
text: &str,
126+
language_type: Option<LanguageType>,
127+
) -> Vec<SpellCheckResult> {
128+
let language = match language_type {
129+
None => None,
130+
Some(lang) => get_language_setting(lang),
131+
};
132+
match language {
133+
None => {
134+
return self.spell_check_text(text);
135+
}
136+
Some(lang) => {
137+
// if let Some(dictionary) = lang.language_dictionary {
138+
// self.add_to_dictionary(dictionary);
139+
// }
140+
return self.spell_check_code(text, lang);
141+
}
142+
}
114143
}
115144

116145
fn spell_check_text(&self, text: &str) -> Vec<SpellCheckResult> {
@@ -301,7 +330,7 @@ mod lib_tests {
301330
let mut cdict =
302331
CodeDictionary::new("./tests/en_index.aff", "./tests/en_index.dic").unwrap();
303332
for word in EXTRA_WORDS {
304-
cdict.add_to_dictionary(word.to_string());
333+
cdict.add_to_dictionary(word);
305334
}
306335
cdict
307336
}
@@ -311,7 +340,7 @@ mod lib_tests {
311340
let processor = get_processor();
312341

313342
let text = "HelloWorld calc_wrld";
314-
let misspelled = processor.spell_check(text, "text");
343+
let misspelled = processor.spell_check_enum(text, None);
315344
println!("{:?}", misspelled);
316345
assert!(misspelled.iter().any(|r| r.word == "wrld"));
317346
}

codebook/src/main.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
mod downloader;
2-
32
use codebook::CodeDictionary;
43
use downloader::DictionaryDownloader;
54
use std::env;
65
use std::path::Path;
76

87
fn main() {
98
let args: Vec<String> = env::args().collect();
10-
let downloader =
11-
DictionaryDownloader::new(downloader::DEFAULT_BASE_URL, "../.cache/dictionaries");
12-
let files = downloader.get("en").unwrap();
9+
let loader = DictionaryDownloader::new(downloader::DEFAULT_BASE_URL, "../.cache/dictionaries");
10+
let files = loader.get("en").unwrap();
1311
let processor = CodeDictionary::new(&files.aff_local_path, &files.dic_local_path).unwrap();
1412

1513
// println!("My path is {:?}", args);

codebook/src/queries.rs

Lines changed: 51 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,38 @@
11
use tree_sitter::Language;
22

3+
#[derive(Debug, Clone, PartialEq, Copy)]
4+
pub enum LanguageType {
5+
Rust,
6+
Python,
7+
Javascript,
8+
Typescript,
9+
Html,
10+
Css,
11+
Go,
12+
Text,
13+
}
14+
15+
impl LanguageType {
16+
pub fn from_str(s: &str) -> Option<LanguageType> {
17+
match s {
18+
"rust" => Some(LanguageType::Rust),
19+
"python" => Some(LanguageType::Python),
20+
"javascript" => Some(LanguageType::Javascript),
21+
"typescript" => Some(LanguageType::Typescript),
22+
"html" => Some(LanguageType::Html),
23+
"css" => Some(LanguageType::Css),
24+
"go" => Some(LanguageType::Go),
25+
"text" => Some(LanguageType::Text),
26+
_ => None,
27+
}
28+
}
29+
}
30+
31+
pub static COMMON_DICTIONARY: &str = include_str!("../../wordlists/common.txt");
332
// Use https://intmainreturn0.com/ts-visualizer/ to help with writing grammar queries
433
pub static LANGUAGE_SETTINGS: [LanguageSetting; 7] = [
534
LanguageSetting {
35+
type_: LanguageType::Rust,
636
name: "rust",
737
query: r#"
838
(function_item
@@ -18,6 +48,7 @@ pub static LANGUAGE_SETTINGS: [LanguageSetting; 7] = [
1848
extensions: &["rs"],
1949
},
2050
LanguageSetting {
51+
type_: LanguageType::Python,
2152
name: "python",
2253
query: r#"
2354
(identifier) @identifier
@@ -32,6 +63,7 @@ pub static LANGUAGE_SETTINGS: [LanguageSetting; 7] = [
3263
extensions: &["py"],
3364
},
3465
LanguageSetting {
66+
type_: LanguageType::Javascript,
3567
name: "javascript",
3668
query: r#"
3769
(identifier) @identifier
@@ -49,6 +81,7 @@ pub static LANGUAGE_SETTINGS: [LanguageSetting; 7] = [
4981
extensions: &["js"],
5082
},
5183
LanguageSetting {
84+
type_: LanguageType::Typescript,
5285
name: "typescript",
5386
query: r#"
5487
(identifier) @identifier
@@ -65,6 +98,7 @@ pub static LANGUAGE_SETTINGS: [LanguageSetting; 7] = [
6598
extensions: &["ts"],
6699
},
67100
LanguageSetting {
101+
type_: LanguageType::Html,
68102
name: "html",
69103
query: r#"
70104
(text) @string
@@ -74,6 +108,7 @@ pub static LANGUAGE_SETTINGS: [LanguageSetting; 7] = [
74108
extensions: &["html", "htm"],
75109
},
76110
LanguageSetting {
111+
type_: LanguageType::Css,
77112
name: "css",
78113
query: r#"
79114
(class_name) @identifier
@@ -86,6 +121,7 @@ pub static LANGUAGE_SETTINGS: [LanguageSetting; 7] = [
86121
extensions: &["css"],
87122
},
88123
LanguageSetting {
124+
type_: LanguageType::Go,
89125
name: "go",
90126
query: r#"
91127
(comment) @comment
@@ -99,28 +135,30 @@ pub static LANGUAGE_SETTINGS: [LanguageSetting; 7] = [
99135

100136
#[derive(Debug)]
101137
pub struct LanguageSetting {
138+
pub type_: LanguageType,
102139
pub query: &'static str,
103140
pub name: &'static str,
104141
pub extensions: &'static [&'static str],
105142
}
106143

107144
impl LanguageSetting {
108145
pub fn language(&self) -> Option<Language> {
109-
match self.name {
110-
"rust" => Some(tree_sitter_rust::LANGUAGE.into()),
111-
"python" => Some(tree_sitter_python::LANGUAGE.into()),
112-
"javascript" => Some(tree_sitter_javascript::LANGUAGE.into()),
113-
"typescript" => Some(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
114-
"html" => Some(tree_sitter_html::LANGUAGE.into()),
115-
"go" => Some(tree_sitter_go::LANGUAGE.into()),
116-
_ => None,
146+
match self.type_ {
147+
LanguageType::Rust => Some(tree_sitter_rust::LANGUAGE.into()),
148+
LanguageType::Python => Some(tree_sitter_python::LANGUAGE.into()),
149+
LanguageType::Javascript => Some(tree_sitter_javascript::LANGUAGE.into()),
150+
LanguageType::Typescript => Some(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
151+
LanguageType::Html => Some(tree_sitter_html::LANGUAGE.into()),
152+
LanguageType::Css => None,
153+
LanguageType::Go => Some(tree_sitter_go::LANGUAGE.into()),
154+
LanguageType::Text => None,
117155
}
118156
}
119157
}
120158

121-
pub fn get_language_setting(language_name: &str) -> Option<&LanguageSetting> {
159+
pub fn get_language_setting(language_type: LanguageType) -> Option<&'static LanguageSetting> {
122160
for setting in LANGUAGE_SETTINGS.iter() {
123-
if setting.name == language_name {
161+
if setting.type_ == language_type {
124162
if setting.language().is_some() {
125163
return Some(setting);
126164
}
@@ -129,14 +167,14 @@ pub fn get_language_setting(language_name: &str) -> Option<&LanguageSetting> {
129167
None
130168
}
131169

132-
pub fn get_language_name_from_filename(filename: &str) -> String {
170+
pub fn get_language_name_from_filename(filename: &str) -> Option<LanguageType> {
133171
let extension = filename.split('.').last().unwrap();
134172
for setting in LANGUAGE_SETTINGS.iter() {
135173
for ext in setting.extensions.iter() {
136174
if ext == &extension {
137-
return setting.name.to_string();
175+
return Some(setting.type_);
138176
}
139177
}
140178
}
141-
"text".to_string()
179+
None
142180
}

codebook/tests/test_files.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ fn test_example_files() {
114114
vec!["Wolrd", "bvd", "regulr", "splellin", "wolrd"],
115115
),
116116
("example.txt", vec!["Splellin", "bd"]),
117-
("example.rs", vec!["birt", "curent", "jalopin", "usr"]),
117+
("example.rs", vec!["birt", "calclate", "curent", "jalopin"]),
118118
(
119119
"example.go",
120120
vec!["speling", "Wolrd", "mispeled", "Funcion"],
@@ -146,7 +146,7 @@ fn test_example_files() {
146146
];
147147
for mut file in files {
148148
let path = example_file_path(file.0);
149-
println!("Checking file: {path:?}");
149+
println!("---------- Checking file: {path:?} ----------");
150150
let processor = utils::get_processor();
151151
let results = processor.spell_check_file(&path);
152152
let mut misspelled = results

codebook/tests/test_python.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ fn test_python_simple() {
1111
usrAge = get_curent_date() - bithDate
1212
userAge
1313
"#;
14-
let expected = vec!["bith", "calculat", "curent", "examle", "usr"];
14+
let expected = vec!["bith", "calculat", "curent", "examle"];
1515
let binding = processor.spell_check(sample_text, "python").to_vec();
1616
let mut misspelled = binding
1717
.iter()

codebook/tests/test_rust.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ fn test_rust_simple() {
1212
userAge
1313
}
1414
"#;
15-
let expected = vec!["bith", "calculat", "examle", "usr"];
15+
let expected = vec!["bith", "calculat", "examle"];
1616
let binding = processor.spell_check(sample_text, "rust").to_vec();
1717
let mut misspelled = binding
1818
.iter()

codebook/tests/utils/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ static EXTRA_WORDS: &'static [&'static str] = &["http", "https", "www", "viewpor
44
pub fn get_processor() -> CodeDictionary {
55
let mut cdict = CodeDictionary::new("./tests/en_index.aff", "./tests/en_index.dic").unwrap();
66
for word in EXTRA_WORDS {
7-
cdict.add_to_dictionary(word.to_string());
7+
cdict.add_to_dictionary(word);
88
}
99
cdict
1010
}

0 commit comments

Comments
 (0)