Skip to content

Commit 9d98c2a

Browse files
committed
continuation marker for incompletion?
1 parent e7acb72 commit 9d98c2a

File tree

5 files changed

+56
-18
lines changed

5 files changed

+56
-18
lines changed

accuracy/src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ static CFG: SpellerConfig = SpellerConfig {
2121
beam: None,
2222
case_handling: Some(CaseHandlingConfig::default()),
2323
node_pool_size: 128,
24+
completion_marker: None,
2425
};
2526

2627
fn load_words(

divvunspell-bin/src/main.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ impl OutputWriter for StdoutWriter {
4242

4343
fn write_suggestions(&mut self, _word: &str, suggestions: &[Suggestion]) {
4444
for sugg in suggestions {
45-
println!("{}\t\t{}", sugg.value, sugg.weight);
45+
46+
println!("{}\t\t{} (is complete {})", sugg.value, sugg.weight,
47+
sugg.completed);
4648
}
4749
println!();
4850
}
@@ -183,6 +185,9 @@ struct SuggestArgs {
183185
#[options(help = "maximum number of results")]
184186
nbest: Option<usize>,
185187

188+
#[options(help = "Character for incomplete predictions")]
189+
continuation_marker: Option<String>,
190+
186191
#[options(
187192
no_short,
188193
long = "no-case-handling",
@@ -310,7 +315,7 @@ fn suggest(args: SuggestArgs) -> anyhow::Result<()> {
310315
if args.disable_case_handling {
311316
suggest_cfg.case_handling = None;
312317
}
313-
318+
suggest_cfg.completion_marker = args.continuation_marker;
314319
if let Some(v) = args.nbest {
315320
if v == 0 {
316321
suggest_cfg.n_best = None;

divvunspell/src/speller/mod.rs

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ pub struct SpellerConfig {
2929
pub beam: Option<Weight>,
3030
pub case_handling: Option<CaseHandlingConfig>,
3131
pub node_pool_size: usize,
32+
pub completion_marker: Option<String>,
3233
}
3334

3435
impl SpellerConfig {
@@ -39,6 +40,7 @@ impl SpellerConfig {
3940
beam: None,
4041
case_handling: Some(CaseHandlingConfig::default()),
4142
node_pool_size: 128,
43+
completion_marker: None,
4244
}
4345
}
4446
}
@@ -236,6 +238,7 @@ where
236238
fn suggest_single(self: Arc<Self>, word: &str, config: &SpellerConfig) -> Vec<Suggestion> {
237239
let worker = SpellerWorker::new(self.clone(), self.to_input_vec(word), config.clone());
238240

241+
log::trace!("suggesting single {}", word);
239242
worker.suggest()
240243
}
241244

@@ -247,6 +250,7 @@ where
247250
) -> Vec<Suggestion> {
248251
use crate::tokenizer::case_handling::*;
249252

253+
log::trace!("suggesting cases...");
250254
let CaseHandler {
251255
original_input,
252256
mutation,
@@ -256,6 +260,7 @@ where
256260
let mut best: HashMap<SmolStr, f32> = HashMap::new();
257261

258262
for word in std::iter::once(&original_input).chain(words.iter()) {
263+
log::trace!("suggesting for word {}", word);
259264
let worker = SpellerWorker::new(self.clone(), self.to_input_vec(&word), config.clone());
260265
let mut suggestions = worker.suggest();
261266

@@ -275,7 +280,9 @@ where
275280

276281
match mode {
277282
CaseMode::MergeAll => {
283+
log::trace!("Case merge all");
278284
for sugg in suggestions.into_iter() {
285+
log::trace!("for {}", sugg.value);
279286
let penalty_start =
280287
if !sugg.value().starts_with(word.chars().next().unwrap()) {
281288
case_handling.start_penalty
@@ -316,14 +323,27 @@ where
316323
if best.is_empty() {
317324
return vec![];
318325
}
319-
320-
let mut out = best
321-
.into_iter()
322-
.map(|(k, v)| Suggestion {
323-
value: k,
324-
weight: v,
325-
})
326+
let mut out: Vec<Suggestion>;
327+
if let Some(s) = &config.completion_marker {
328+
out = best
329+
.into_iter()
330+
.map(|(k, v)| Suggestion {
331+
value: k.clone(),
332+
weight: v,
333+
completed: !k.ends_with(s),
334+
})
326335
.collect::<Vec<_>>();
336+
}
337+
else {
338+
out = best
339+
.into_iter()
340+
.map(|(k, v)| Suggestion {
341+
value: k,
342+
weight: v,
343+
completed: true,
344+
})
345+
.collect::<Vec<_>>();
346+
}
327347
out.sort();
328348
if let Some(n_best) = config.n_best {
329349
out.truncate(n_best);

divvunspell/src/speller/suggestion.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,12 @@ use std::cmp::Ordering::Equal;
88
pub struct Suggestion {
99
pub value: SmolStr,
1010
pub weight: Weight,
11+
pub completed: bool,
1112
}
1213

1314
impl Suggestion {
14-
pub fn new(value: SmolStr, weight: Weight) -> Suggestion {
15-
Suggestion { value, weight }
15+
pub fn new(value: SmolStr, weight: Weight, completed: bool) -> Suggestion {
16+
Suggestion { value, weight, completed }
1617
}
1718

1819
pub fn value(&self) -> &str {
@@ -22,6 +23,10 @@ impl Suggestion {
2223
pub fn weight(&self) -> Weight {
2324
self.weight
2425
}
26+
27+
pub fn completed(&self) -> bool {
28+
self.completed
29+
}
2530
}
2631

2732
impl PartialOrd for Suggestion {

divvunspell/src/speller/worker.rs

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -611,7 +611,7 @@ where
611611
.lexicon()
612612
.alphabet()
613613
.string_from_symbols(&next_node.string);
614-
log::trace!("suggesting? {}::{}", string, weight);
614+
// log::trace!("suggesting? {}::{}", string, weight);
615615
if weight < best_weight {
616616
best_weight = weight;
617617
}
@@ -633,17 +633,24 @@ where
633633
corrections: &HashMap<SmolStr, Weight>,
634634
) -> Vec<Suggestion> {
635635
log::trace!("Generating sorted suggestions");
636-
let mut c: Vec<Suggestion> = corrections
637-
.into_iter()
638-
.map(|x| Suggestion::new(x.0.clone(), *x.1))
639-
.collect();
640-
636+
let mut c: Vec<Suggestion>;
637+
if let Some(s) = &self.config.completion_marker {
638+
c = corrections
639+
.into_iter()
640+
.map(|x| Suggestion::new(x.0.clone(), *x.1, x.0.ends_with(s)))
641+
.collect();
642+
}
643+
else {
644+
c = corrections
645+
.into_iter()
646+
.map(|x| Suggestion::new(x.0.clone(), *x.1, true))
647+
.collect();
648+
}
641649
c.sort();
642650

643651
if let Some(n) = self.config.n_best {
644652
c.truncate(n);
645653
}
646-
647654
c
648655
}
649656
}

0 commit comments

Comments
 (0)